diff --git a/.github/labeler.yml b/.github/labeler.yml index 6617acbf9187e..be78eba4baf8b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -93,9 +93,9 @@ SQL: - changed-files: - all-globs-to-any-file: [ '**/sql/**/*', - '!python/pyspark/sql/avro/**/*', - '!python/pyspark/sql/streaming/**/*', - '!python/pyspark/sql/tests/streaming/test_streaming*.py' + '!python/**/avro/**/*', + '!python/**/protobuf/**/*', + '!python/**/streaming/**/*' ] - any-glob-to-any-file: [ 'common/unsafe/**/*', @@ -119,7 +119,7 @@ AVRO: - changed-files: - any-glob-to-any-file: [ 'connector/avro/**/*', - 'python/pyspark/sql/avro/**/*' + 'python/**/avro/**/*' ] DSTREAM: @@ -152,9 +152,8 @@ ML: MLLIB: - changed-files: - any-glob-to-any-file: [ - '**/spark/mllib/**/*', - 'mllib-local/**/*', - 'python/pyspark/mllib/**/*' + '**/mllib/**/*', + 'mllib-local/**/*' ] STRUCTURED STREAMING: @@ -162,8 +161,7 @@ STRUCTURED STREAMING: - any-glob-to-any-file: [ '**/sql/**/streaming/**/*', 'connector/kafka-0-10-sql/**/*', - 'python/pyspark/sql/streaming/**/*', - 'python/pyspark/sql/tests/streaming/test_streaming*.py', + 'python/pyspark/sql/**/streaming/**/*', '**/*streaming.R' ] @@ -226,13 +224,12 @@ CONNECT: - any-glob-to-any-file: [ 'sql/connect/**/*', 'connector/connect/**/*', - 'python/pyspark/sql/**/connect/**/*', - 'python/pyspark/ml/**/connect/**/*' + 'python/**/connect/**/*' ] PROTOBUF: - changed-files: - any-glob-to-any-file: [ 'connector/protobuf/**/*', - 'python/pyspark/sql/protobuf/**/*' + 'python/**/protobuf/**/*' ] diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3117872e21680..ef11c8416b0ae 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -41,7 +41,7 @@ on: description: Additional environment variables to set when running the tests. Should be in JSON format. 
required: false type: string - default: '{}' + default: '{"PYSPARK_IMAGE_TO_TEST": "python-311", "PYTHON_TO_TEST": "python3.11"}' jobs: description: >- Jobs to run, and should be in JSON format. The values should be matched with the job's key defined @@ -64,6 +64,8 @@ jobs: image_lint_url_link: ${{ steps.infra-image-link.outputs.image_lint_url_link }} image_sparkr_url: ${{ steps.infra-image-sparkr-outputs.outputs.image_sparkr_url }} image_sparkr_url_link: ${{ steps.infra-image-link.outputs.image_sparkr_url_link }} + image_pyspark_url: ${{ steps.infra-image-pyspark-outputs.outputs.image_pyspark_url }} + image_pyspark_url_link: ${{ steps.infra-image-link.outputs.image_pyspark_url_link }} steps: - name: Checkout Spark repository uses: actions/checkout@v4 @@ -82,10 +84,11 @@ jobs: id: set-outputs run: | if [ -z "${{ inputs.jobs }}" ]; then - pyspark_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` + pyspark_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark') and not m.name.startswith('pyspark-pandas')))"` + pyspark_pandas_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark-pandas')))"` pyspark=`./dev/is-changed.py -m $pyspark_modules` + pandas=`./dev/is-changed.py -m $pyspark_pandas_modules` if [[ "${{ github.repository }}" != 'apache/spark' ]]; then - pandas=$pyspark yarn=`./dev/is-changed.py -m yarn` kubernetes=`./dev/is-changed.py -m kubernetes` sparkr=`./dev/is-changed.py -m sparkr` @@ -132,6 +135,28 @@ jobs: precondition="${precondition//$'\n'/}" echo "required=$precondition" >> $GITHUB_OUTPUT fi + - name: Check envs + id: check-envs + if: inputs.branch != 'branch-3.5' + env: ${{ fromJSON(inputs.envs) }} + run: | + if [[ "${{ fromJson(steps.set-outputs.outputs.required).pyspark }}" == 
'true' || "${{ fromJson(steps.set-outputs.outputs.required).pyspark-pandas }}" == 'true' ]]; then + if [[ "${{ env.PYSPARK_IMAGE_TO_TEST }}" == "" ]]; then + echo "PYSPARK_IMAGE_TO_TEST is required when pyspark is enabled." + exit 1 + fi + PYSPARK_IMAGE_PATH="dev/spark-test-image/${{ env.PYSPARK_IMAGE_TO_TEST }}/Dockerfile" + if [ -f $PYSPARK_IMAGE_PATH ]; then + echo "Dockerfile $PYSPARK_IMAGE_PATH exists." + else + echo "Dockerfile $PYSPARK_IMAGE_PATH does NOT exist." + exit 1 + fi + if [[ "${{ env.PYTHON_TO_TEST }}" == "" ]]; then + echo "PYTHON_TO_TEST is required when pyspark is enabled." + exit 1 + fi + fi - name: Generate infra image URL id: infra-image-outputs run: | @@ -164,8 +189,19 @@ jobs: IMG_NAME="apache-spark-ci-image-sparkr:${{ inputs.branch }}-${{ github.run_id }}" IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME" echo "image_sparkr_url=$IMG_URL" >> $GITHUB_OUTPUT + - name: Generate infra image URL (PySpark ${{ env.PYSPARK_IMAGE_TO_TEST }}) + id: infra-image-pyspark-outputs + if: ${{ env.PYSPARK_IMAGE_TO_TEST }} + env: ${{ fromJSON(inputs.envs) }} + run: | + # Convert to lowercase to meet Docker repo name requirement + REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]') + IMG_NAME="apache-spark-ci-image-pyspark-${{ env.PYSPARK_IMAGE_TO_TEST }}:${{ inputs.branch }}-${{ github.run_id }}" + IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME" + echo "image_pyspark_url=$IMG_URL" >> $GITHUB_OUTPUT - name: Link the docker images id: infra-image-link + env: ${{ fromJSON(inputs.envs) }} run: | # Set the image URL for job "docs" # Should delete the link and directly use image_docs_url after SPARK 3.x EOL @@ -173,10 +209,12 @@ jobs: echo "image_docs_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT echo "image_lint_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT echo "image_sparkr_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT + echo "image_pyspark_url_link=${{ 
steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT else echo "image_docs_url_link=${{ steps.infra-image-docs-outputs.outputs.image_docs_url }}" >> $GITHUB_OUTPUT echo "image_lint_url_link=${{ steps.infra-image-lint-outputs.outputs.image_lint_url }}" >> $GITHUB_OUTPUT echo "image_sparkr_url_link=${{ steps.infra-image-sparkr-outputs.outputs.image_sparkr_url }}" >> $GITHUB_OUTPUT + echo "image_pyspark_url_link=${{ steps.infra-image-pyspark-outputs.outputs.image_pyspark_url }}" >> $GITHUB_OUTPUT fi # Build: build Spark and run the tests for specified modules. @@ -204,7 +242,7 @@ jobs: - >- api, catalyst, hive-thriftserver - >- - mllib-local, mllib, graphx + mllib-local, mllib, graphx, profiler - >- streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl, kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf, connect @@ -257,8 +295,6 @@ jobs: INCLUDED_TAGS: ${{ matrix.included-tags }} HADOOP_PROFILE: ${{ matrix.hadoop }} HIVE_PROFILE: ${{ matrix.hive }} - # GitHub Actions' default miniconda to use in pip packaging test. - CONDA_PREFIX: /usr/share/miniconda GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost NOLINT_ON_COMPILE: true @@ -320,7 +356,7 @@ jobs: - name: Install Python packages (Python 3.11) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') run: | - python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3' + python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' python3.11 -m pip list # Run the tests. 
- name: Run tests @@ -355,7 +391,9 @@ jobs: needs: precondition if: >- fromJson(needs.precondition.outputs.required).pyspark == 'true' || + fromJson(needs.precondition.outputs.required).pyspark-pandas == 'true' || fromJson(needs.precondition.outputs.required).lint == 'true' || + fromJson(needs.precondition.outputs.required).docs == 'true' || fromJson(needs.precondition.outputs.required).sparkr == 'true' runs-on: ubuntu-latest permissions: @@ -385,7 +423,8 @@ jobs: uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Build and push + - name: Build and push for branch-3.5 + if: inputs.branch == 'branch-3.5' id: docker_build uses: docker/build-push-action@v6 with: @@ -396,7 +435,7 @@ jobs: # Use the infra image cache to speed up cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:${{ inputs.branch }} - name: Build and push (Documentation) - if: hashFiles('dev/spark-test-image/docs/Dockerfile') != '' + if: ${{ inputs.branch != 'branch-3.5' && fromJson(needs.precondition.outputs.required).docs == 'true' && hashFiles('dev/spark-test-image/docs/Dockerfile') != '' }} id: docker_build_docs uses: docker/build-push-action@v6 with: @@ -407,7 +446,7 @@ jobs: # Use the infra image cache to speed up cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-docs-cache:${{ inputs.branch }} - name: Build and push (Linter) - if: hashFiles('dev/spark-test-image/lint/Dockerfile') != '' + if: ${{ inputs.branch != 'branch-3.5' && fromJson(needs.precondition.outputs.required).lint == 'true' && hashFiles('dev/spark-test-image/lint/Dockerfile') != '' }} id: docker_build_lint uses: docker/build-push-action@v6 with: @@ -418,7 +457,7 @@ jobs: # Use the infra image cache to speed up cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-lint-cache:${{ inputs.branch }} - name: Build and push (SparkR) - if: 
hashFiles('dev/spark-test-image/sparkr/Dockerfile') != '' + if: ${{ inputs.branch != 'branch-3.5' && fromJson(needs.precondition.outputs.required).sparkr == 'true' && hashFiles('dev/spark-test-image/sparkr/Dockerfile') != '' }} id: docker_build_sparkr uses: docker/build-push-action@v6 with: @@ -428,17 +467,29 @@ jobs: ${{ needs.precondition.outputs.image_sparkr_url }} # Use the infra image cache to speed up cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ inputs.branch }} + - name: Build and push (PySpark with ${{ env.PYSPARK_IMAGE_TO_TEST }}) + if: ${{ inputs.branch != 'branch-3.5' && (fromJson(needs.precondition.outputs.required).pyspark == 'true' || fromJson(needs.precondition.outputs.required).pyspark-pandas == 'true') && env.PYSPARK_IMAGE_TO_TEST != '' }} + id: docker_build_pyspark + env: ${{ fromJSON(inputs.envs) }} + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/${{ env.PYSPARK_IMAGE_TO_TEST }}/ + push: true + tags: | + ${{ needs.precondition.outputs.image_pyspark_url }} + # Use the infra image cache to speed up + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-${{ env.PYSPARK_IMAGE_TO_TEST }}-cache:${{ inputs.branch }} pyspark: needs: [precondition, infra-image] # always run if pyspark == 'true', even infra-image is skip (such as non-master job) - if: (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark == 'true' + if: (!cancelled()) && (fromJson(needs.precondition.outputs.required).pyspark == 'true' || fromJson(needs.precondition.outputs.required).pyspark-pandas == 'true') name: "Build modules: ${{ matrix.modules }}" runs-on: ubuntu-latest timeout-minutes: 180 container: - image: ${{ needs.precondition.outputs.image_url }} + image: ${{ needs.precondition.outputs.image_pyspark_url_link }} strategy: fail-fast: false matrix: @@ -448,7 +499,7 @@ jobs: - >- pyspark-sql, pyspark-resource, pyspark-testing - >- - 
pyspark-core, pyspark-errors, pyspark-streaming + pyspark-core, pyspark-errors, pyspark-streaming, pyspark-logger - >- pyspark-mllib, pyspark-ml, pyspark-ml-connect - >- @@ -466,6 +517,13 @@ jobs: - >- pyspark-pandas-connect-part3 exclude: + # Always run if pyspark == 'true', even infra-image is skip (such as non-master job) + # In practice, the build will run in individual PR, but not against the individual commit + # in Apache Spark repository. + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-sql, pyspark-resource, pyspark-testing' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-core, pyspark-errors, pyspark-streaming, pyspark-logger' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-mllib, pyspark-ml, pyspark-ml-connect' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-connect' }} # Always run if pyspark-pandas == 'true', even infra-image is skip (such as non-master job) # In practice, the build will run in individual PR, but not against the individual commit # in Apache Spark repository. @@ -477,11 +535,8 @@ jobs: - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part3' }} env: MODULES_TO_TEST: ${{ matrix.modules }} - PYTHON_TO_TEST: 'python3.11' HADOOP_PROFILE: ${{ inputs.hadoop }} HIVE_PROFILE: hive2.3 - # GitHub Actions' default miniconda to use in pip packaging test. 
- CONDA_PREFIX: /usr/share/miniconda GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost SKIP_UNIDOC: true @@ -534,6 +589,7 @@ jobs: distribution: zulu java-version: ${{ matrix.java }} - name: List Python packages (${{ env.PYTHON_TO_TEST }}) + if: ${{ env.PYTHON_TO_TEST != '' }} env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' run: | @@ -542,12 +598,18 @@ jobs: echo $py $py -m pip list done + - name: Install Conda for pip packaging test + if: contains(matrix.modules, 'pyspark-errors') + uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest # Run the tests. - name: Run tests env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' run: | if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then + export PATH=$CONDA/bin:$PATH export SKIP_PACKAGING=false echo "Python Packaging Tests Enabled!" fi @@ -559,11 +621,14 @@ jobs: fi - name: Upload coverage to Codecov if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true' - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: files: ./python/coverage.xml flags: unittests name: PySpark + verbose: true - name: Upload test results to report env: ${{ fromJSON(inputs.envs) }} if: always() @@ -692,7 +757,7 @@ jobs: python-version: '3.11' - name: Install dependencies for Python CodeGen check run: | - python3.11 -m pip install 'black==23.9.1' 'protobuf==5.28.3' 'mypy==1.8.0' 'mypy-protobuf==3.3.0' + python3.11 -m pip install 'black==23.12.1' 'protobuf==5.29.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0' python3.11 -m pip list - name: Python CodeGen check for branch-3.5 if: inputs.branch == 'branch-3.5' @@ -1043,6 +1108,7 @@ jobs: spark.sql.autoBroadcastJoinThreshold=-1 spark.sql.join.forceApplyShuffledHashJoin=true - name: Run TPC-DS queries on collated data + if: inputs.branch != 'branch-3.5' run: | SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly 
org.apache.spark.sql.TPCDSCollationQueryTestSuite" - name: Upload test results to report @@ -1174,6 +1240,7 @@ jobs: - name: Start Minikube uses: medyagh/setup-minikube@v0.0.18 with: + kubernetes-version: "1.32.0" # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic cpus: 2 memory: 6144m @@ -1192,7 +1259,7 @@ jobs: if [[ "${{ inputs.branch }}" == 'branch-3.5' ]]; then kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml || true else - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.9.0/installer/volcano-development.yaml || true + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.10.0/installer/volcano-development.yaml || true fi eval $(minikube docker-env) build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" diff --git a/.github/workflows/build_branch35.yml b/.github/workflows/build_branch35.yml index 2ec080d5722c1..4757ca3c574f5 100644 --- a/.github/workflows/build_branch35.yml +++ b/.github/workflows/build_branch35.yml @@ -22,6 +22,7 @@ name: "Build (branch-3.5, Scala 2.13, Hadoop 3, JDK 8)" on: schedule: - cron: '0 11 * * *' + workflow_dispatch: jobs: run-build: @@ -37,6 +38,7 @@ jobs: envs: >- { "SCALA_PROFILE": "scala2.13", + "PYSPARK_IMAGE_TO_TEST": "", "PYTHON_TO_TEST": "", "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0" } diff --git a/.github/workflows/build_branch35_python.yml b/.github/workflows/build_branch35_python.yml index 1585534d33ba9..452a55f3bc2c1 100644 --- a/.github/workflows/build_branch35_python.yml +++ b/.github/workflows/build_branch35_python.yml @@ -22,6 +22,7 @@ name: "Build / Python-only (branch-3.5)" on: schedule: - cron: '0 11 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,7 @@ jobs: hadoop: hadoop3 envs: >- { + 
"PYSPARK_IMAGE_TO_TEST": "", "PYTHON_TO_TEST": "" } jobs: >- diff --git a/.github/workflows/build_coverage.yml b/.github/workflows/build_coverage.yml index 64f65bd777a02..007d9ce99c847 100644 --- a/.github/workflows/build_coverage.yml +++ b/.github/workflows/build_coverage.yml @@ -22,6 +22,7 @@ name: "Build / Coverage (master, Scala 2.13, Hadoop 3, JDK 17)" on: schedule: - cron: '0 10 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,7 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "python-311", "PYTHON_TO_TEST": "python3.11", "PYSPARK_CODECOV": "true" } diff --git a/.github/workflows/build_infra_images_cache.yml b/.github/workflows/build_infra_images_cache.yml index a6beacedeebd4..ac139147beb91 100644 --- a/.github/workflows/build_infra_images_cache.yml +++ b/.github/workflows/build_infra_images_cache.yml @@ -30,9 +30,16 @@ on: - 'dev/spark-test-image/docs/Dockerfile' - 'dev/spark-test-image/lint/Dockerfile' - 'dev/spark-test-image/sparkr/Dockerfile' + - 'dev/spark-test-image/pypy-310/Dockerfile' + - 'dev/spark-test-image/python-309/Dockerfile' + - 'dev/spark-test-image/python-310/Dockerfile' + - 'dev/spark-test-image/python-311/Dockerfile' + - 'dev/spark-test-image/python-312/Dockerfile' + - 'dev/spark-test-image/python-313/Dockerfile' - '.github/workflows/build_infra_images_cache.yml' # Create infra image when cutting down branches/tags create: + workflow_dispatch: jobs: main: if: github.repository == 'apache/spark' @@ -102,3 +109,107 @@ jobs: - name: Image digest (SparkR) if: hashFiles('dev/spark-test-image/sparkr/Dockerfile') != '' run: echo ${{ steps.docker_build_sparkr.outputs.digest }} + - name: Build and push (PySpark with old dependencies) + if: hashFiles('dev/spark-test-image/python-minimum/Dockerfile') != '' + id: docker_build_pyspark_python_minimum + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-minimum/ + push: true + tags: 
ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-minimum-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-minimum-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-minimum-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with old dependencies) + if: hashFiles('dev/spark-test-image/python-minimum/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_python_minimum.outputs.digest }} + - name: Build and push (PySpark PS with old dependencies) + if: hashFiles('dev/spark-test-image/python-ps-minimum/Dockerfile') != '' + id: docker_build_pyspark_python_ps_minimum + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-ps-minimum/ + push: true + tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-ps-minimum-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-ps-minimum-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-ps-minimum-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark PS with old dependencies) + if: hashFiles('dev/spark-test-image/python-ps-minimum/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_python_ps_minimum.outputs.digest }} + - name: Build and push (PySpark with PyPy 3.10) + if: hashFiles('dev/spark-test-image/pypy-310/Dockerfile') != '' + id: docker_build_pyspark_pypy_310 + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/pypy-310/ + push: true + tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-pypy-310-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-pypy-310-cache:${{ 
github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-pypy-310-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with PyPy 3.10) + if: hashFiles('dev/spark-test-image/pypy-310/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_pypy_310.outputs.digest }} + - name: Build and push (PySpark with Python 3.9) + if: hashFiles('dev/spark-test-image/python-309/Dockerfile') != '' + id: docker_build_pyspark_python_309 + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-309/ + push: true + tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with Python 3.9) + if: hashFiles('dev/spark-test-image/python-309/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_python_309.outputs.digest }} + - name: Build and push (PySpark with Python 3.10) + if: hashFiles('dev/spark-test-image/python-310/Dockerfile') != '' + id: docker_build_pyspark_python_310 + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-310/ + push: true + tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-310-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-310-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-310-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with Python 3.10) + if: hashFiles('dev/spark-test-image/python-310/Dockerfile') != '' + run: echo ${{ 
steps.docker_build_pyspark_python_310.outputs.digest }} + - name: Build and push (PySpark with Python 3.11) + if: hashFiles('dev/spark-test-image/python-311/Dockerfile') != '' + id: docker_build_pyspark_python_311 + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-311/ + push: true + tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-311-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-311-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-311-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with Python 3.11) + if: hashFiles('dev/spark-test-image/python-311/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_python_311.outputs.digest }} + - name: Build and push (PySpark with Python 3.12) + if: hashFiles('dev/spark-test-image/python-312/Dockerfile') != '' + id: docker_build_pyspark_python_312 + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-312/ + push: true + tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-312-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-312-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-312-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with Python 3.12) + if: hashFiles('dev/spark-test-image/python-312/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_python_312.outputs.digest }} + - name: Build and push (PySpark with Python 3.13) + if: hashFiles('dev/spark-test-image/python-313/Dockerfile') != '' + id: docker_build_pyspark_python_313 + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-313/ + push: 
true + tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-313-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-313-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-313-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with Python 3.13) + if: hashFiles('dev/spark-test-image/python-313/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_python_313.outputs.digest }} diff --git a/.github/workflows/build_java21.yml b/.github/workflows/build_java21.yml index 871e1a9c07ef0..51ece691f9284 100644 --- a/.github/workflows/build_java21.yml +++ b/.github/workflows/build_java21.yml @@ -22,6 +22,7 @@ name: "Build (master, Scala 2.13, Hadoop 3, JDK 21)" on: schedule: - cron: '0 4 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,8 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "python-311", + "PYTHON_TO_TEST": "python3.11", "SKIP_MIMA": "true", "SKIP_UNIDOC": "true", "DEDICATED_JVM_SBT_TESTS": "org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatV1Suite,org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatV2Suite,org.apache.spark.sql.execution.datasources.orc.OrcSourceV1Suite,org.apache.spark.sql.execution.datasources.orc.OrcSourceV2Suite" diff --git a/.github/workflows/build_maven.yml b/.github/workflows/build_maven.yml index b5546c61eb11b..e047390add6f9 100644 --- a/.github/workflows/build_maven.yml +++ b/.github/workflows/build_maven.yml @@ -22,6 +22,7 @@ name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 17)" on: schedule: - cron: '0 13 * * *' + workflow_dispatch: jobs: run-build: diff --git a/.github/workflows/build_maven_java21.yml b/.github/workflows/build_maven_java21.yml index 127904145464b..9fbc7b84383f0 100644 --- a/.github/workflows/build_maven_java21.yml +++ 
b/.github/workflows/build_maven_java21.yml @@ -22,6 +22,7 @@ name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21)" on: schedule: - cron: '0 14 * * *' + workflow_dispatch: jobs: run-build: diff --git a/.github/workflows/build_maven_java21_macos15.yml b/.github/workflows/build_maven_java21_macos15.yml index cc6d0ea4e90da..377a67191ab49 100644 --- a/.github/workflows/build_maven_java21_macos15.yml +++ b/.github/workflows/build_maven_java21_macos15.yml @@ -22,6 +22,7 @@ name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, MacOS-15)" on: schedule: - cron: '0 20 */2 * *' + workflow_dispatch: jobs: run-build: diff --git a/.github/workflows/build_non_ansi.yml b/.github/workflows/build_non_ansi.yml index 4ac2a589f4f81..31654476ea3f8 100644 --- a/.github/workflows/build_non_ansi.yml +++ b/.github/workflows/build_non_ansi.yml @@ -22,6 +22,7 @@ name: "Build / Non-ANSI (master, Hadoop 3, JDK 17, Scala 2.13)" on: schedule: - cron: '0 1 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,8 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "python-311", + "PYTHON_TO_TEST": "python3.11", "SPARK_ANSI_SQL_MODE": "false", } jobs: >- diff --git a/.github/workflows/build_python_3.10.yml b/.github/workflows/build_python_3.10.yml index 5ae37fbc9120e..9b0c90c5c7747 100644 --- a/.github/workflows/build_python_3.10.yml +++ b/.github/workflows/build_python_3.10.yml @@ -22,6 +22,7 @@ name: "Build / Python-only (master, Python 3.10)" on: schedule: - cron: '0 17 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,7 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "python-310", "PYTHON_TO_TEST": "python3.10" } jobs: >- diff --git a/.github/workflows/build_python_3.11_macos.yml b/.github/workflows/build_python_3.11_macos.yml index 4caae55b5fea8..57902e4871ffa 100644 --- a/.github/workflows/build_python_3.11_macos.yml +++ b/.github/workflows/build_python_3.11_macos.yml @@ -22,6 +22,7 @@ name: "Build / Python-only (master, Python 3.11, MacOS)" on: 
schedule: - cron: '0 21 * * *' + workflow_dispatch: jobs: run-build: diff --git a/.github/workflows/build_python_3.12.yml b/.github/workflows/build_python_3.12.yml index e1fd45a7d8838..e0c04700554ca 100644 --- a/.github/workflows/build_python_3.12.yml +++ b/.github/workflows/build_python_3.12.yml @@ -22,6 +22,7 @@ name: "Build / Python-only (master, Python 3.12)" on: schedule: - cron: '0 19 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,7 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "python-312", "PYTHON_TO_TEST": "python3.12" } jobs: >- diff --git a/.github/workflows/build_python_3.13.yml b/.github/workflows/build_python_3.13.yml index 6f67cf383584f..e85b1577f323f 100644 --- a/.github/workflows/build_python_3.13.yml +++ b/.github/workflows/build_python_3.13.yml @@ -22,6 +22,7 @@ name: "Build / Python-only (master, Python 3.13)" on: schedule: - cron: '0 20 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,7 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "python-313", "PYTHON_TO_TEST": "python3.13" } jobs: >- diff --git a/.github/workflows/build_python_3.9.yml b/.github/workflows/build_python_3.9.yml index b2401fcf2aa14..0df17699140ed 100644 --- a/.github/workflows/build_python_3.9.yml +++ b/.github/workflows/build_python_3.9.yml @@ -22,6 +22,7 @@ name: "Build / Python-only (master, Python 3.9)" on: schedule: - cron: '0 21 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,7 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "python-309", "PYTHON_TO_TEST": "python3.9" } jobs: >- diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml index d57a0c2b91623..311907558f6e2 100644 --- a/.github/workflows/build_python_connect.yml +++ b/.github/workflows/build_python_connect.yml @@ -22,6 +22,7 @@ name: Build / Spark Connect Python-only (master, Python 3.11) on: schedule: - cron: '0 19 * * *' + workflow_dispatch: jobs: # Build: build Spark and run the tests 
for specified modules using SBT @@ -82,7 +83,7 @@ jobs: sed -i 's/rootLogger.level = info/rootLogger.level = warn/g' conf/log4j2.properties # Start a Spark Connect server for local - PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ + PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.9-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \ --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`" @@ -93,7 +94,7 @@ jobs: # Several tests related to catalog requires to run them sequencially, e.g., writing a table in a listener. ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect,pyspark-ml-connect # None of tests are dependent on each other in Pandas API on Spark so run them in parallel - ./python/run-tests --parallelism=2 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3 + ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3 # Stop Spark Connect server. 
./sbin/stop-connect-server.sh @@ -101,7 +102,7 @@ jobs: mv pyspark.back python/pyspark # Start a Spark Connect server for local-cluster - PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ + PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.9-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ --master "local-cluster[2, 4, 1024]" \ --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \ --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`" diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml index 4b7a6b82b9527..ba77f2dff75a9 100644 --- a/.github/workflows/build_python_connect35.yml +++ b/.github/workflows/build_python_connect35.yml @@ -22,6 +22,7 @@ name: Build / Spark Connect Python-only (master-server, 35-client, Python 3.11) on: schedule: - cron: '0 21 * * *' + workflow_dispatch: jobs: # Build: build Spark and run the tests for specified modules using SBT @@ -70,7 +71,7 @@ jobs: pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' # Add Python deps for Spark Connect. 
- pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' + pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' # Add torch as a testing dependency for TorchDistributor pip install 'torch==2.0.1' 'torchvision==0.15.2' torcheval @@ -85,7 +86,7 @@ jobs: sed -i 's/rootLogger.level = info/rootLogger.level = warn/g' conf/log4j2.properties # Start a Spark Connect server for local - PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ + PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.9-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \ --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`" @@ -98,7 +99,7 @@ jobs: # Run branch-3.5 tests ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect # None of tests are dependent on each other in Pandas API on Spark so run them in parallel - ./python/run-tests --parallelism=2 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect + ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect - name: Upload test results to report if: always() uses: actions/upload-artifact@v4 diff --git a/.github/workflows/build_python_minimum.yml b/.github/workflows/build_python_minimum.yml new file mode 100644 index 0000000000000..0efd2ad8265f7 --- /dev/null +++ b/.github/workflows/build_python_minimum.yml @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: "Build / Python-only (master, Python with old dependencies)" + +on: + schedule: + - cron: '0 9 * * *' + workflow_dispatch: + +jobs: + run-build: + permissions: + packages: write + name: Run + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + java: 17 + branch: master + hadoop: hadoop3 + envs: >- + { + "PYSPARK_IMAGE_TO_TEST": "python-minimum", + "PYTHON_TO_TEST": "python3.9" + } + jobs: >- + { + "pyspark": "true" + } diff --git a/.github/workflows/build_python_ps_minimum.yml b/.github/workflows/build_python_ps_minimum.yml new file mode 100644 index 0000000000000..742d578e27418 --- /dev/null +++ b/.github/workflows/build_python_ps_minimum.yml @@ -0,0 +1,47 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: "Build / Python-only (master, Python PS with old dependencies)" + +on: + schedule: + - cron: '0 10 * * *' + workflow_dispatch: + +jobs: + run-build: + permissions: + packages: write + name: Run + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + java: 17 + branch: master + hadoop: hadoop3 + envs: >- + { + "PYSPARK_IMAGE_TO_TEST": "python-ps-minimum", + "PYTHON_TO_TEST": "python3.9" + } + jobs: >- + { + "pyspark": "true", + "pyspark-pandas": "true" + } diff --git a/.github/workflows/build_python_pypy3.10.yml b/.github/workflows/build_python_pypy3.10.yml index 163af2f4aec8b..0bd2ef03ce77c 100644 --- a/.github/workflows/build_python_pypy3.10.yml +++ b/.github/workflows/build_python_pypy3.10.yml @@ -22,6 +22,7 @@ name: "Build / Python-only (master, PyPy 3.10)" on: schedule: - cron: '0 15 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,7 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "pypy-310", "PYTHON_TO_TEST": "pypy3" } jobs: >- diff --git a/.github/workflows/build_rockdb_as_ui_backend.yml b/.github/workflows/build_rockdb_as_ui_backend.yml index 96009c41dbbf9..1d9a079e72643 100644 --- a/.github/workflows/build_rockdb_as_ui_backend.yml +++ b/.github/workflows/build_rockdb_as_ui_backend.yml @@ -22,6 +22,7 @@ name: "Build / RocksDB as UI Backend (master, Hadoop 3, JDK 17, Scala 2.13)" on: schedule: - cron: '0 6 * * *' + workflow_dispatch: jobs: run-build: @@ -36,6 +37,8 @@ jobs: hadoop: hadoop3 envs: >- { + "PYSPARK_IMAGE_TO_TEST": "python-311", + "PYTHON_TO_TEST": "python3.11", 
"LIVE_UI_LOCAL_STORE_DIR": "/tmp/kvStore", } jobs: >- diff --git a/.github/workflows/build_sparkr_window.yml b/.github/workflows/build_sparkr_window.yml index b97251a461715..b28e81908549f 100644 --- a/.github/workflows/build_sparkr_window.yml +++ b/.github/workflows/build_sparkr_window.yml @@ -21,6 +21,7 @@ name: "Build / SparkR-only (master, 4.4.2, windows-2022)" on: schedule: - cron: '0 17 * * *' + workflow_dispatch: jobs: build: diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index 6965fb4968af3..206806a7a0ed7 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -178,7 +178,7 @@ jobs: - name: Install Python packages (Python 3.11) if: (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect') run: | - python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3' + python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' python3.11 -m pip list # Run the tests. 
- name: Run tests diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 8729012c2b8d2..4bcc275064d3c 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -62,8 +62,8 @@ jobs: run: | pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.2.3' 'plotly>=4.8' 'docutils<0.18.0' \ - 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \ - 'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ + 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \ + 'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' - name: Install Ruby for documentation generation uses: ruby/setup-ruby@v1 diff --git a/.github/workflows/python_macos_test.yml b/.github/workflows/python_macos_test.yml index cca133dab541a..231816750236b 100644 --- a/.github/workflows/python_macos_test.yml +++ b/.github/workflows/python_macos_test.yml @@ -134,7 +134,7 @@ jobs: python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2' python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0' python${{matrix.python}} -m pip install numpy 'pyarrow>=15.0.0' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \ - python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && 
\ + python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \ python${{matrix.python}} -m pip cache purge && \ python${{matrix.python}} -m pip list # Run the tests. diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml index d0a50b2b4aa74..542fa567dea69 100644 --- a/.github/workflows/update_build_status.yml +++ b/.github/workflows/update_build_status.yml @@ -72,7 +72,7 @@ jobs: } catch (error) { console.error(error) // Run not found. This can happen when the PR author removes GitHub Actions runs or - // disalbes GitHub Actions. + // disables GitHub Actions. continue } diff --git a/LICENSE-binary b/LICENSE-binary index 40d28fbe71e6b..5cf099cb4d3c4 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -286,6 +286,10 @@ io.netty:netty-transport-classes-kqueue io.netty:netty-transport-native-epoll io.netty:netty-transport-native-kqueue io.netty:netty-transport-native-unix-common +io.vertx:vertx-auth-common +io.vertx:vertx-core +io.vertx:vertx-web-client +io.vertx:vertx-web-common jakarta.inject:jakarta.inject-api jakarta.validation:jakarta.validation-api javax.jdo:jdo-api diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 9c825a99be180..e320981783ecc 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2922,7 +2922,7 @@ setClassUnion("characterOrstructTypeOrColumn", c("character", "structType", "Col #' @details #' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType} #' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set -#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA. +#' to \code{TRUE}. If the string is unparsable, the Column will contain the value NA. #' #' @rdname column_collection_functions #' @param as.json.array indicating if input string is JSON array of objects or a single object. 
@@ -3004,7 +3004,7 @@ setMethod("schema_of_json", signature(x = "characterOrColumn"), #' @details #' \code{from_csv}: Parses a column containing a CSV string into a Column of \code{structType} #' with the specified \code{schema}. -#' If the string is unparseable, the Column will contain the value NA. +#' If the string is unparsable, the Column will contain the value NA. #' #' @rdname column_collection_functions #' @aliases from_csv from_csv,Column,characterOrstructTypeOrColumn-method diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R index 61e174de9ac56..4ccec991bb07b 100644 --- a/R/pkg/R/serialize.R +++ b/R/pkg/R/serialize.R @@ -60,7 +60,7 @@ writeObject <- function(con, object, writeType = TRUE) { if (type %in% c("integer", "character", "logical", "double", "numeric")) { if (is.na(object[[1]])) { # Uses the first element for now to keep the behavior same as R before - # 4.2.0. This is wrong because we should differenciate c(NA) from a + # 4.2.0. This is wrong because we should differentiate c(NA) from a # single NA as the former means array(null) and the latter means null # in Spark SQL. However, it requires non-trivial comparison to distinguish # both in R. We should ideally fix this. 
diff --git a/bin/pyspark b/bin/pyspark index 2f08f78369159..650d913eea028 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -77,7 +77,7 @@ fi # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.9-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index 232813b4ffdd6..9f55d772a25cf 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.7-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.9-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py diff --git a/binder/Dockerfile b/binder/Dockerfile index 6e3dd9155fb7a..2d5c30a9a92e0 100644 --- a/binder/Dockerfile +++ b/binder/Dockerfile @@ -22,8 +22,8 @@ RUN pip install --no-cache notebook jupyterlab # create user with a home directory ARG NB_USER ARG NB_UID -ENV USER ${NB_USER} -ENV HOME /home/${NB_USER} +ENV USER=${NB_USER} +ENV HOME=/home/${NB_USER} RUN adduser --disabled-password \ --gecos "Default user" \ diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java index 02a38eac5b409..6e9bd548f5327 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java @@ -251,17 +251,17 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( // Higher shuffleMergeId seen for 
the shuffle ID meaning new stage attempt is being // run for the shuffle ID. Close and clean up old shuffleMergeId files, // happens in the indeterminate stage retries - AppAttemptShuffleMergeId currrentAppAttemptShuffleMergeId = + AppAttemptShuffleMergeId currentAppAttemptShuffleMergeId = new AppAttemptShuffleMergeId(appShuffleInfo.appId, appShuffleInfo.attemptId, shuffleId, latestShuffleMergeId); logger.info("{}: creating a new shuffle merge metadata since received " + "shuffleMergeId {} is higher than latest shuffleMergeId {}", MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, - currrentAppAttemptShuffleMergeId), + currentAppAttemptShuffleMergeId), MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId), MDC.of(LogKeys.LATEST_SHUFFLE_MERGE_ID$.MODULE$, latestShuffleMergeId)); submitCleanupTask(() -> - closeAndDeleteOutdatedPartitions(currrentAppAttemptShuffleMergeId, + closeAndDeleteOutdatedPartitions(currentAppAttemptShuffleMergeId, mergePartitionsInfo.shuffleMergePartitions)); return new AppShuffleMergePartitionsInfo(shuffleMergeId, false); } else { diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java index 4064f830e92d8..81448dc95a374 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java @@ -415,18 +415,6 @@ private static Collation fetchCollation(int collationId) { } } - /** - * Method for constructing errors thrown on providing invalid collation name. 
- */ - protected static SparkException collationInvalidNameException(String collationName) { - Map params = new HashMap<>(); - final int maxSuggestions = 3; - params.put("collationName", collationName); - params.put("proposals", getClosestSuggestionsOnInvalidName(collationName, maxSuggestions)); - return new SparkException("COLLATION_INVALID_NAME", - SparkException.constructMessageParams(params), null); - } - private static int collationNameToId(String collationName) throws SparkException { // Collation names provided by user are treated as case-insensitive. String collationNameUpper = collationName.toUpperCase(); @@ -479,9 +467,6 @@ private enum CaseSensitivity { */ private static final int CASE_SENSITIVITY_MASK = 0b1; - private static final String UTF8_BINARY_COLLATION_NAME = "UTF8_BINARY"; - private static final String UTF8_LCASE_COLLATION_NAME = "UTF8_LCASE"; - private static final int UTF8_BINARY_COLLATION_ID = new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED, SpaceTrimming.NONE).collationId; private static final int UTF8_LCASE_COLLATION_ID = @@ -667,9 +652,9 @@ protected CollationMeta buildCollationMeta() { protected String normalizedCollationName() { StringBuilder builder = new StringBuilder(); if(caseSensitivity == CaseSensitivity.UNSPECIFIED){ - builder.append(UTF8_BINARY_COLLATION_NAME); + builder.append(CollationNames.UTF8_BINARY); } else{ - builder.append(UTF8_LCASE_COLLATION_NAME); + builder.append(CollationNames.UTF8_LCASE); } if (spaceTrimming != SpaceTrimming.NONE) { builder.append('_'); @@ -681,12 +666,12 @@ protected String normalizedCollationName() { static List listCollations() { CollationIdentifier UTF8_BINARY_COLLATION_IDENT = new CollationIdentifier( PROVIDER_SPARK, - UTF8_BINARY_COLLATION_NAME, + CollationNames.UTF8_BINARY, CollationSpecICU.ICU_VERSION ); CollationIdentifier UTF8_LCASE_COLLATION_IDENT = new CollationIdentifier( PROVIDER_SPARK, - UTF8_LCASE_COLLATION_NAME, + CollationNames.UTF8_LCASE, CollationSpecICU.ICU_VERSION ); return 
Arrays.asList(UTF8_BINARY_COLLATION_IDENT, UTF8_LCASE_COLLATION_IDENT); @@ -770,7 +755,7 @@ private enum AccentSensitivity { VersionInfo.ICU_VERSION.getMinor()); static { - ICULocaleMap.put("UNICODE", ULocale.ROOT); + ICULocaleMap.put(CollationNames.UNICODE, ULocale.ROOT); // ICU-implemented `ULocale`s which have corresponding `Collator` installed. ULocale[] locales = Collator.getAvailableULocales(); // Build locale names in format: language["_" optional script]["_" optional country code]. @@ -818,13 +803,13 @@ private enum AccentSensitivity { } private static final int UNICODE_COLLATION_ID = new CollationSpecICU( - "UNICODE", + CollationNames.UNICODE, CaseSensitivity.CS, AccentSensitivity.AS, SpaceTrimming.NONE).collationId; private static final int UNICODE_CI_COLLATION_ID = new CollationSpecICU( - "UNICODE", + CollationNames.UNICODE, CaseSensitivity.CI, AccentSensitivity.AS, SpaceTrimming.NONE).collationId; @@ -1185,6 +1170,52 @@ public static int collationNameToId(String collationName) throws SparkException return Collation.CollationSpec.collationNameToId(collationName); } + /** + * Returns the resolved fully qualified collation name. + */ + public static String resolveFullyQualifiedName(String[] collationName) throws SparkException { + // If collation name has only one part, then we don't need to do any name resolution. + if (collationName.length == 1) return collationName[0]; + else { + // Currently we only support builtin collation names with fixed catalog `SYSTEM` and + // schema `BUILTIN`. + if (collationName.length != 3 || + !CollationFactory.CATALOG.equalsIgnoreCase(collationName[0]) || + !CollationFactory.SCHEMA.equalsIgnoreCase(collationName[1])) { + // Throw exception with original (before case conversion) collation name. + throw CollationFactory.collationInvalidNameException( + collationName.length != 0 ? 
collationName[collationName.length - 1] : ""); + } + return collationName[2]; + } + } + + /** + * Method for constructing errors thrown on providing invalid collation name. + */ + public static SparkException collationInvalidNameException(String collationName) { + Map params = new HashMap<>(); + final int maxSuggestions = 3; + params.put("collationName", collationName); + params.put("proposals", getClosestSuggestionsOnInvalidName(collationName, maxSuggestions)); + return new SparkException("COLLATION_INVALID_NAME", + SparkException.constructMessageParams(params), null); + } + + + + /** + * Returns the fully qualified collation name for the given collation ID. + */ + public static String fullyQualifiedName(int collationId) { + Collation.CollationSpec.DefinitionOrigin definitionOrigin = + Collation.CollationSpec.getDefinitionOrigin(collationId); + // Currently only predefined collations are supported. + assert definitionOrigin == Collation.CollationSpec.DefinitionOrigin.PREDEFINED; + return String.format("%s.%s.%s", CATALOG, SCHEMA, + Collation.CollationSpec.fetchCollation(collationId).collationName); + } + public static boolean isCaseInsensitive(int collationId) { return Collation.CollationSpecICU.fromCollationId(collationId).caseSensitivity == Collation.CollationSpecICU.CaseSensitivity.CI; diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationNames.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationNames.java new file mode 100644 index 0000000000000..11e9e1a87e713 --- /dev/null +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationNames.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util; + +public class CollationNames { + public static final String UTF8_BINARY = "UTF8_BINARY"; + public static final String UTF8_LCASE = "UTF8_LCASE"; + public static final String UNICODE = "UNICODE"; + public static final String UNICODE_CI = "UNICODE_CI"; +} diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java index aae47aa963201..f12408fb49313 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java @@ -135,27 +135,57 @@ public static byte[] subStringSQL(byte[] bytes, int pos, int len) { return Arrays.copyOfRange(bytes, start, end); } + /** + * Concatenate multiple byte arrays into one. + * If one of the inputs is null then null will be returned. + * + * @param inputs byte arrays to concatenate + * @return the concatenated byte array or null if one of the arguments is null + */ public static byte[] concat(byte[]... inputs) { + return concatWS(EMPTY_BYTE, inputs); + } + + /** + * Concatenate multiple byte arrays with a given delimiter. + * If the delimiter or one of the inputs is null then null will be returned. 
+ * + * @param delimiter byte array to be placed between each input + * @param inputs byte arrays to concatenate + * @return the concatenated byte array or null if one of the arguments is null + */ + public static byte[] concatWS(byte[] delimiter, byte[]... inputs) { + if (delimiter == null) { + return null; + } // Compute the total length of the result long totalLength = 0; for (byte[] input : inputs) { if (input != null) { - totalLength += input.length; + totalLength += input.length + delimiter.length; } else { return null; } } - + if (totalLength > 0) totalLength -= delimiter.length; // Allocate a new byte array, and copy the inputs one by one into it final byte[] result = new byte[Ints.checkedCast(totalLength)]; int offset = 0; - for (byte[] input : inputs) { + for (int i = 0; i < inputs.length; i++) { + byte[] input = inputs[i]; int len = input.length; Platform.copyMemory( input, Platform.BYTE_ARRAY_OFFSET, result, Platform.BYTE_ARRAY_OFFSET + offset, len); offset += len; + if (delimiter.length > 0 && i < inputs.length - 1) { + Platform.copyMemory( + delimiter, Platform.BYTE_ARRAY_OFFSET, + result, Platform.BYTE_ARRAY_OFFSET + offset, + delimiter.length); + offset += delimiter.length; + } } return result; } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/array/ByteArraySuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/array/ByteArraySuite.java index aff619175ff7b..5e221b4e359d4 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/array/ByteArraySuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/array/ByteArraySuite.java @@ -67,4 +67,59 @@ public void testCompareBinary() { byte[] y4 = new byte[]{(byte) 100, (byte) 200}; Assertions.assertEquals(0, ByteArray.compareBinary(x4, y4)); } + + @Test + public void testConcat() { + byte[] x1 = new byte[]{(byte) 1, (byte) 2, (byte) 3}; + byte[] y1 = new byte[]{(byte) 4, (byte) 5, (byte) 6}; + byte[] result1 = ByteArray.concat(x1, y1); + byte[] expected1 = new 
byte[]{(byte) 1, (byte) 2, (byte) 3, (byte) 4, (byte) 5, (byte) 6}; + Assertions.assertArrayEquals(expected1, result1); + + byte[] x2 = new byte[]{(byte) 1, (byte) 2, (byte) 3}; + byte[] y2 = new byte[0]; + byte[] result2 = ByteArray.concat(x2, y2); + byte[] expected2 = new byte[]{(byte) 1, (byte) 2, (byte) 3}; + Assertions.assertArrayEquals(expected2, result2); + + byte[] x3 = new byte[0]; + byte[] y3 = new byte[]{(byte) 4, (byte) 5, (byte) 6}; + byte[] result3 = ByteArray.concat(x3, y3); + byte[] expected3 = new byte[]{(byte) 4, (byte) 5, (byte) 6}; + Assertions.assertArrayEquals(expected3, result3); + + byte[] x4 = new byte[]{(byte) 1, (byte) 2, (byte) 3}; + byte[] y4 = null; + byte[] result4 = ByteArray.concat(x4, y4); + Assertions.assertArrayEquals(null, result4); + } + + @Test + public void testConcatWS() { + byte[] separator = new byte[]{(byte) 42}; + + byte[] x1 = new byte[]{(byte) 1, (byte) 2, (byte) 3}; + byte[] y1 = new byte[]{(byte) 4, (byte) 5, (byte) 6}; + byte[] result1 = ByteArray.concatWS(separator, x1, y1); + byte[] expected1 = new byte[]{(byte) 1, (byte) 2, (byte) 3, (byte) 42, + (byte) 4, (byte) 5, (byte) 6}; + Assertions.assertArrayEquals(expected1, result1); + + byte[] x2 = new byte[]{(byte) 1, (byte) 2, (byte) 3}; + byte[] y2 = new byte[0]; + byte[] result2 = ByteArray.concatWS(separator, x2, y2); + byte[] expected2 = new byte[]{(byte) 1, (byte) 2, (byte) 3, (byte) 42}; + Assertions.assertArrayEquals(expected2, result2); + + byte[] x3 = new byte[0]; + byte[] y3 = new byte[]{(byte) 4, (byte) 5, (byte) 6}; + byte[] result3 = ByteArray.concatWS(separator, x3, y3); + byte[] expected3 = new byte[]{(byte) 42, (byte) 4, (byte) 5, (byte) 6}; + Assertions.assertArrayEquals(expected3, result3); + + byte[] x4 = new byte[]{(byte) 1, (byte) 2, (byte) 3}; + byte[] y4 = null; + byte[] result4 = ByteArray.concatWS(separator, x4, y4); + Assertions.assertArrayEquals(null, result4); + } } diff --git 
a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java index a696da8cf45b8..1db163c1c822d 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java @@ -26,6 +26,7 @@ import java.util.Map; import static org.junit.jupiter.api.Assertions.*; +import static org.apache.spark.sql.catalyst.util.CollationNames.*; // checkstyle.off: AvoidEscapedUnicodeCharacters public class CollationSupportSuite { @@ -37,7 +38,7 @@ public class CollationSupportSuite { * the specified collations (as often seen in some pass-through Spark expressions). */ private final String[] testSupportedCollations = - {"UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI"}; + {UTF8_BINARY, UTF8_LCASE, UNICODE, UNICODE_CI}; /** * Collation-aware UTF8String comparison and equality check. @@ -86,82 +87,82 @@ public void testCompare() throws SparkException { assertCompare("a", "ä", collationName, -1); } // Advanced tests. - assertCompare("äü", "bü", "UTF8_BINARY", 1); - assertCompare("bxx", "bü", "UTF8_BINARY", -1); - assertCompare("äü", "bü", "UTF8_LCASE", 1); - assertCompare("bxx", "bü", "UTF8_LCASE", -1); - assertCompare("äü", "bü", "UNICODE", -1); - assertCompare("bxx", "bü", "UNICODE", 1); - assertCompare("äü", "bü", "UNICODE_CI", -1); - assertCompare("bxx", "bü", "UNICODE_CI", 1); + assertCompare("äü", "bü", UTF8_BINARY, 1); + assertCompare("bxx", "bü", UTF8_BINARY, -1); + assertCompare("äü", "bü", UTF8_LCASE, 1); + assertCompare("bxx", "bü", UTF8_LCASE, -1); + assertCompare("äü", "bü", UNICODE, -1); + assertCompare("bxx", "bü", UNICODE, 1); + assertCompare("äü", "bü", UNICODE_CI, -1); + assertCompare("bxx", "bü", UNICODE_CI, 1); assertCompare("cČć", "ČćC", "SR_CI_AI", 0); // Case variation. 
- assertCompare("AbCd", "aBcD", "UTF8_BINARY", -1); - assertCompare("ABCD", "abcd", "UTF8_LCASE", 0); - assertCompare("AbcD", "aBCd", "UNICODE", 1); - assertCompare("abcd", "ABCD", "UNICODE_CI", 0); + assertCompare("AbCd", "aBcD", UTF8_BINARY, -1); + assertCompare("ABCD", "abcd", UTF8_LCASE, 0); + assertCompare("AbcD", "aBCd", UNICODE, 1); + assertCompare("abcd", "ABCD", UNICODE_CI, 0); // Accent variation. - assertCompare("aBćD", "ABĆD", "UTF8_BINARY", 1); - assertCompare("AbCδ", "ABCΔ", "UTF8_LCASE", 0); - assertCompare("äBCd", "ÄBCD", "UNICODE", -1); - assertCompare("Ab́cD", "AB́CD", "UNICODE_CI", 0); + assertCompare("aBćD", "ABĆD", UTF8_BINARY, 1); + assertCompare("AbCδ", "ABCΔ", UTF8_LCASE, 0); + assertCompare("äBCd", "ÄBCD", UNICODE, -1); + assertCompare("Ab́cD", "AB́CD", UNICODE_CI, 0); assertCompare("ÈÉÊË", "EeEe", "AF_CI_AI", 0); // One-to-many case mapping (e.g. Turkish dotted I). - assertCompare("i\u0307", "İ", "UTF8_BINARY", -1); - assertCompare("İ", "i\u0307", "UTF8_BINARY", 1); - assertCompare("i\u0307", "İ", "UTF8_LCASE", 0); - assertCompare("İ", "i\u0307", "UTF8_LCASE", 0); - assertCompare("i\u0307", "İ", "UNICODE", -1); - assertCompare("İ", "i\u0307", "UNICODE", 1); - assertCompare("i\u0307", "İ", "UNICODE_CI", 0); - assertCompare("İ", "i\u0307", "UNICODE_CI", 0); - assertCompare("i\u0307İ", "i\u0307İ", "UTF8_LCASE", 0); - assertCompare("i\u0307İ", "İi\u0307", "UTF8_LCASE", 0); - assertCompare("İi\u0307", "i\u0307İ", "UTF8_LCASE", 0); - assertCompare("İi\u0307", "İi\u0307", "UTF8_LCASE", 0); - assertCompare("i\u0307İ", "i\u0307İ", "UNICODE_CI", 0); - assertCompare("i\u0307İ", "İi\u0307", "UNICODE_CI", 0); - assertCompare("İi\u0307", "i\u0307İ", "UNICODE_CI", 0); - assertCompare("İi\u0307", "İi\u0307", "UNICODE_CI", 0); + assertCompare("i\u0307", "İ", UTF8_BINARY, -1); + assertCompare("İ", "i\u0307", UTF8_BINARY, 1); + assertCompare("i\u0307", "İ", UTF8_LCASE, 0); + assertCompare("İ", "i\u0307", UTF8_LCASE, 0); + assertCompare("i\u0307", "İ", 
UNICODE, -1); + assertCompare("İ", "i\u0307", UNICODE, 1); + assertCompare("i\u0307", "İ", UNICODE_CI, 0); + assertCompare("İ", "i\u0307", UNICODE_CI, 0); + assertCompare("i\u0307İ", "i\u0307İ", UTF8_LCASE, 0); + assertCompare("i\u0307İ", "İi\u0307", UTF8_LCASE, 0); + assertCompare("İi\u0307", "i\u0307İ", UTF8_LCASE, 0); + assertCompare("İi\u0307", "İi\u0307", UTF8_LCASE, 0); + assertCompare("i\u0307İ", "i\u0307İ", UNICODE_CI, 0); + assertCompare("i\u0307İ", "İi\u0307", UNICODE_CI, 0); + assertCompare("İi\u0307", "i\u0307İ", UNICODE_CI, 0); + assertCompare("İi\u0307", "İi\u0307", UNICODE_CI, 0); // Conditional case mapping (e.g. Greek sigmas). - assertCompare("ς", "σ", "UTF8_BINARY", -1); - assertCompare("ς", "Σ", "UTF8_BINARY", 1); - assertCompare("σ", "Σ", "UTF8_BINARY", 1); - assertCompare("ς", "σ", "UTF8_LCASE", 0); - assertCompare("ς", "Σ", "UTF8_LCASE", 0); - assertCompare("σ", "Σ", "UTF8_LCASE", 0); - assertCompare("ς", "σ", "UNICODE", 1); - assertCompare("ς", "Σ", "UNICODE", 1); - assertCompare("σ", "Σ", "UNICODE", -1); - assertCompare("ς", "σ", "UNICODE_CI", 0); - assertCompare("ς", "Σ", "UNICODE_CI", 0); - assertCompare("σ", "Σ", "UNICODE_CI", 0); + assertCompare("ς", "σ", UTF8_BINARY, -1); + assertCompare("ς", "Σ", UTF8_BINARY, 1); + assertCompare("σ", "Σ", UTF8_BINARY, 1); + assertCompare("ς", "σ", UTF8_LCASE, 0); + assertCompare("ς", "Σ", UTF8_LCASE, 0); + assertCompare("σ", "Σ", UTF8_LCASE, 0); + assertCompare("ς", "σ", UNICODE, 1); + assertCompare("ς", "Σ", UNICODE, 1); + assertCompare("σ", "Σ", UNICODE, -1); + assertCompare("ς", "σ", UNICODE_CI, 0); + assertCompare("ς", "Σ", UNICODE_CI, 0); + assertCompare("σ", "Σ", UNICODE_CI, 0); // Surrogate pairs. 
- assertCompare("a🙃b🙃c", "aaaaa", "UTF8_BINARY", 1); - assertCompare("a🙃b🙃c", "aaaaa", "UTF8_LCASE", 1); - assertCompare("a🙃b🙃c", "aaaaa", "UNICODE", -1); // != UTF8_BINARY - assertCompare("a🙃b🙃c", "aaaaa", "UNICODE_CI", -1); // != UTF8_LCASE - assertCompare("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", 0); - assertCompare("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", 0); - assertCompare("a🙃b🙃c", "a🙃b🙃c", "UNICODE", 0); - assertCompare("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", 0); - assertCompare("a🙃b🙃c", "a🙃b🙃d", "UTF8_BINARY", -1); - assertCompare("a🙃b🙃c", "a🙃b🙃d", "UTF8_LCASE", -1); - assertCompare("a🙃b🙃c", "a🙃b🙃d", "UNICODE", -1); - assertCompare("a🙃b🙃c", "a🙃b🙃d", "UNICODE_CI", -1); + assertCompare("a🙃b🙃c", "aaaaa", UTF8_BINARY, 1); + assertCompare("a🙃b🙃c", "aaaaa", UTF8_LCASE, 1); + assertCompare("a🙃b🙃c", "aaaaa", UNICODE, -1); // != UTF8_BINARY + assertCompare("a🙃b🙃c", "aaaaa", UNICODE_CI, -1); // != UTF8_LCASE + assertCompare("a🙃b🙃c", "a🙃b🙃c", UTF8_BINARY, 0); + assertCompare("a🙃b🙃c", "a🙃b🙃c", UTF8_LCASE, 0); + assertCompare("a🙃b🙃c", "a🙃b🙃c", UNICODE, 0); + assertCompare("a🙃b🙃c", "a🙃b🙃c", UNICODE_CI, 0); + assertCompare("a🙃b🙃c", "a🙃b🙃d", UTF8_BINARY, -1); + assertCompare("a🙃b🙃c", "a🙃b🙃d", UTF8_LCASE, -1); + assertCompare("a🙃b🙃c", "a🙃b🙃d", UNICODE, -1); + assertCompare("a🙃b🙃c", "a🙃b🙃d", UNICODE_CI, -1); // Maximum code point. int maxCodePoint = Character.MAX_CODE_POINT; String maxCodePointStr = new String(Character.toChars(maxCodePoint)); for (int i = 0; i < maxCodePoint && Character.isValidCodePoint(i); ++i) { - assertCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_BINARY", -1); - assertCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_LCASE", -1); + assertCompare(new String(Character.toChars(i)), maxCodePointStr, UTF8_BINARY, -1); + assertCompare(new String(Character.toChars(i)), maxCodePointStr, UTF8_LCASE, -1); } // Minimum code point. 
int minCodePoint = Character.MIN_CODE_POINT; String minCodePointStr = new String(Character.toChars(minCodePoint)); for (int i = minCodePoint + 1; i <= maxCodePoint && Character.isValidCodePoint(i); ++i) { - assertCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_BINARY", 1); - assertCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_LCASE", 1); + assertCompare(new String(Character.toChars(i)), minCodePointStr, UTF8_BINARY, 1); + assertCompare(new String(Character.toChars(i)), minCodePointStr, UTF8_LCASE, 1); } } @@ -302,201 +303,201 @@ public void testContains() throws SparkException { assertContains("Здраво", "Здраво", collationName, true); } // Advanced tests. - assertContains("abcde", "bcd", "UTF8_BINARY", true); - assertContains("abcde", "bde", "UTF8_BINARY", false); - assertContains("abcde", "fgh", "UTF8_BINARY", false); - assertContains("abcde", "abcde", "UNICODE", true); - assertContains("abcde", "aBcDe", "UNICODE", false); - assertContains("abcde", "fghij", "UNICODE", false); - assertContains("abcde", "C", "UTF8_LCASE", true); - assertContains("abcde", "AbCdE", "UTF8_LCASE", true); - assertContains("abcde", "X", "UTF8_LCASE", false); - assertContains("abcde", "c", "UNICODE_CI", true); - assertContains("abcde", "bCD", "UNICODE_CI", true); - assertContains("abcde", "123", "UNICODE_CI", false); - assertContains("ab世De", "b世D", "UTF8_BINARY", true); - assertContains("ab世De", "B世d", "UTF8_BINARY", false); - assertContains("äbćδe", "bćδ", "UTF8_BINARY", true); - assertContains("äbćδe", "BcΔ", "UTF8_BINARY", false); - assertContains("ab世De", "ab世De", "UNICODE", true); - assertContains("ab世De", "AB世dE", "UNICODE", false); - assertContains("äbćδe", "äbćδe", "UNICODE", true); - assertContains("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertContains("ab世De", "b世D", "UTF8_LCASE", true); - assertContains("ab世De", "B世d", "UTF8_LCASE", true); - assertContains("äbćδe", "bćδ", "UTF8_LCASE", true); - assertContains("äbćδe", "BcΔ", "UTF8_LCASE", 
false); - assertContains("ab世De", "ab世De", "UNICODE_CI", true); - assertContains("ab世De", "AB世dE", "UNICODE_CI", true); - assertContains("äbćδe", "ÄbćδE", "UNICODE_CI", true); - assertContains("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); - assertContains("The Kelvin.", "Kelvin", "UTF8_LCASE", true); - assertContains("The Kelvin.", "Kelvin", "UTF8_LCASE", true); - assertContains("The KKelvin.", "KKelvin", "UTF8_LCASE", true); - assertContains("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); - assertContains("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); - assertContains("The KKelvin.", "KKelvin,", "UTF8_LCASE", false); + assertContains("abcde", "bcd", UTF8_BINARY, true); + assertContains("abcde", "bde", UTF8_BINARY, false); + assertContains("abcde", "fgh", UTF8_BINARY, false); + assertContains("abcde", "abcde", UNICODE, true); + assertContains("abcde", "aBcDe", UNICODE, false); + assertContains("abcde", "fghij", UNICODE, false); + assertContains("abcde", "C", UTF8_LCASE, true); + assertContains("abcde", "AbCdE", UTF8_LCASE, true); + assertContains("abcde", "X", UTF8_LCASE, false); + assertContains("abcde", "c", UNICODE_CI, true); + assertContains("abcde", "bCD", UNICODE_CI, true); + assertContains("abcde", "123", UNICODE_CI, false); + assertContains("ab世De", "b世D", UTF8_BINARY, true); + assertContains("ab世De", "B世d", UTF8_BINARY, false); + assertContains("äbćδe", "bćδ", UTF8_BINARY, true); + assertContains("äbćδe", "BcΔ", UTF8_BINARY, false); + assertContains("ab世De", "ab世De", UNICODE, true); + assertContains("ab世De", "AB世dE", UNICODE, false); + assertContains("äbćδe", "äbćδe", UNICODE, true); + assertContains("äbćδe", "ÄBcΔÉ", UNICODE, false); + assertContains("ab世De", "b世D", UTF8_LCASE, true); + assertContains("ab世De", "B世d", UTF8_LCASE, true); + assertContains("äbćδe", "bćδ", UTF8_LCASE, true); + assertContains("äbćδe", "BcΔ", UTF8_LCASE, false); + assertContains("ab世De", "ab世De", UNICODE_CI, true); + assertContains("ab世De", "AB世dE", UNICODE_CI, true); + 
assertContains("äbćδe", "ÄbćδE", UNICODE_CI, true); + assertContains("äbćδe", "ÄBcΔÉ", UNICODE_CI, false); + assertContains("The Kelvin.", "Kelvin", UTF8_LCASE, true); + assertContains("The Kelvin.", "Kelvin", UTF8_LCASE, true); + assertContains("The KKelvin.", "KKelvin", UTF8_LCASE, true); + assertContains("2 Kelvin.", "2 Kelvin", UTF8_LCASE, true); + assertContains("2 Kelvin.", "2 Kelvin", UTF8_LCASE, true); + assertContains("The KKelvin.", "KKelvin,", UTF8_LCASE, false); assertContains("abčćd", "ABCCD", "SR_CI_AI", true); // Case variation. - assertContains("aBcDe", "bcd", "UTF8_BINARY", false); - assertContains("aBcDe", "BcD", "UTF8_BINARY", true); - assertContains("aBcDe", "abcde", "UNICODE", false); - assertContains("aBcDe", "aBcDe", "UNICODE", true); - assertContains("aBcDe", "bcd", "UTF8_LCASE", true); - assertContains("aBcDe", "BCD", "UTF8_LCASE", true); - assertContains("aBcDe", "abcde", "UNICODE_CI", true); - assertContains("aBcDe", "AbCdE", "UNICODE_CI", true); + assertContains("aBcDe", "bcd", UTF8_BINARY, false); + assertContains("aBcDe", "BcD", UTF8_BINARY, true); + assertContains("aBcDe", "abcde", UNICODE, false); + assertContains("aBcDe", "aBcDe", UNICODE, true); + assertContains("aBcDe", "bcd", UTF8_LCASE, true); + assertContains("aBcDe", "BCD", UTF8_LCASE, true); + assertContains("aBcDe", "abcde", UNICODE_CI, true); + assertContains("aBcDe", "AbCdE", UNICODE_CI, true); // Accent variation. 
- assertContains("aBcDe", "bćd", "UTF8_BINARY", false); - assertContains("aBcDe", "BćD", "UTF8_BINARY", false); - assertContains("aBcDe", "abćde", "UNICODE", false); - assertContains("aBcDe", "aBćDe", "UNICODE", false); - assertContains("aBcDe", "bćd", "UTF8_LCASE", false); - assertContains("aBcDe", "BĆD", "UTF8_LCASE", false); - assertContains("aBcDe", "abćde", "UNICODE_CI", false); - assertContains("aBcDe", "AbĆdE", "UNICODE_CI", false); + assertContains("aBcDe", "bćd", UTF8_BINARY, false); + assertContains("aBcDe", "BćD", UTF8_BINARY, false); + assertContains("aBcDe", "abćde", UNICODE, false); + assertContains("aBcDe", "aBćDe", UNICODE, false); + assertContains("aBcDe", "bćd", UTF8_LCASE, false); + assertContains("aBcDe", "BĆD", UTF8_LCASE, false); + assertContains("aBcDe", "abćde", UNICODE_CI, false); + assertContains("aBcDe", "AbĆdE", UNICODE_CI, false); assertContains("abEEE", "Bèêë", "AF_CI_AI", true); // One-to-many case mapping (e.g. Turkish dotted I). - assertContains("i\u0307", "i", "UNICODE_CI", false); - assertContains("i\u0307", "\u0307", "UNICODE_CI", false); - assertContains("i\u0307", "İ", "UNICODE_CI", true); - assertContains("İ", "i", "UNICODE_CI", false); - assertContains("adi̇os", "io", "UNICODE_CI", false); - assertContains("adi̇os", "Io", "UNICODE_CI", false); - assertContains("adi̇os", "i\u0307o", "UNICODE_CI", true); - assertContains("adi̇os", "İo", "UNICODE_CI", true); - assertContains("adİos", "io", "UNICODE_CI", false); - assertContains("adİos", "Io", "UNICODE_CI", false); - assertContains("adİos", "i\u0307o", "UNICODE_CI", true); - assertContains("adİos", "İo", "UNICODE_CI", true); - assertContains("i\u0307", "i", "UTF8_LCASE", true); // != UNICODE_CI - assertContains("İ", "\u0307", "UTF8_LCASE", false); - assertContains("İ", "i", "UTF8_LCASE", false); - assertContains("i\u0307", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI - assertContains("i\u0307", "İ", "UTF8_LCASE", true); - assertContains("İ", "i", "UTF8_LCASE", false); - 
assertContains("adi̇os", "io", "UTF8_LCASE", false); - assertContains("adi̇os", "Io", "UTF8_LCASE", false); - assertContains("adi̇os", "i\u0307o", "UTF8_LCASE", true); - assertContains("adi̇os", "İo", "UTF8_LCASE", true); - assertContains("adİos", "io", "UTF8_LCASE", false); - assertContains("adİos", "Io", "UTF8_LCASE", false); - assertContains("adİos", "i\u0307o", "UTF8_LCASE", true); - assertContains("adİos", "İo", "UTF8_LCASE", true); + assertContains("i\u0307", "i", UNICODE_CI, false); + assertContains("i\u0307", "\u0307", UNICODE_CI, false); + assertContains("i\u0307", "İ", UNICODE_CI, true); + assertContains("İ", "i", UNICODE_CI, false); + assertContains("adi̇os", "io", UNICODE_CI, false); + assertContains("adi̇os", "Io", UNICODE_CI, false); + assertContains("adi̇os", "i\u0307o", UNICODE_CI, true); + assertContains("adi̇os", "İo", UNICODE_CI, true); + assertContains("adİos", "io", UNICODE_CI, false); + assertContains("adİos", "Io", UNICODE_CI, false); + assertContains("adİos", "i\u0307o", UNICODE_CI, true); + assertContains("adİos", "İo", UNICODE_CI, true); + assertContains("i\u0307", "i", UTF8_LCASE, true); // != UNICODE_CI + assertContains("İ", "\u0307", UTF8_LCASE, false); + assertContains("İ", "i", UTF8_LCASE, false); + assertContains("i\u0307", "\u0307", UTF8_LCASE, true); // != UNICODE_CI + assertContains("i\u0307", "İ", UTF8_LCASE, true); + assertContains("İ", "i", UTF8_LCASE, false); + assertContains("adi̇os", "io", UTF8_LCASE, false); + assertContains("adi̇os", "Io", UTF8_LCASE, false); + assertContains("adi̇os", "i\u0307o", UTF8_LCASE, true); + assertContains("adi̇os", "İo", UTF8_LCASE, true); + assertContains("adİos", "io", UTF8_LCASE, false); + assertContains("adİos", "Io", UTF8_LCASE, false); + assertContains("adİos", "i\u0307o", UTF8_LCASE, true); + assertContains("adİos", "İo", UTF8_LCASE, true); // Conditional case mapping (e.g. Greek sigmas). 
- assertContains("σ", "σ", "UTF8_BINARY", true); - assertContains("σ", "ς", "UTF8_BINARY", false); - assertContains("σ", "Σ", "UTF8_BINARY", false); - assertContains("ς", "σ", "UTF8_BINARY", false); - assertContains("ς", "ς", "UTF8_BINARY", true); - assertContains("ς", "Σ", "UTF8_BINARY", false); - assertContains("Σ", "σ", "UTF8_BINARY", false); - assertContains("Σ", "ς", "UTF8_BINARY", false); - assertContains("Σ", "Σ", "UTF8_BINARY", true); - assertContains("σ", "σ", "UTF8_LCASE", true); - assertContains("σ", "ς", "UTF8_LCASE", true); - assertContains("σ", "Σ", "UTF8_LCASE", true); - assertContains("ς", "σ", "UTF8_LCASE", true); - assertContains("ς", "ς", "UTF8_LCASE", true); - assertContains("ς", "Σ", "UTF8_LCASE", true); - assertContains("Σ", "σ", "UTF8_LCASE", true); - assertContains("Σ", "ς", "UTF8_LCASE", true); - assertContains("Σ", "Σ", "UTF8_LCASE", true); - assertContains("σ", "σ", "UNICODE", true); - assertContains("σ", "ς", "UNICODE", false); - assertContains("σ", "Σ", "UNICODE", false); - assertContains("ς", "σ", "UNICODE", false); - assertContains("ς", "ς", "UNICODE", true); - assertContains("ς", "Σ", "UNICODE", false); - assertContains("Σ", "σ", "UNICODE", false); - assertContains("Σ", "ς", "UNICODE", false); - assertContains("Σ", "Σ", "UNICODE", true); - assertContains("σ", "σ", "UNICODE_CI", true); - assertContains("σ", "ς", "UNICODE_CI", true); - assertContains("σ", "Σ", "UNICODE_CI", true); - assertContains("ς", "σ", "UNICODE_CI", true); - assertContains("ς", "ς", "UNICODE_CI", true); - assertContains("ς", "Σ", "UNICODE_CI", true); - assertContains("Σ", "σ", "UNICODE_CI", true); - assertContains("Σ", "ς", "UNICODE_CI", true); - assertContains("Σ", "Σ", "UNICODE_CI", true); - assertContains("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", true); - assertContains("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false); - assertContains("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", 
false); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false); - assertContains("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", true); - assertContains("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", true); - assertContains("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", true); - assertContains("ΣΑΛΑΤΑ", "Σ", "UNICODE", true); - assertContains("ΣΑΛΑΤΑ", "σ", "UNICODE", false); - assertContains("ΣΑΛΑΤΑ", "ς", "UNICODE", false); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false); - assertContains("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", true); - assertContains("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", true); - assertContains("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", true); - assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", true); + assertContains("σ", "σ", UTF8_BINARY, true); + assertContains("σ", "ς", UTF8_BINARY, false); + assertContains("σ", "Σ", UTF8_BINARY, false); + assertContains("ς", "σ", UTF8_BINARY, false); + assertContains("ς", "ς", UTF8_BINARY, true); + assertContains("ς", "Σ", UTF8_BINARY, false); + assertContains("Σ", "σ", UTF8_BINARY, false); + assertContains("Σ", "ς", UTF8_BINARY, false); + assertContains("Σ", "Σ", UTF8_BINARY, true); + assertContains("σ", "σ", UTF8_LCASE, true); + assertContains("σ", "ς", UTF8_LCASE, true); + assertContains("σ", "Σ", UTF8_LCASE, true); + assertContains("ς", "σ", UTF8_LCASE, true); + assertContains("ς", "ς", UTF8_LCASE, true); + assertContains("ς", "Σ", UTF8_LCASE, true); + assertContains("Σ", "σ", UTF8_LCASE, true); + assertContains("Σ", "ς", UTF8_LCASE, true); + assertContains("Σ", "Σ", UTF8_LCASE, true); + assertContains("σ", "σ", UNICODE, true); + assertContains("σ", "ς", UNICODE, false); + assertContains("σ", "Σ", 
UNICODE, false); + assertContains("ς", "σ", UNICODE, false); + assertContains("ς", "ς", UNICODE, true); + assertContains("ς", "Σ", UNICODE, false); + assertContains("Σ", "σ", UNICODE, false); + assertContains("Σ", "ς", UNICODE, false); + assertContains("Σ", "Σ", UNICODE, true); + assertContains("σ", "σ", UNICODE_CI, true); + assertContains("σ", "ς", UNICODE_CI, true); + assertContains("σ", "Σ", UNICODE_CI, true); + assertContains("ς", "σ", UNICODE_CI, true); + assertContains("ς", "ς", UNICODE_CI, true); + assertContains("ς", "Σ", UNICODE_CI, true); + assertContains("Σ", "σ", UNICODE_CI, true); + assertContains("Σ", "ς", UNICODE_CI, true); + assertContains("Σ", "Σ", UNICODE_CI, true); + assertContains("ΣΑΛΑΤΑ", "Σ", UTF8_BINARY, true); + assertContains("ΣΑΛΑΤΑ", "σ", UTF8_BINARY, false); + assertContains("ΣΑΛΑΤΑ", "ς", UTF8_BINARY, false); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_BINARY, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_BINARY, false); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_BINARY, false); + assertContains("ΣΑΛΑΤΑ", "Σ", UTF8_LCASE, true); + assertContains("ΣΑΛΑΤΑ", "σ", UTF8_LCASE, true); + assertContains("ΣΑΛΑΤΑ", "ς", UTF8_LCASE, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_LCASE, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_LCASE, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_LCASE, true); + assertContains("ΣΑΛΑΤΑ", "Σ", UNICODE, true); + assertContains("ΣΑΛΑΤΑ", "σ", UNICODE, false); + assertContains("ΣΑΛΑΤΑ", "ς", UNICODE, false); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE, false); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE, false); + assertContains("ΣΑΛΑΤΑ", "Σ", UNICODE_CI, true); + assertContains("ΣΑΛΑΤΑ", "σ", UNICODE_CI, true); + assertContains("ΣΑΛΑΤΑ", "ς", UNICODE_CI, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE_CI, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE_CI, true); + assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE_CI, true); // Surrogate pairs. 
- assertContains("a🙃b🙃c", "x", "UTF8_BINARY", false); - assertContains("a🙃b🙃c", "x", "UTF8_LCASE", false); - assertContains("a🙃b🙃c", "x", "UNICODE", false); - assertContains("a🙃b🙃c", "x", "UNICODE_CI", false); - assertContains("a🙃b🙃c", "b", "UTF8_BINARY", true); - assertContains("a🙃b🙃c", "b", "UTF8_LCASE", true); - assertContains("a🙃b🙃c", "b", "UNICODE", true); - assertContains("a🙃b🙃c", "b", "UNICODE_CI", true); - assertContains("a🙃b🙃c", "a🙃b", "UTF8_BINARY", true); - assertContains("a🙃b🙃c", "a🙃b", "UTF8_LCASE", true); - assertContains("a🙃b🙃c", "a🙃b", "UNICODE", true); - assertContains("a🙃b🙃c", "a🙃b", "UNICODE_CI", true); - assertContains("a🙃b🙃c", "b🙃c", "UTF8_BINARY", true); - assertContains("a🙃b🙃c", "b🙃c", "UTF8_LCASE", true); - assertContains("a🙃b🙃c", "b🙃c", "UNICODE", true); - assertContains("a🙃b🙃c", "b🙃c", "UNICODE_CI", true); - assertContains("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true); - assertContains("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true); - assertContains("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true); - assertContains("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true); - assertContains("😀😆😃😄", "😄😆", "UTF8_BINARY", false); - assertContains("😀😆😃😄", "😄😆", "UTF8_LCASE", false); - assertContains("😀😆😃😄", "😄😆", "UNICODE", false); - assertContains("😀😆😃😄", "😄😆", "UNICODE_CI", false); - assertContains("😀😆😃😄", "😆😃", "UTF8_BINARY", true); - assertContains("😀😆😃😄", "😆😃", "UTF8_LCASE", true); - assertContains("😀😆😃😄", "😆😃", "UNICODE", true); - assertContains("😀😆😃😄", "😆😃", "UNICODE_CI", true); - assertContains("😀😆😃😄", "😀😆", "UTF8_BINARY", true); - assertContains("😀😆😃😄", "😀😆", "UTF8_LCASE", true); - assertContains("😀😆😃😄", "😀😆", "UNICODE", true); - assertContains("😀😆😃😄", "😀😆", "UNICODE_CI", true); - assertContains("😀😆😃😄", "😃😄", "UTF8_BINARY", true); - assertContains("😀😆😃😄", "😃😄", "UTF8_LCASE", true); - assertContains("😀😆😃😄", "😃😄", "UNICODE", true); - assertContains("😀😆😃😄", "😃😄", "UNICODE_CI", true); - assertContains("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true); - assertContains("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", 
true); - assertContains("😀😆😃😄", "😀😆😃😄", "UNICODE", true); - assertContains("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true); - assertContains("𐐅", "𐐅", "UTF8_BINARY", true); - assertContains("𐐅", "𐐅", "UTF8_LCASE", true); - assertContains("𐐅", "𐐅", "UNICODE", true); - assertContains("𐐅", "𐐅", "UNICODE_CI", true); - assertContains("𐐅", "𐐭", "UTF8_BINARY", false); - assertContains("𐐅", "𐐭", "UTF8_LCASE", true); - assertContains("𐐅", "𐐭", "UNICODE", false); - assertContains("𐐅", "𐐭", "UNICODE_CI", true); - assertContains("𝔸", "𝔸", "UTF8_BINARY", true); - assertContains("𝔸", "𝔸", "UTF8_LCASE", true); - assertContains("𝔸", "𝔸", "UNICODE", true); - assertContains("𝔸", "𝔸", "UNICODE_CI", true); + assertContains("a🙃b🙃c", "x", UTF8_BINARY, false); + assertContains("a🙃b🙃c", "x", UTF8_LCASE, false); + assertContains("a🙃b🙃c", "x", UNICODE, false); + assertContains("a🙃b🙃c", "x", UNICODE_CI, false); + assertContains("a🙃b🙃c", "b", UTF8_BINARY, true); + assertContains("a🙃b🙃c", "b", UTF8_LCASE, true); + assertContains("a🙃b🙃c", "b", UNICODE, true); + assertContains("a🙃b🙃c", "b", UNICODE_CI, true); + assertContains("a🙃b🙃c", "a🙃b", UTF8_BINARY, true); + assertContains("a🙃b🙃c", "a🙃b", UTF8_LCASE, true); + assertContains("a🙃b🙃c", "a🙃b", UNICODE, true); + assertContains("a🙃b🙃c", "a🙃b", UNICODE_CI, true); + assertContains("a🙃b🙃c", "b🙃c", UTF8_BINARY, true); + assertContains("a🙃b🙃c", "b🙃c", UTF8_LCASE, true); + assertContains("a🙃b🙃c", "b🙃c", UNICODE, true); + assertContains("a🙃b🙃c", "b🙃c", UNICODE_CI, true); + assertContains("a🙃b🙃c", "a🙃b🙃c", UTF8_BINARY, true); + assertContains("a🙃b🙃c", "a🙃b🙃c", UTF8_LCASE, true); + assertContains("a🙃b🙃c", "a🙃b🙃c", UNICODE, true); + assertContains("a🙃b🙃c", "a🙃b🙃c", UNICODE_CI, true); + assertContains("😀😆😃😄", "😄😆", UTF8_BINARY, false); + assertContains("😀😆😃😄", "😄😆", UTF8_LCASE, false); + assertContains("😀😆😃😄", "😄😆", UNICODE, false); + assertContains("😀😆😃😄", "😄😆", UNICODE_CI, false); + assertContains("😀😆😃😄", "😆😃", UTF8_BINARY, true); + assertContains("😀😆😃😄", "😆😃", 
UTF8_LCASE, true); + assertContains("😀😆😃😄", "😆😃", UNICODE, true); + assertContains("😀😆😃😄", "😆😃", UNICODE_CI, true); + assertContains("😀😆😃😄", "😀😆", UTF8_BINARY, true); + assertContains("😀😆😃😄", "😀😆", UTF8_LCASE, true); + assertContains("😀😆😃😄", "😀😆", UNICODE, true); + assertContains("😀😆😃😄", "😀😆", UNICODE_CI, true); + assertContains("😀😆😃😄", "😃😄", UTF8_BINARY, true); + assertContains("😀😆😃😄", "😃😄", UTF8_LCASE, true); + assertContains("😀😆😃😄", "😃😄", UNICODE, true); + assertContains("😀😆😃😄", "😃😄", UNICODE_CI, true); + assertContains("😀😆😃😄", "😀😆😃😄", UTF8_BINARY, true); + assertContains("😀😆😃😄", "😀😆😃😄", UTF8_LCASE, true); + assertContains("😀😆😃😄", "😀😆😃😄", UNICODE, true); + assertContains("😀😆😃😄", "😀😆😃😄", UNICODE_CI, true); + assertContains("𐐅", "𐐅", UTF8_BINARY, true); + assertContains("𐐅", "𐐅", UTF8_LCASE, true); + assertContains("𐐅", "𐐅", UNICODE, true); + assertContains("𐐅", "𐐅", UNICODE_CI, true); + assertContains("𐐅", "𐐭", UTF8_BINARY, false); + assertContains("𐐅", "𐐭", UTF8_LCASE, true); + assertContains("𐐅", "𐐭", UNICODE, false); + assertContains("𐐅", "𐐭", UNICODE_CI, true); + assertContains("𝔸", "𝔸", UTF8_BINARY, true); + assertContains("𝔸", "𝔸", UTF8_LCASE, true); + assertContains("𝔸", "𝔸", UNICODE, true); + assertContains("𝔸", "𝔸", UNICODE_CI, true); } /** @@ -549,211 +550,211 @@ public void testStartsWith() throws SparkException { assertStartsWith("Здраво", "Здраво", collationName, true); } // Advanced tests. 
- assertStartsWith("abcde", "abc", "UTF8_BINARY", true); - assertStartsWith("abcde", "abd", "UTF8_BINARY", false); - assertStartsWith("abcde", "fgh", "UTF8_BINARY", false); - assertStartsWith("abcde", "abcde", "UNICODE", true); - assertStartsWith("abcde", "aBcDe", "UNICODE", false); - assertStartsWith("abcde", "fghij", "UNICODE", false); - assertStartsWith("abcde", "A", "UTF8_LCASE", true); - assertStartsWith("abcde", "AbCdE", "UTF8_LCASE", true); - assertStartsWith("abcde", "X", "UTF8_LCASE", false); - assertStartsWith("abcde", "a", "UNICODE_CI", true); - assertStartsWith("abcde", "aBC", "UNICODE_CI", true); - assertStartsWith("abcde", "bcd", "UNICODE_CI", false); - assertStartsWith("abcde", "123", "UNICODE_CI", false); - assertStartsWith("ab世De", "ab世", "UTF8_BINARY", true); - assertStartsWith("ab世De", "aB世", "UTF8_BINARY", false); - assertStartsWith("äbćδe", "äbć", "UTF8_BINARY", true); - assertStartsWith("äbćδe", "äBc", "UTF8_BINARY", false); - assertStartsWith("ab世De", "ab世De", "UNICODE", true); - assertStartsWith("ab世De", "AB世dE", "UNICODE", false); - assertStartsWith("äbćδe", "äbćδe", "UNICODE", true); - assertStartsWith("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertStartsWith("ab世De", "ab世", "UTF8_LCASE", true); - assertStartsWith("ab世De", "aB世", "UTF8_LCASE", true); - assertStartsWith("äbćδe", "äbć", "UTF8_LCASE", true); - assertStartsWith("äbćδe", "äBc", "UTF8_LCASE", false); - assertStartsWith("ab世De", "ab世De", "UNICODE_CI", true); - assertStartsWith("ab世De", "AB世dE", "UNICODE_CI", true); - assertStartsWith("äbćδe", "ÄbćδE", "UNICODE_CI", true); - assertStartsWith("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); - assertStartsWith("Kelvin.", "Kelvin", "UTF8_LCASE", true); - assertStartsWith("Kelvin.", "Kelvin", "UTF8_LCASE", true); - assertStartsWith("KKelvin.", "KKelvin", "UTF8_LCASE", true); - assertStartsWith("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); - assertStartsWith("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); - assertStartsWith("KKelvin.", "KKelvin,", 
"UTF8_LCASE", false); + assertStartsWith("abcde", "abc", UTF8_BINARY, true); + assertStartsWith("abcde", "abd", UTF8_BINARY, false); + assertStartsWith("abcde", "fgh", UTF8_BINARY, false); + assertStartsWith("abcde", "abcde", UNICODE, true); + assertStartsWith("abcde", "aBcDe", UNICODE, false); + assertStartsWith("abcde", "fghij", UNICODE, false); + assertStartsWith("abcde", "A", UTF8_LCASE, true); + assertStartsWith("abcde", "AbCdE", UTF8_LCASE, true); + assertStartsWith("abcde", "X", UTF8_LCASE, false); + assertStartsWith("abcde", "a", UNICODE_CI, true); + assertStartsWith("abcde", "aBC", UNICODE_CI, true); + assertStartsWith("abcde", "bcd", UNICODE_CI, false); + assertStartsWith("abcde", "123", UNICODE_CI, false); + assertStartsWith("ab世De", "ab世", UTF8_BINARY, true); + assertStartsWith("ab世De", "aB世", UTF8_BINARY, false); + assertStartsWith("äbćδe", "äbć", UTF8_BINARY, true); + assertStartsWith("äbćδe", "äBc", UTF8_BINARY, false); + assertStartsWith("ab世De", "ab世De", UNICODE, true); + assertStartsWith("ab世De", "AB世dE", UNICODE, false); + assertStartsWith("äbćδe", "äbćδe", UNICODE, true); + assertStartsWith("äbćδe", "ÄBcΔÉ", UNICODE, false); + assertStartsWith("ab世De", "ab世", UTF8_LCASE, true); + assertStartsWith("ab世De", "aB世", UTF8_LCASE, true); + assertStartsWith("äbćδe", "äbć", UTF8_LCASE, true); + assertStartsWith("äbćδe", "äBc", UTF8_LCASE, false); + assertStartsWith("ab世De", "ab世De", UNICODE_CI, true); + assertStartsWith("ab世De", "AB世dE", UNICODE_CI, true); + assertStartsWith("äbćδe", "ÄbćδE", UNICODE_CI, true); + assertStartsWith("äbćδe", "ÄBcΔÉ", UNICODE_CI, false); + assertStartsWith("Kelvin.", "Kelvin", UTF8_LCASE, true); + assertStartsWith("Kelvin.", "Kelvin", UTF8_LCASE, true); + assertStartsWith("KKelvin.", "KKelvin", UTF8_LCASE, true); + assertStartsWith("2 Kelvin.", "2 Kelvin", UTF8_LCASE, true); + assertStartsWith("2 Kelvin.", "2 Kelvin", UTF8_LCASE, true); + assertStartsWith("KKelvin.", "KKelvin,", UTF8_LCASE, false); assertStartsWith("Ћао", 
"Ца", "sr_Cyrl_CI_AI", false); assertStartsWith("Ћао", "ћа", "sr_Cyrl_CI_AI", true); assertStartsWith("Ćao", "Ca", "SR_CI", false); assertStartsWith("Ćao", "Ca", "SR_CI_AI", true); assertStartsWith("Ćao", "Ća", "SR", true); // Case variation. - assertStartsWith("aBcDe", "abc", "UTF8_BINARY", false); - assertStartsWith("aBcDe", "aBc", "UTF8_BINARY", true); - assertStartsWith("aBcDe", "abcde", "UNICODE", false); - assertStartsWith("aBcDe", "aBcDe", "UNICODE", true); - assertStartsWith("aBcDe", "abc", "UTF8_LCASE", true); - assertStartsWith("aBcDe", "ABC", "UTF8_LCASE", true); - assertStartsWith("aBcDe", "abcde", "UNICODE_CI", true); - assertStartsWith("aBcDe", "AbCdE", "UNICODE_CI", true); + assertStartsWith("aBcDe", "abc", UTF8_BINARY, false); + assertStartsWith("aBcDe", "aBc", UTF8_BINARY, true); + assertStartsWith("aBcDe", "abcde", UNICODE, false); + assertStartsWith("aBcDe", "aBcDe", UNICODE, true); + assertStartsWith("aBcDe", "abc", UTF8_LCASE, true); + assertStartsWith("aBcDe", "ABC", UTF8_LCASE, true); + assertStartsWith("aBcDe", "abcde", UNICODE_CI, true); + assertStartsWith("aBcDe", "AbCdE", UNICODE_CI, true); // Accent variation. 
- assertStartsWith("aBcDe", "abć", "UTF8_BINARY", false); - assertStartsWith("aBcDe", "aBć", "UTF8_BINARY", false); - assertStartsWith("aBcDe", "abćde", "UNICODE", false); - assertStartsWith("aBcDe", "aBćDe", "UNICODE", false); - assertStartsWith("aBcDe", "abć", "UTF8_LCASE", false); - assertStartsWith("aBcDe", "ABĆ", "UTF8_LCASE", false); - assertStartsWith("aBcDe", "abćde", "UNICODE_CI", false); - assertStartsWith("aBcDe", "AbĆdE", "UNICODE_CI", false); + assertStartsWith("aBcDe", "abć", UTF8_BINARY, false); + assertStartsWith("aBcDe", "aBć", UTF8_BINARY, false); + assertStartsWith("aBcDe", "abćde", UNICODE, false); + assertStartsWith("aBcDe", "aBćDe", UNICODE, false); + assertStartsWith("aBcDe", "abć", UTF8_LCASE, false); + assertStartsWith("aBcDe", "ABĆ", UTF8_LCASE, false); + assertStartsWith("aBcDe", "abćde", UNICODE_CI, false); + assertStartsWith("aBcDe", "AbĆdE", UNICODE_CI, false); // One-to-many case mapping (e.g. Turkish dotted I). - assertStartsWith("i\u0307", "i", "UNICODE_CI", false); - assertStartsWith("i\u0307", "İ", "UNICODE_CI", true); - assertStartsWith("İ", "i", "UNICODE_CI", false); - assertStartsWith("İİİ", "i̇i̇", "UNICODE_CI", true); - assertStartsWith("İİİ", "i̇i", "UNICODE_CI", false); - assertStartsWith("İi̇İ", "i̇İ", "UNICODE_CI", true); - assertStartsWith("i̇İi̇i̇", "İi̇İi", "UNICODE_CI", false); - assertStartsWith("i̇onic", "io", "UNICODE_CI", false); - assertStartsWith("i̇onic", "Io", "UNICODE_CI", false); - assertStartsWith("i̇onic", "i\u0307o", "UNICODE_CI", true); - assertStartsWith("i̇onic", "İo", "UNICODE_CI", true); - assertStartsWith("İonic", "io", "UNICODE_CI", false); - assertStartsWith("İonic", "Io", "UNICODE_CI", false); - assertStartsWith("İonic", "i\u0307o", "UNICODE_CI", true); - assertStartsWith("İonic", "İo", "UNICODE_CI", true); - assertStartsWith("i\u0307", "i", "UTF8_LCASE", true); // != UNICODE_CI - assertStartsWith("i\u0307", "İ", "UTF8_LCASE", true); - assertStartsWith("İ", "i", "UTF8_LCASE", false); - 
assertStartsWith("İİİ", "i̇i̇", "UTF8_LCASE", true); - assertStartsWith("İİİ", "i̇i", "UTF8_LCASE", false); - assertStartsWith("İi̇İ", "i̇İ", "UTF8_LCASE", true); - assertStartsWith("i̇İi̇i̇", "İi̇İi", "UTF8_LCASE", true); // != UNICODE_CI - assertStartsWith("i̇onic", "io", "UTF8_LCASE", false); - assertStartsWith("i̇onic", "Io", "UTF8_LCASE", false); - assertStartsWith("i̇onic", "i\u0307o", "UTF8_LCASE", true); - assertStartsWith("i̇onic", "İo", "UTF8_LCASE", true); - assertStartsWith("İonic", "io", "UTF8_LCASE", false); - assertStartsWith("İonic", "Io", "UTF8_LCASE", false); - assertStartsWith("İonic", "i\u0307o", "UTF8_LCASE", true); - assertStartsWith("İonic", "İo", "UTF8_LCASE", true); - assertStartsWith("oİ", "oİ", "UTF8_LCASE", true); - assertStartsWith("oİ", "oi̇", "UTF8_LCASE", true); + assertStartsWith("i\u0307", "i", UNICODE_CI, false); + assertStartsWith("i\u0307", "İ", UNICODE_CI, true); + assertStartsWith("İ", "i", UNICODE_CI, false); + assertStartsWith("İİİ", "i̇i̇", UNICODE_CI, true); + assertStartsWith("İİİ", "i̇i", UNICODE_CI, false); + assertStartsWith("İi̇İ", "i̇İ", UNICODE_CI, true); + assertStartsWith("i̇İi̇i̇", "İi̇İi", UNICODE_CI, false); + assertStartsWith("i̇onic", "io", UNICODE_CI, false); + assertStartsWith("i̇onic", "Io", UNICODE_CI, false); + assertStartsWith("i̇onic", "i\u0307o", UNICODE_CI, true); + assertStartsWith("i̇onic", "İo", UNICODE_CI, true); + assertStartsWith("İonic", "io", UNICODE_CI, false); + assertStartsWith("İonic", "Io", UNICODE_CI, false); + assertStartsWith("İonic", "i\u0307o", UNICODE_CI, true); + assertStartsWith("İonic", "İo", UNICODE_CI, true); + assertStartsWith("i\u0307", "i", UTF8_LCASE, true); // != UNICODE_CI + assertStartsWith("i\u0307", "İ", UTF8_LCASE, true); + assertStartsWith("İ", "i", UTF8_LCASE, false); + assertStartsWith("İİİ", "i̇i̇", UTF8_LCASE, true); + assertStartsWith("İİİ", "i̇i", UTF8_LCASE, false); + assertStartsWith("İi̇İ", "i̇İ", UTF8_LCASE, true); + assertStartsWith("i̇İi̇i̇", "İi̇İi", 
UTF8_LCASE, true); // != UNICODE_CI + assertStartsWith("i̇onic", "io", UTF8_LCASE, false); + assertStartsWith("i̇onic", "Io", UTF8_LCASE, false); + assertStartsWith("i̇onic", "i\u0307o", UTF8_LCASE, true); + assertStartsWith("i̇onic", "İo", UTF8_LCASE, true); + assertStartsWith("İonic", "io", UTF8_LCASE, false); + assertStartsWith("İonic", "Io", UTF8_LCASE, false); + assertStartsWith("İonic", "i\u0307o", UTF8_LCASE, true); + assertStartsWith("İonic", "İo", UTF8_LCASE, true); + assertStartsWith("oİ", "oİ", UTF8_LCASE, true); + assertStartsWith("oİ", "oi̇", UTF8_LCASE, true); // Conditional case mapping (e.g. Greek sigmas). - assertStartsWith("σ", "σ", "UTF8_BINARY", true); - assertStartsWith("σ", "ς", "UTF8_BINARY", false); - assertStartsWith("σ", "Σ", "UTF8_BINARY", false); - assertStartsWith("ς", "σ", "UTF8_BINARY", false); - assertStartsWith("ς", "ς", "UTF8_BINARY", true); - assertStartsWith("ς", "Σ", "UTF8_BINARY", false); - assertStartsWith("Σ", "σ", "UTF8_BINARY", false); - assertStartsWith("Σ", "ς", "UTF8_BINARY", false); - assertStartsWith("Σ", "Σ", "UTF8_BINARY", true); - assertStartsWith("σ", "σ", "UTF8_LCASE", true); - assertStartsWith("σ", "ς", "UTF8_LCASE", true); - assertStartsWith("σ", "Σ", "UTF8_LCASE", true); - assertStartsWith("ς", "σ", "UTF8_LCASE", true); - assertStartsWith("ς", "ς", "UTF8_LCASE", true); - assertStartsWith("ς", "Σ", "UTF8_LCASE", true); - assertStartsWith("Σ", "σ", "UTF8_LCASE", true); - assertStartsWith("Σ", "ς", "UTF8_LCASE", true); - assertStartsWith("Σ", "Σ", "UTF8_LCASE", true); - assertStartsWith("σ", "σ", "UNICODE", true); - assertStartsWith("σ", "ς", "UNICODE", false); - assertStartsWith("σ", "Σ", "UNICODE", false); - assertStartsWith("ς", "σ", "UNICODE", false); - assertStartsWith("ς", "ς", "UNICODE", true); - assertStartsWith("ς", "Σ", "UNICODE", false); - assertStartsWith("Σ", "σ", "UNICODE", false); - assertStartsWith("Σ", "ς", "UNICODE", false); - assertStartsWith("Σ", "Σ", "UNICODE", true); - assertStartsWith("σ", 
"σ", "UNICODE_CI", true); - assertStartsWith("σ", "ς", "UNICODE_CI", true); - assertStartsWith("σ", "Σ", "UNICODE_CI", true); - assertStartsWith("ς", "σ", "UNICODE_CI", true); - assertStartsWith("ς", "ς", "UNICODE_CI", true); - assertStartsWith("ς", "Σ", "UNICODE_CI", true); - assertStartsWith("Σ", "σ", "UNICODE_CI", true); - assertStartsWith("Σ", "ς", "UNICODE_CI", true); - assertStartsWith("Σ", "Σ", "UNICODE_CI", true); - assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", true); - assertStartsWith("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false); - assertStartsWith("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false); - assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", true); - assertStartsWith("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", true); - assertStartsWith("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", true); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", false); - assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE", true); - assertStartsWith("ΣΑΛΑΤΑ", "σ", "UNICODE", false); - assertStartsWith("ΣΑΛΑΤΑ", "ς", "UNICODE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false); - assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", true); - assertStartsWith("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", true); - assertStartsWith("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", true); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", false); - assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", false); + assertStartsWith("σ", "σ", UTF8_BINARY, true); + assertStartsWith("σ", "ς", UTF8_BINARY, false); + assertStartsWith("σ", "Σ", UTF8_BINARY, false); + assertStartsWith("ς", "σ", UTF8_BINARY, false); + 
assertStartsWith("ς", "ς", UTF8_BINARY, true); + assertStartsWith("ς", "Σ", UTF8_BINARY, false); + assertStartsWith("Σ", "σ", UTF8_BINARY, false); + assertStartsWith("Σ", "ς", UTF8_BINARY, false); + assertStartsWith("Σ", "Σ", UTF8_BINARY, true); + assertStartsWith("σ", "σ", UTF8_LCASE, true); + assertStartsWith("σ", "ς", UTF8_LCASE, true); + assertStartsWith("σ", "Σ", UTF8_LCASE, true); + assertStartsWith("ς", "σ", UTF8_LCASE, true); + assertStartsWith("ς", "ς", UTF8_LCASE, true); + assertStartsWith("ς", "Σ", UTF8_LCASE, true); + assertStartsWith("Σ", "σ", UTF8_LCASE, true); + assertStartsWith("Σ", "ς", UTF8_LCASE, true); + assertStartsWith("Σ", "Σ", UTF8_LCASE, true); + assertStartsWith("σ", "σ", UNICODE, true); + assertStartsWith("σ", "ς", UNICODE, false); + assertStartsWith("σ", "Σ", UNICODE, false); + assertStartsWith("ς", "σ", UNICODE, false); + assertStartsWith("ς", "ς", UNICODE, true); + assertStartsWith("ς", "Σ", UNICODE, false); + assertStartsWith("Σ", "σ", UNICODE, false); + assertStartsWith("Σ", "ς", UNICODE, false); + assertStartsWith("Σ", "Σ", UNICODE, true); + assertStartsWith("σ", "σ", UNICODE_CI, true); + assertStartsWith("σ", "ς", UNICODE_CI, true); + assertStartsWith("σ", "Σ", UNICODE_CI, true); + assertStartsWith("ς", "σ", UNICODE_CI, true); + assertStartsWith("ς", "ς", UNICODE_CI, true); + assertStartsWith("ς", "Σ", UNICODE_CI, true); + assertStartsWith("Σ", "σ", UNICODE_CI, true); + assertStartsWith("Σ", "ς", UNICODE_CI, true); + assertStartsWith("Σ", "Σ", UNICODE_CI, true); + assertStartsWith("ΣΑΛΑΤΑ", "Σ", UTF8_BINARY, true); + assertStartsWith("ΣΑΛΑΤΑ", "σ", UTF8_BINARY, false); + assertStartsWith("ΣΑΛΑΤΑ", "ς", UTF8_BINARY, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_BINARY, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_BINARY, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_BINARY, false); + assertStartsWith("ΣΑΛΑΤΑ", "Σ", UTF8_LCASE, true); + assertStartsWith("ΣΑΛΑΤΑ", "σ", UTF8_LCASE, true); + assertStartsWith("ΣΑΛΑΤΑ", 
"ς", UTF8_LCASE, true); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_LCASE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_LCASE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_LCASE, false); + assertStartsWith("ΣΑΛΑΤΑ", "Σ", UNICODE, true); + assertStartsWith("ΣΑΛΑΤΑ", "σ", UNICODE, false); + assertStartsWith("ΣΑΛΑΤΑ", "ς", UNICODE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE, false); + assertStartsWith("ΣΑΛΑΤΑ", "Σ", UNICODE_CI, true); + assertStartsWith("ΣΑΛΑΤΑ", "σ", UNICODE_CI, true); + assertStartsWith("ΣΑΛΑΤΑ", "ς", UNICODE_CI, true); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE_CI, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE_CI, false); + assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE_CI, false); // Surrogate pairs. - assertStartsWith("a🙃b🙃c", "x", "UTF8_BINARY", false); - assertStartsWith("a🙃b🙃c", "x", "UTF8_LCASE", false); - assertStartsWith("a🙃b🙃c", "x", "UNICODE", false); - assertStartsWith("a🙃b🙃c", "x", "UNICODE_CI", false); - assertStartsWith("a🙃b🙃c", "b", "UTF8_BINARY", false); - assertStartsWith("a🙃b🙃c", "b", "UTF8_LCASE", false); - assertStartsWith("a🙃b🙃c", "b", "UNICODE", false); - assertStartsWith("a🙃b🙃c", "b", "UNICODE_CI", false); - assertStartsWith("a🙃b🙃c", "a🙃b", "UTF8_BINARY", true); - assertStartsWith("a🙃b🙃c", "a🙃b", "UTF8_LCASE", true); - assertStartsWith("a🙃b🙃c", "a🙃b", "UNICODE", true); - assertStartsWith("a🙃b🙃c", "a🙃b", "UNICODE_CI", true); - assertStartsWith("a🙃b🙃c", "b🙃c", "UTF8_BINARY", false); - assertStartsWith("a🙃b🙃c", "b🙃c", "UTF8_LCASE", false); - assertStartsWith("a🙃b🙃c", "b🙃c", "UNICODE", false); - assertStartsWith("a🙃b🙃c", "b🙃c", "UNICODE_CI", false); - assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true); - assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true); - assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true); - assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true); - 
assertStartsWith("😀😆😃😄", "😄😆", "UTF8_BINARY", false); - assertStartsWith("😀😆😃😄", "😄😆", "UTF8_LCASE", false); - assertStartsWith("😀😆😃😄", "😄😆", "UNICODE", false); - assertStartsWith("😀😆😃😄", "😄😆", "UNICODE_CI", false); - assertStartsWith("😀😆😃😄", "😆😃", "UTF8_BINARY", false); - assertStartsWith("😀😆😃😄", "😆😃", "UTF8_LCASE", false); - assertStartsWith("😀😆😃😄", "😆😃", "UNICODE", false); - assertStartsWith("😀😆😃😄", "😆😃", "UNICODE_CI", false); - assertStartsWith("😀😆😃😄", "😀😆", "UTF8_BINARY", true); - assertStartsWith("😀😆😃😄", "😀😆", "UTF8_LCASE", true); - assertStartsWith("😀😆😃😄", "😀😆", "UNICODE", true); - assertStartsWith("😀😆😃😄", "😀😆", "UNICODE_CI", true); - assertStartsWith("😀😆😃😄", "😃😄", "UTF8_BINARY", false); - assertStartsWith("😀😆😃😄", "😃😄", "UTF8_LCASE", false); - assertStartsWith("😀😆😃😄", "😃😄", "UNICODE", false); - assertStartsWith("😀😆😃😄", "😃😄", "UNICODE_CI", false); - assertStartsWith("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true); - assertStartsWith("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", true); - assertStartsWith("😀😆😃😄", "😀😆😃😄", "UNICODE", true); - assertStartsWith("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true); - assertStartsWith("𐐅", "𐐅", "UTF8_BINARY", true); - assertStartsWith("𐐅", "𐐅", "UTF8_LCASE", true); - assertStartsWith("𐐅", "𐐅", "UNICODE", true); - assertStartsWith("𐐅", "𐐅", "UNICODE_CI", true); - assertStartsWith("𐐅", "𐐭", "UTF8_BINARY", false); - assertStartsWith("𐐅", "𐐭", "UTF8_LCASE", true); - assertStartsWith("𐐅", "𐐭", "UNICODE", false); - assertStartsWith("𐐅", "𐐭", "UNICODE_CI", true); - assertStartsWith("𝔸", "𝔸", "UTF8_BINARY", true); - assertStartsWith("𝔸", "𝔸", "UTF8_LCASE", true); - assertStartsWith("𝔸", "𝔸", "UNICODE", true); - assertStartsWith("𝔸", "𝔸", "UNICODE_CI", true); + assertStartsWith("a🙃b🙃c", "x", UTF8_BINARY, false); + assertStartsWith("a🙃b🙃c", "x", UTF8_LCASE, false); + assertStartsWith("a🙃b🙃c", "x", UNICODE, false); + assertStartsWith("a🙃b🙃c", "x", UNICODE_CI, false); + assertStartsWith("a🙃b🙃c", "b", UTF8_BINARY, false); + assertStartsWith("a🙃b🙃c", "b", UTF8_LCASE, 
false); + assertStartsWith("a🙃b🙃c", "b", UNICODE, false); + assertStartsWith("a🙃b🙃c", "b", UNICODE_CI, false); + assertStartsWith("a🙃b🙃c", "a🙃b", UTF8_BINARY, true); + assertStartsWith("a🙃b🙃c", "a🙃b", UTF8_LCASE, true); + assertStartsWith("a🙃b🙃c", "a🙃b", UNICODE, true); + assertStartsWith("a🙃b🙃c", "a🙃b", UNICODE_CI, true); + assertStartsWith("a🙃b🙃c", "b🙃c", UTF8_BINARY, false); + assertStartsWith("a🙃b🙃c", "b🙃c", UTF8_LCASE, false); + assertStartsWith("a🙃b🙃c", "b🙃c", UNICODE, false); + assertStartsWith("a🙃b🙃c", "b🙃c", UNICODE_CI, false); + assertStartsWith("a🙃b🙃c", "a🙃b🙃c", UTF8_BINARY, true); + assertStartsWith("a🙃b🙃c", "a🙃b🙃c", UTF8_LCASE, true); + assertStartsWith("a🙃b🙃c", "a🙃b🙃c", UNICODE, true); + assertStartsWith("a🙃b🙃c", "a🙃b🙃c", UNICODE_CI, true); + assertStartsWith("😀😆😃😄", "😄😆", UTF8_BINARY, false); + assertStartsWith("😀😆😃😄", "😄😆", UTF8_LCASE, false); + assertStartsWith("😀😆😃😄", "😄😆", UNICODE, false); + assertStartsWith("😀😆😃😄", "😄😆", UNICODE_CI, false); + assertStartsWith("😀😆😃😄", "😆😃", UTF8_BINARY, false); + assertStartsWith("😀😆😃😄", "😆😃", UTF8_LCASE, false); + assertStartsWith("😀😆😃😄", "😆😃", UNICODE, false); + assertStartsWith("😀😆😃😄", "😆😃", UNICODE_CI, false); + assertStartsWith("😀😆😃😄", "😀😆", UTF8_BINARY, true); + assertStartsWith("😀😆😃😄", "😀😆", UTF8_LCASE, true); + assertStartsWith("😀😆😃😄", "😀😆", UNICODE, true); + assertStartsWith("😀😆😃😄", "😀😆", UNICODE_CI, true); + assertStartsWith("😀😆😃😄", "😃😄", UTF8_BINARY, false); + assertStartsWith("😀😆😃😄", "😃😄", UTF8_LCASE, false); + assertStartsWith("😀😆😃😄", "😃😄", UNICODE, false); + assertStartsWith("😀😆😃😄", "😃😄", UNICODE_CI, false); + assertStartsWith("😀😆😃😄", "😀😆😃😄", UTF8_BINARY, true); + assertStartsWith("😀😆😃😄", "😀😆😃😄", UTF8_LCASE, true); + assertStartsWith("😀😆😃😄", "😀😆😃😄", UNICODE, true); + assertStartsWith("😀😆😃😄", "😀😆😃😄", UNICODE_CI, true); + assertStartsWith("𐐅", "𐐅", UTF8_BINARY, true); + assertStartsWith("𐐅", "𐐅", UTF8_LCASE, true); + assertStartsWith("𐐅", "𐐅", UNICODE, true); + assertStartsWith("𐐅", "𐐅", UNICODE_CI, 
true); + assertStartsWith("𐐅", "𐐭", UTF8_BINARY, false); + assertStartsWith("𐐅", "𐐭", UTF8_LCASE, true); + assertStartsWith("𐐅", "𐐭", UNICODE, false); + assertStartsWith("𐐅", "𐐭", UNICODE_CI, true); + assertStartsWith("𝔸", "𝔸", UTF8_BINARY, true); + assertStartsWith("𝔸", "𝔸", UTF8_LCASE, true); + assertStartsWith("𝔸", "𝔸", UNICODE, true); + assertStartsWith("𝔸", "𝔸", UNICODE_CI, true); } /** @@ -806,212 +807,212 @@ public void testEndsWith() throws SparkException { assertEndsWith("Здраво", "Здраво", collationName, true); } // Advanced tests. - assertEndsWith("abcde", "cde", "UTF8_BINARY", true); - assertEndsWith("abcde", "bde", "UTF8_BINARY", false); - assertEndsWith("abcde", "fgh", "UTF8_BINARY", false); - assertEndsWith("abcde", "abcde", "UNICODE", true); - assertEndsWith("abcde", "aBcDe", "UNICODE", false); - assertEndsWith("abcde", "fghij", "UNICODE", false); - assertEndsWith("abcde", "E", "UTF8_LCASE", true); - assertEndsWith("abcde", "AbCdE", "UTF8_LCASE", true); - assertEndsWith("abcde", "X", "UTF8_LCASE", false); - assertEndsWith("abcde", "e", "UNICODE_CI", true); - assertEndsWith("abcde", "CDe", "UNICODE_CI", true); - assertEndsWith("abcde", "bcd", "UNICODE_CI", false); - assertEndsWith("abcde", "123", "UNICODE_CI", false); - assertEndsWith("ab世De", "世De", "UTF8_BINARY", true); - assertEndsWith("ab世De", "世dE", "UTF8_BINARY", false); - assertEndsWith("äbćδe", "ćδe", "UTF8_BINARY", true); - assertEndsWith("äbćδe", "cΔé", "UTF8_BINARY", false); - assertEndsWith("ab世De", "ab世De", "UNICODE", true); - assertEndsWith("ab世De", "AB世dE", "UNICODE", false); - assertEndsWith("äbćδe", "äbćδe", "UNICODE", true); - assertEndsWith("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertEndsWith("ab世De", "世De", "UTF8_LCASE", true); - assertEndsWith("ab世De", "世dE", "UTF8_LCASE", true); - assertEndsWith("äbćδe", "ćδe", "UTF8_LCASE", true); - assertEndsWith("äbćδe", "cδE", "UTF8_LCASE", false); - assertEndsWith("ab世De", "ab世De", "UNICODE_CI", true); - assertEndsWith("ab世De", "AB世dE", 
"UNICODE_CI", true); - assertEndsWith("äbćδe", "ÄbćδE", "UNICODE_CI", true); - assertEndsWith("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); - assertEndsWith("The Kelvin", "Kelvin", "UTF8_LCASE", true); - assertEndsWith("The Kelvin", "Kelvin", "UTF8_LCASE", true); - assertEndsWith("The KKelvin", "KKelvin", "UTF8_LCASE", true); - assertEndsWith("The 2 Kelvin", "2 Kelvin", "UTF8_LCASE", true); - assertEndsWith("The 2 Kelvin", "2 Kelvin", "UTF8_LCASE", true); - assertEndsWith("The KKelvin", "KKelvin,", "UTF8_LCASE", false); + assertEndsWith("abcde", "cde", UTF8_BINARY, true); + assertEndsWith("abcde", "bde", UTF8_BINARY, false); + assertEndsWith("abcde", "fgh", UTF8_BINARY, false); + assertEndsWith("abcde", "abcde", UNICODE, true); + assertEndsWith("abcde", "aBcDe", UNICODE, false); + assertEndsWith("abcde", "fghij", UNICODE, false); + assertEndsWith("abcde", "E", UTF8_LCASE, true); + assertEndsWith("abcde", "AbCdE", UTF8_LCASE, true); + assertEndsWith("abcde", "X", UTF8_LCASE, false); + assertEndsWith("abcde", "e", UNICODE_CI, true); + assertEndsWith("abcde", "CDe", UNICODE_CI, true); + assertEndsWith("abcde", "bcd", UNICODE_CI, false); + assertEndsWith("abcde", "123", UNICODE_CI, false); + assertEndsWith("ab世De", "世De", UTF8_BINARY, true); + assertEndsWith("ab世De", "世dE", UTF8_BINARY, false); + assertEndsWith("äbćδe", "ćδe", UTF8_BINARY, true); + assertEndsWith("äbćδe", "cΔé", UTF8_BINARY, false); + assertEndsWith("ab世De", "ab世De", UNICODE, true); + assertEndsWith("ab世De", "AB世dE", UNICODE, false); + assertEndsWith("äbćδe", "äbćδe", UNICODE, true); + assertEndsWith("äbćδe", "ÄBcΔÉ", UNICODE, false); + assertEndsWith("ab世De", "世De", UTF8_LCASE, true); + assertEndsWith("ab世De", "世dE", UTF8_LCASE, true); + assertEndsWith("äbćδe", "ćδe", UTF8_LCASE, true); + assertEndsWith("äbćδe", "cδE", UTF8_LCASE, false); + assertEndsWith("ab世De", "ab世De", UNICODE_CI, true); + assertEndsWith("ab世De", "AB世dE", UNICODE_CI, true); + assertEndsWith("äbćδe", "ÄbćδE", UNICODE_CI, true); + 
assertEndsWith("äbćδe", "ÄBcΔÉ", UNICODE_CI, false); + assertEndsWith("The Kelvin", "Kelvin", UTF8_LCASE, true); + assertEndsWith("The Kelvin", "Kelvin", UTF8_LCASE, true); + assertEndsWith("The KKelvin", "KKelvin", UTF8_LCASE, true); + assertEndsWith("The 2 Kelvin", "2 Kelvin", UTF8_LCASE, true); + assertEndsWith("The 2 Kelvin", "2 Kelvin", UTF8_LCASE, true); + assertEndsWith("The KKelvin", "KKelvin,", UTF8_LCASE, false); assertEndsWith("Ћевапчићи", "цици", "sr_Cyrl_CI_AI", false); assertEndsWith("Ћевапчићи", "чИЋи", "sr_Cyrl_CI_AI", true); assertEndsWith("Ćevapčići", "cici", "SR_CI", false); assertEndsWith("Ćevapčići", "cici", "SR_CI_AI", true); assertEndsWith("Ćevapčići", "čići", "SR", true); // Case variation. - assertEndsWith("aBcDe", "cde", "UTF8_BINARY", false); - assertEndsWith("aBcDe", "cDe", "UTF8_BINARY", true); - assertEndsWith("aBcDe", "abcde", "UNICODE", false); - assertEndsWith("aBcDe", "aBcDe", "UNICODE", true); - assertEndsWith("aBcDe", "cde", "UTF8_LCASE", true); - assertEndsWith("aBcDe", "CDE", "UTF8_LCASE", true); - assertEndsWith("aBcDe", "abcde", "UNICODE_CI", true); - assertEndsWith("aBcDe", "AbCdE", "UNICODE_CI", true); + assertEndsWith("aBcDe", "cde", UTF8_BINARY, false); + assertEndsWith("aBcDe", "cDe", UTF8_BINARY, true); + assertEndsWith("aBcDe", "abcde", UNICODE, false); + assertEndsWith("aBcDe", "aBcDe", UNICODE, true); + assertEndsWith("aBcDe", "cde", UTF8_LCASE, true); + assertEndsWith("aBcDe", "CDE", UTF8_LCASE, true); + assertEndsWith("aBcDe", "abcde", UNICODE_CI, true); + assertEndsWith("aBcDe", "AbCdE", UNICODE_CI, true); // Accent variation. 
- assertEndsWith("aBcDe", "ćde", "UTF8_BINARY", false); - assertEndsWith("aBcDe", "ćDe", "UTF8_BINARY", false); - assertEndsWith("aBcDe", "abćde", "UNICODE", false); - assertEndsWith("aBcDe", "aBćDe", "UNICODE", false); - assertEndsWith("aBcDe", "ćde", "UTF8_LCASE", false); - assertEndsWith("aBcDe", "ĆDE", "UTF8_LCASE", false); - assertEndsWith("aBcDe", "abćde", "UNICODE_CI", false); - assertEndsWith("aBcDe", "AbĆdE", "UNICODE_CI", false); + assertEndsWith("aBcDe", "ćde", UTF8_BINARY, false); + assertEndsWith("aBcDe", "ćDe", UTF8_BINARY, false); + assertEndsWith("aBcDe", "abćde", UNICODE, false); + assertEndsWith("aBcDe", "aBćDe", UNICODE, false); + assertEndsWith("aBcDe", "ćde", UTF8_LCASE, false); + assertEndsWith("aBcDe", "ĆDE", UTF8_LCASE, false); + assertEndsWith("aBcDe", "abćde", UNICODE_CI, false); + assertEndsWith("aBcDe", "AbĆdE", UNICODE_CI, false); // One-to-many case mapping (e.g. Turkish dotted I). - assertEndsWith("i\u0307", "\u0307", "UNICODE_CI", false); - assertEndsWith("i\u0307", "İ", "UNICODE_CI", true); - assertEndsWith("İ", "i", "UNICODE_CI", false); - assertEndsWith("İİİ", "i̇i̇", "UNICODE_CI", true); - assertEndsWith("İİİ", "ii̇", "UNICODE_CI", false); - assertEndsWith("İi̇İ", "İi̇", "UNICODE_CI", true); - assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", "UNICODE_CI", false); - assertEndsWith("the i\u0307o", "io", "UNICODE_CI", false); - assertEndsWith("the i\u0307o", "Io", "UNICODE_CI", false); - assertEndsWith("the i\u0307o", "i\u0307o", "UNICODE_CI", true); - assertEndsWith("the i\u0307o", "İo", "UNICODE_CI", true); - assertEndsWith("the İo", "io", "UNICODE_CI", false); - assertEndsWith("the İo", "Io", "UNICODE_CI", false); - assertEndsWith("the İo", "i\u0307o", "UNICODE_CI", true); - assertEndsWith("the İo", "İo", "UNICODE_CI", true); - assertEndsWith("i\u0307", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI - assertEndsWith("i\u0307", "İ", "UTF8_LCASE", true); - assertEndsWith("İ", "\u0307", "UTF8_LCASE", false); - assertEndsWith("İİİ", "i̇i̇", 
"UTF8_LCASE", true); - assertEndsWith("İİİ", "ii̇", "UTF8_LCASE", false); - assertEndsWith("İi̇İ", "İi̇", "UTF8_LCASE", true); - assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", "UTF8_LCASE", true); // != UNICODE_CI - assertEndsWith("i̇İi̇i̇", "\u0307İİ", "UTF8_LCASE", false); - assertEndsWith("the i\u0307o", "io", "UTF8_LCASE", false); - assertEndsWith("the i\u0307o", "Io", "UTF8_LCASE", false); - assertEndsWith("the i\u0307o", "i\u0307o", "UTF8_LCASE", true); - assertEndsWith("the i\u0307o", "İo", "UTF8_LCASE", true); - assertEndsWith("the İo", "io", "UTF8_LCASE", false); - assertEndsWith("the İo", "Io", "UTF8_LCASE", false); - assertEndsWith("the İo", "i\u0307o", "UTF8_LCASE", true); - assertEndsWith("the İo", "İo", "UTF8_LCASE", true); - assertEndsWith("İo", "İo", "UTF8_LCASE", true); - assertEndsWith("İo", "i̇o", "UTF8_LCASE", true); + assertEndsWith("i\u0307", "\u0307", UNICODE_CI, false); + assertEndsWith("i\u0307", "İ", UNICODE_CI, true); + assertEndsWith("İ", "i", UNICODE_CI, false); + assertEndsWith("İİİ", "i̇i̇", UNICODE_CI, true); + assertEndsWith("İİİ", "ii̇", UNICODE_CI, false); + assertEndsWith("İi̇İ", "İi̇", UNICODE_CI, true); + assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", UNICODE_CI, false); + assertEndsWith("the i\u0307o", "io", UNICODE_CI, false); + assertEndsWith("the i\u0307o", "Io", UNICODE_CI, false); + assertEndsWith("the i\u0307o", "i\u0307o", UNICODE_CI, true); + assertEndsWith("the i\u0307o", "İo", UNICODE_CI, true); + assertEndsWith("the İo", "io", UNICODE_CI, false); + assertEndsWith("the İo", "Io", UNICODE_CI, false); + assertEndsWith("the İo", "i\u0307o", UNICODE_CI, true); + assertEndsWith("the İo", "İo", UNICODE_CI, true); + assertEndsWith("i\u0307", "\u0307", UTF8_LCASE, true); // != UNICODE_CI + assertEndsWith("i\u0307", "İ", UTF8_LCASE, true); + assertEndsWith("İ", "\u0307", UTF8_LCASE, false); + assertEndsWith("İİİ", "i̇i̇", UTF8_LCASE, true); + assertEndsWith("İİİ", "ii̇", UTF8_LCASE, false); + assertEndsWith("İi̇İ", "İi̇", UTF8_LCASE, 
true); + assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", UTF8_LCASE, true); // != UNICODE_CI + assertEndsWith("i̇İi̇i̇", "\u0307İİ", UTF8_LCASE, false); + assertEndsWith("the i\u0307o", "io", UTF8_LCASE, false); + assertEndsWith("the i\u0307o", "Io", UTF8_LCASE, false); + assertEndsWith("the i\u0307o", "i\u0307o", UTF8_LCASE, true); + assertEndsWith("the i\u0307o", "İo", UTF8_LCASE, true); + assertEndsWith("the İo", "io", UTF8_LCASE, false); + assertEndsWith("the İo", "Io", UTF8_LCASE, false); + assertEndsWith("the İo", "i\u0307o", UTF8_LCASE, true); + assertEndsWith("the İo", "İo", UTF8_LCASE, true); + assertEndsWith("İo", "İo", UTF8_LCASE, true); + assertEndsWith("İo", "i̇o", UTF8_LCASE, true); // Conditional case mapping (e.g. Greek sigmas). - assertEndsWith("σ", "σ", "UTF8_BINARY", true); - assertEndsWith("σ", "ς", "UTF8_BINARY", false); - assertEndsWith("σ", "Σ", "UTF8_BINARY", false); - assertEndsWith("ς", "σ", "UTF8_BINARY", false); - assertEndsWith("ς", "ς", "UTF8_BINARY", true); - assertEndsWith("ς", "Σ", "UTF8_BINARY", false); - assertEndsWith("Σ", "σ", "UTF8_BINARY", false); - assertEndsWith("Σ", "ς", "UTF8_BINARY", false); - assertEndsWith("Σ", "Σ", "UTF8_BINARY", true); - assertEndsWith("σ", "σ", "UTF8_LCASE", true); - assertEndsWith("σ", "ς", "UTF8_LCASE", true); - assertEndsWith("σ", "Σ", "UTF8_LCASE", true); - assertEndsWith("ς", "σ", "UTF8_LCASE", true); - assertEndsWith("ς", "ς", "UTF8_LCASE", true); - assertEndsWith("ς", "Σ", "UTF8_LCASE", true); - assertEndsWith("Σ", "σ", "UTF8_LCASE", true); - assertEndsWith("Σ", "ς", "UTF8_LCASE", true); - assertEndsWith("Σ", "Σ", "UTF8_LCASE", true); - assertEndsWith("σ", "σ", "UNICODE", true); - assertEndsWith("σ", "ς", "UNICODE", false); - assertEndsWith("σ", "Σ", "UNICODE", false); - assertEndsWith("ς", "σ", "UNICODE", false); - assertEndsWith("ς", "ς", "UNICODE", true); - assertEndsWith("ς", "Σ", "UNICODE", false); - assertEndsWith("Σ", "σ", "UNICODE", false); - assertEndsWith("Σ", "ς", "UNICODE", false); - 
assertEndsWith("Σ", "Σ", "UNICODE", true); - assertEndsWith("σ", "σ", "UNICODE_CI", true); - assertEndsWith("σ", "ς", "UNICODE_CI", true); - assertEndsWith("σ", "Σ", "UNICODE_CI", true); - assertEndsWith("ς", "σ", "UNICODE_CI", true); - assertEndsWith("ς", "ς", "UNICODE_CI", true); - assertEndsWith("ς", "Σ", "UNICODE_CI", true); - assertEndsWith("Σ", "σ", "UNICODE_CI", true); - assertEndsWith("Σ", "ς", "UNICODE_CI", true); - assertEndsWith("Σ", "Σ", "UNICODE_CI", true); - assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", false); - assertEndsWith("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false); - assertEndsWith("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false); - assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", false); - assertEndsWith("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", false); - assertEndsWith("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", true); - assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE", false); - assertEndsWith("ΣΑΛΑΤΑ", "σ", "UNICODE", false); - assertEndsWith("ΣΑΛΑΤΑ", "ς", "UNICODE", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false); - assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", false); - assertEndsWith("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", false); - assertEndsWith("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", false); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", true); - assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", true); + assertEndsWith("σ", "σ", UTF8_BINARY, true); + assertEndsWith("σ", "ς", UTF8_BINARY, false); + assertEndsWith("σ", "Σ", UTF8_BINARY, false); + assertEndsWith("ς", "σ", UTF8_BINARY, false); + 
assertEndsWith("ς", "ς", UTF8_BINARY, true); + assertEndsWith("ς", "Σ", UTF8_BINARY, false); + assertEndsWith("Σ", "σ", UTF8_BINARY, false); + assertEndsWith("Σ", "ς", UTF8_BINARY, false); + assertEndsWith("Σ", "Σ", UTF8_BINARY, true); + assertEndsWith("σ", "σ", UTF8_LCASE, true); + assertEndsWith("σ", "ς", UTF8_LCASE, true); + assertEndsWith("σ", "Σ", UTF8_LCASE, true); + assertEndsWith("ς", "σ", UTF8_LCASE, true); + assertEndsWith("ς", "ς", UTF8_LCASE, true); + assertEndsWith("ς", "Σ", UTF8_LCASE, true); + assertEndsWith("Σ", "σ", UTF8_LCASE, true); + assertEndsWith("Σ", "ς", UTF8_LCASE, true); + assertEndsWith("Σ", "Σ", UTF8_LCASE, true); + assertEndsWith("σ", "σ", UNICODE, true); + assertEndsWith("σ", "ς", UNICODE, false); + assertEndsWith("σ", "Σ", UNICODE, false); + assertEndsWith("ς", "σ", UNICODE, false); + assertEndsWith("ς", "ς", UNICODE, true); + assertEndsWith("ς", "Σ", UNICODE, false); + assertEndsWith("Σ", "σ", UNICODE, false); + assertEndsWith("Σ", "ς", UNICODE, false); + assertEndsWith("Σ", "Σ", UNICODE, true); + assertEndsWith("σ", "σ", UNICODE_CI, true); + assertEndsWith("σ", "ς", UNICODE_CI, true); + assertEndsWith("σ", "Σ", UNICODE_CI, true); + assertEndsWith("ς", "σ", UNICODE_CI, true); + assertEndsWith("ς", "ς", UNICODE_CI, true); + assertEndsWith("ς", "Σ", UNICODE_CI, true); + assertEndsWith("Σ", "σ", UNICODE_CI, true); + assertEndsWith("Σ", "ς", UNICODE_CI, true); + assertEndsWith("Σ", "Σ", UNICODE_CI, true); + assertEndsWith("ΣΑΛΑΤΑ", "Σ", UTF8_BINARY, false); + assertEndsWith("ΣΑΛΑΤΑ", "σ", UTF8_BINARY, false); + assertEndsWith("ΣΑΛΑΤΑ", "ς", UTF8_BINARY, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_BINARY, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_BINARY, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_BINARY, false); + assertEndsWith("ΣΑΛΑΤΑ", "Σ", UTF8_LCASE, false); + assertEndsWith("ΣΑΛΑΤΑ", "σ", UTF8_LCASE, false); + assertEndsWith("ΣΑΛΑΤΑ", "ς", UTF8_LCASE, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UTF8_LCASE, true); + 
assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UTF8_LCASE, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UTF8_LCASE, true); + assertEndsWith("ΣΑΛΑΤΑ", "Σ", UNICODE, false); + assertEndsWith("ΣΑΛΑΤΑ", "σ", UNICODE, false); + assertEndsWith("ΣΑΛΑΤΑ", "ς", UNICODE, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE, false); + assertEndsWith("ΣΑΛΑΤΑ", "Σ", UNICODE_CI, false); + assertEndsWith("ΣΑΛΑΤΑ", "σ", UNICODE_CI, false); + assertEndsWith("ΣΑΛΑΤΑ", "ς", UNICODE_CI, false); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", UNICODE_CI, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", UNICODE_CI, true); + assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", UNICODE_CI, true); // Surrogate pairs. - assertEndsWith("a🙃b🙃c", "x", "UTF8_BINARY", false); - assertEndsWith("a🙃b🙃c", "x", "UTF8_LCASE", false); - assertEndsWith("a🙃b🙃c", "x", "UNICODE", false); - assertEndsWith("a🙃b🙃c", "x", "UNICODE_CI", false); - assertEndsWith("a🙃b🙃c", "b", "UTF8_BINARY", false); - assertEndsWith("a🙃b🙃c", "b", "UTF8_LCASE", false); - assertEndsWith("a🙃b🙃c", "b", "UNICODE", false); - assertEndsWith("a🙃b🙃c", "b", "UNICODE_CI", false); - assertEndsWith("a🙃b🙃c", "a🙃b", "UTF8_BINARY", false); - assertEndsWith("a🙃b🙃c", "a🙃b", "UTF8_LCASE", false); - assertEndsWith("a🙃b🙃c", "a🙃b", "UNICODE", false); - assertEndsWith("a🙃b🙃c", "a🙃b", "UNICODE_CI", false); - assertEndsWith("a🙃b🙃c", "b🙃c", "UTF8_BINARY", true); - assertEndsWith("a🙃b🙃c", "b🙃c", "UTF8_LCASE", true); - assertEndsWith("a🙃b🙃c", "b🙃c", "UNICODE", true); - assertEndsWith("a🙃b🙃c", "b🙃c", "UNICODE_CI", true); - assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true); - assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true); - assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true); - assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true); - assertEndsWith("😀😆😃😄", "😄😆", "UTF8_BINARY", false); - assertEndsWith("😀😆😃😄", "😄😆", "UTF8_LCASE", false); - assertEndsWith("😀😆😃😄", "😄😆", "UNICODE", false); - 
assertEndsWith("😀😆😃😄", "😄😆", "UNICODE_CI", false); - assertEndsWith("😀😆😃😄", "😆😃", "UTF8_BINARY", false); - assertEndsWith("😀😆😃😄", "😆😃", "UTF8_LCASE", false); - assertEndsWith("😀😆😃😄", "😆😃", "UNICODE", false); - assertEndsWith("😀😆😃😄", "😆😃", "UNICODE_CI", false); - assertEndsWith("😀😆😃😄", "😀😆", "UTF8_BINARY", false); - assertEndsWith("😀😆😃😄", "😀😆", "UTF8_LCASE", false); - assertEndsWith("😀😆😃😄", "😀😆", "UNICODE", false); - assertEndsWith("😀😆😃😄", "😀😆", "UNICODE_CI", false); - assertEndsWith("😀😆😃😄", "😃😄", "UTF8_BINARY", true); - assertEndsWith("😀😆😃😄", "😃😄", "UTF8_LCASE", true); - assertEndsWith("😀😆😃😄", "😃😄", "UNICODE", true); - assertEndsWith("😀😆😃😄", "😃😄", "UNICODE_CI", true); - assertEndsWith("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true); - assertEndsWith("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", true); - assertEndsWith("😀😆😃😄", "😀😆😃😄", "UNICODE", true); - assertEndsWith("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true); - assertEndsWith("𐐅", "𐐅", "UTF8_BINARY", true); - assertEndsWith("𐐅", "𐐅", "UTF8_LCASE", true); - assertEndsWith("𐐅", "𐐅", "UNICODE", true); - assertEndsWith("𐐅", "𐐅", "UNICODE_CI", true); - assertEndsWith("𐐅", "𐐭", "UTF8_BINARY", false); - assertEndsWith("𐐅", "𐐭", "UTF8_LCASE", true); - assertEndsWith("𐐅", "𐐭", "UNICODE", false); - assertEndsWith("𐐅", "𐐭", "UNICODE_CI", true); - assertEndsWith("𝔸", "𝔸", "UTF8_BINARY", true); - assertEndsWith("𝔸", "𝔸", "UTF8_LCASE", true); - assertEndsWith("𝔸", "𝔸", "UNICODE", true); - assertEndsWith("𝔸", "𝔸", "UNICODE_CI", true); + assertEndsWith("a🙃b🙃c", "x", UTF8_BINARY, false); + assertEndsWith("a🙃b🙃c", "x", UTF8_LCASE, false); + assertEndsWith("a🙃b🙃c", "x", UNICODE, false); + assertEndsWith("a🙃b🙃c", "x", UNICODE_CI, false); + assertEndsWith("a🙃b🙃c", "b", UTF8_BINARY, false); + assertEndsWith("a🙃b🙃c", "b", UTF8_LCASE, false); + assertEndsWith("a🙃b🙃c", "b", UNICODE, false); + assertEndsWith("a🙃b🙃c", "b", UNICODE_CI, false); + assertEndsWith("a🙃b🙃c", "a🙃b", UTF8_BINARY, false); + assertEndsWith("a🙃b🙃c", "a🙃b", UTF8_LCASE, false); + assertEndsWith("a🙃b🙃c", 
"a🙃b", UNICODE, false); + assertEndsWith("a🙃b🙃c", "a🙃b", UNICODE_CI, false); + assertEndsWith("a🙃b🙃c", "b🙃c", UTF8_BINARY, true); + assertEndsWith("a🙃b🙃c", "b🙃c", UTF8_LCASE, true); + assertEndsWith("a🙃b🙃c", "b🙃c", UNICODE, true); + assertEndsWith("a🙃b🙃c", "b🙃c", UNICODE_CI, true); + assertEndsWith("a🙃b🙃c", "a🙃b🙃c", UTF8_BINARY, true); + assertEndsWith("a🙃b🙃c", "a🙃b🙃c", UTF8_LCASE, true); + assertEndsWith("a🙃b🙃c", "a🙃b🙃c", UNICODE, true); + assertEndsWith("a🙃b🙃c", "a🙃b🙃c", UNICODE_CI, true); + assertEndsWith("😀😆😃😄", "😄😆", UTF8_BINARY, false); + assertEndsWith("😀😆😃😄", "😄😆", UTF8_LCASE, false); + assertEndsWith("😀😆😃😄", "😄😆", UNICODE, false); + assertEndsWith("😀😆😃😄", "😄😆", UNICODE_CI, false); + assertEndsWith("😀😆😃😄", "😆😃", UTF8_BINARY, false); + assertEndsWith("😀😆😃😄", "😆😃", UTF8_LCASE, false); + assertEndsWith("😀😆😃😄", "😆😃", UNICODE, false); + assertEndsWith("😀😆😃😄", "😆😃", UNICODE_CI, false); + assertEndsWith("😀😆😃😄", "😀😆", UTF8_BINARY, false); + assertEndsWith("😀😆😃😄", "😀😆", UTF8_LCASE, false); + assertEndsWith("😀😆😃😄", "😀😆", UNICODE, false); + assertEndsWith("😀😆😃😄", "😀😆", UNICODE_CI, false); + assertEndsWith("😀😆😃😄", "😃😄", UTF8_BINARY, true); + assertEndsWith("😀😆😃😄", "😃😄", UTF8_LCASE, true); + assertEndsWith("😀😆😃😄", "😃😄", UNICODE, true); + assertEndsWith("😀😆😃😄", "😃😄", UNICODE_CI, true); + assertEndsWith("😀😆😃😄", "😀😆😃😄", UTF8_BINARY, true); + assertEndsWith("😀😆😃😄", "😀😆😃😄", UTF8_LCASE, true); + assertEndsWith("😀😆😃😄", "😀😆😃😄", UNICODE, true); + assertEndsWith("😀😆😃😄", "😀😆😃😄", UNICODE_CI, true); + assertEndsWith("𐐅", "𐐅", UTF8_BINARY, true); + assertEndsWith("𐐅", "𐐅", UTF8_LCASE, true); + assertEndsWith("𐐅", "𐐅", UNICODE, true); + assertEndsWith("𐐅", "𐐅", UNICODE_CI, true); + assertEndsWith("𐐅", "𐐭", UTF8_BINARY, false); + assertEndsWith("𐐅", "𐐭", UTF8_LCASE, true); + assertEndsWith("𐐅", "𐐭", UNICODE, false); + assertEndsWith("𐐅", "𐐭", UNICODE_CI, true); + assertEndsWith("𝔸", "𝔸", UTF8_BINARY, true); + assertEndsWith("𝔸", "𝔸", UTF8_LCASE, true); + assertEndsWith("𝔸", "𝔸", 
UNICODE, true); + assertEndsWith("𝔸", "𝔸", UNICODE_CI, true); } /** @@ -1057,158 +1058,158 @@ public void testStringSplitSQL() throws SparkException { var array_AOB = new UTF8String[] { UTF8String.fromString("A𐐅B") }; var array_AoB = new UTF8String[] { UTF8String.fromString("A𐐭B") }; // Empty strings. - assertStringSplitSQL("", "", "UTF8_BINARY", empty_match); - assertStringSplitSQL("abc", "", "UTF8_BINARY", array_abc); - assertStringSplitSQL("", "abc", "UTF8_BINARY", empty_match); - assertStringSplitSQL("", "", "UNICODE", empty_match); - assertStringSplitSQL("abc", "", "UNICODE", array_abc); - assertStringSplitSQL("", "abc", "UNICODE", empty_match); - assertStringSplitSQL("", "", "UTF8_LCASE", empty_match); - assertStringSplitSQL("abc", "", "UTF8_LCASE", array_abc); - assertStringSplitSQL("", "abc", "UTF8_LCASE", empty_match); - assertStringSplitSQL("", "", "UNICODE_CI", empty_match); - assertStringSplitSQL("abc", "", "UNICODE_CI", array_abc); - assertStringSplitSQL("", "abc", "UNICODE_CI", empty_match); + assertStringSplitSQL("", "", UTF8_BINARY, empty_match); + assertStringSplitSQL("abc", "", UTF8_BINARY, array_abc); + assertStringSplitSQL("", "abc", UTF8_BINARY, empty_match); + assertStringSplitSQL("", "", UNICODE, empty_match); + assertStringSplitSQL("abc", "", UNICODE, array_abc); + assertStringSplitSQL("", "abc", UNICODE, empty_match); + assertStringSplitSQL("", "", UTF8_LCASE, empty_match); + assertStringSplitSQL("abc", "", UTF8_LCASE, array_abc); + assertStringSplitSQL("", "abc", UTF8_LCASE, empty_match); + assertStringSplitSQL("", "", UNICODE_CI, empty_match); + assertStringSplitSQL("abc", "", UNICODE_CI, array_abc); + assertStringSplitSQL("", "abc", UNICODE_CI, empty_match); // Basic tests. 
- assertStringSplitSQL("1a2", "a", "UTF8_BINARY", array_1_2); - assertStringSplitSQL("1a2", "A", "UTF8_BINARY", array_1a2); - assertStringSplitSQL("1a2", "b", "UTF8_BINARY", array_1a2); - assertStringSplitSQL("1a2", "1a2", "UNICODE", full_match); - assertStringSplitSQL("1a2", "1A2", "UNICODE", array_1a2); - assertStringSplitSQL("1a2", "3b4", "UNICODE", array_1a2); - assertStringSplitSQL("1a2", "A", "UTF8_LCASE", array_1_2); - assertStringSplitSQL("1a2", "1A2", "UTF8_LCASE", full_match); - assertStringSplitSQL("1a2", "X", "UTF8_LCASE", array_1a2); - assertStringSplitSQL("1a2", "a", "UNICODE_CI", array_1_2); - assertStringSplitSQL("1a2", "A", "UNICODE_CI", array_1_2); - assertStringSplitSQL("1a2", "1A2", "UNICODE_CI", full_match); - assertStringSplitSQL("1a2", "123", "UNICODE_CI", array_1a2); + assertStringSplitSQL("1a2", "a", UTF8_BINARY, array_1_2); + assertStringSplitSQL("1a2", "A", UTF8_BINARY, array_1a2); + assertStringSplitSQL("1a2", "b", UTF8_BINARY, array_1a2); + assertStringSplitSQL("1a2", "1a2", UNICODE, full_match); + assertStringSplitSQL("1a2", "1A2", UNICODE, array_1a2); + assertStringSplitSQL("1a2", "3b4", UNICODE, array_1a2); + assertStringSplitSQL("1a2", "A", UTF8_LCASE, array_1_2); + assertStringSplitSQL("1a2", "1A2", UTF8_LCASE, full_match); + assertStringSplitSQL("1a2", "X", UTF8_LCASE, array_1a2); + assertStringSplitSQL("1a2", "a", UNICODE_CI, array_1_2); + assertStringSplitSQL("1a2", "A", UNICODE_CI, array_1_2); + assertStringSplitSQL("1a2", "1A2", UNICODE_CI, full_match); + assertStringSplitSQL("1a2", "123", UNICODE_CI, array_1a2); // Advanced tests. 
- assertStringSplitSQL("äb世De", "b世D", "UTF8_BINARY", array_a_e); - assertStringSplitSQL("äb世De", "B世d", "UTF8_BINARY", array_special); - assertStringSplitSQL("äbćδe", "bćδ", "UTF8_BINARY", array_a_e); - assertStringSplitSQL("äbćδe", "BcΔ", "UTF8_BINARY", array_abcde); - assertStringSplitSQL("äb世De", "äb世De", "UNICODE", full_match); - assertStringSplitSQL("äb世De", "äB世de", "UNICODE", array_special); - assertStringSplitSQL("äbćδe", "äbćδe", "UNICODE", full_match); - assertStringSplitSQL("äbćδe", "ÄBcΔÉ", "UNICODE", array_abcde); - assertStringSplitSQL("äb世De", "b世D", "UTF8_LCASE", array_a_e); - assertStringSplitSQL("äb世De", "B世d", "UTF8_LCASE", array_a_e); - assertStringSplitSQL("äbćδe", "bćδ", "UTF8_LCASE", array_a_e); - assertStringSplitSQL("äbćδe", "BcΔ", "UTF8_LCASE", array_abcde); - assertStringSplitSQL("äb世De", "ab世De", "UNICODE_CI", array_special); - assertStringSplitSQL("äb世De", "AB世dE", "UNICODE_CI", array_special); - assertStringSplitSQL("äbćδe", "ÄbćδE", "UNICODE_CI", full_match); - assertStringSplitSQL("äbćδe", "ÄBcΔÉ", "UNICODE_CI", array_abcde); + assertStringSplitSQL("äb世De", "b世D", UTF8_BINARY, array_a_e); + assertStringSplitSQL("äb世De", "B世d", UTF8_BINARY, array_special); + assertStringSplitSQL("äbćδe", "bćδ", UTF8_BINARY, array_a_e); + assertStringSplitSQL("äbćδe", "BcΔ", UTF8_BINARY, array_abcde); + assertStringSplitSQL("äb世De", "äb世De", UNICODE, full_match); + assertStringSplitSQL("äb世De", "äB世de", UNICODE, array_special); + assertStringSplitSQL("äbćδe", "äbćδe", UNICODE, full_match); + assertStringSplitSQL("äbćδe", "ÄBcΔÉ", UNICODE, array_abcde); + assertStringSplitSQL("äb世De", "b世D", UTF8_LCASE, array_a_e); + assertStringSplitSQL("äb世De", "B世d", UTF8_LCASE, array_a_e); + assertStringSplitSQL("äbćδe", "bćδ", UTF8_LCASE, array_a_e); + assertStringSplitSQL("äbćδe", "BcΔ", UTF8_LCASE, array_abcde); + assertStringSplitSQL("äb世De", "ab世De", UNICODE_CI, array_special); + assertStringSplitSQL("äb世De", "AB世dE", UNICODE_CI, array_special); + 
assertStringSplitSQL("äbćδe", "ÄbćδE", UNICODE_CI, full_match); + assertStringSplitSQL("äbćδe", "ÄBcΔÉ", UNICODE_CI, array_abcde); // Case variation. - assertStringSplitSQL("AaXbB", "x", "UTF8_BINARY", array_AaXbB); - assertStringSplitSQL("AaXbB", "X", "UTF8_BINARY", array_Aa_bB); - assertStringSplitSQL("AaXbB", "axb", "UNICODE", array_AaXbB); - assertStringSplitSQL("AaXbB", "aXb", "UNICODE", array_A_B); - assertStringSplitSQL("AaXbB", "axb", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("AaXbB", "AXB", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("AaXbB", "axb", "UNICODE_CI", array_A_B); - assertStringSplitSQL("AaXbB", "AxB", "UNICODE_CI", array_A_B); + assertStringSplitSQL("AaXbB", "x", UTF8_BINARY, array_AaXbB); + assertStringSplitSQL("AaXbB", "X", UTF8_BINARY, array_Aa_bB); + assertStringSplitSQL("AaXbB", "axb", UNICODE, array_AaXbB); + assertStringSplitSQL("AaXbB", "aXb", UNICODE, array_A_B); + assertStringSplitSQL("AaXbB", "axb", UTF8_LCASE, array_A_B); + assertStringSplitSQL("AaXbB", "AXB", UTF8_LCASE, array_A_B); + assertStringSplitSQL("AaXbB", "axb", UNICODE_CI, array_A_B); + assertStringSplitSQL("AaXbB", "AxB", UNICODE_CI, array_A_B); // Accent variation. 
- assertStringSplitSQL("aBcDe", "bćd", "UTF8_BINARY", array_aBcDe); - assertStringSplitSQL("aBcDe", "BćD", "UTF8_BINARY", array_aBcDe); - assertStringSplitSQL("aBcDe", "abćde", "UNICODE", array_aBcDe); - assertStringSplitSQL("aBcDe", "aBćDe", "UNICODE", array_aBcDe); - assertStringSplitSQL("aBcDe", "bćd", "UTF8_LCASE", array_aBcDe); - assertStringSplitSQL("aBcDe", "BĆD", "UTF8_LCASE", array_aBcDe); - assertStringSplitSQL("aBcDe", "abćde", "UNICODE_CI", array_aBcDe); - assertStringSplitSQL("aBcDe", "AbĆdE", "UNICODE_CI", array_aBcDe); + assertStringSplitSQL("aBcDe", "bćd", UTF8_BINARY, array_aBcDe); + assertStringSplitSQL("aBcDe", "BćD", UTF8_BINARY, array_aBcDe); + assertStringSplitSQL("aBcDe", "abćde", UNICODE, array_aBcDe); + assertStringSplitSQL("aBcDe", "aBćDe", UNICODE, array_aBcDe); + assertStringSplitSQL("aBcDe", "bćd", UTF8_LCASE, array_aBcDe); + assertStringSplitSQL("aBcDe", "BĆD", UTF8_LCASE, array_aBcDe); + assertStringSplitSQL("aBcDe", "abćde", UNICODE_CI, array_aBcDe); + assertStringSplitSQL("aBcDe", "AbĆdE", UNICODE_CI, array_aBcDe); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertStringSplitSQL("İ", "i", "UTF8_BINARY", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "i", "UTF8_LCASE", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "i", "UNICODE", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "i", "UNICODE_CI", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "\u0307", "UTF8_BINARY", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "\u0307", "UTF8_LCASE", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "\u0307", "UNICODE", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("İ", "\u0307", "UNICODE_CI", array_Turkish_uppercase_dotted_I); - assertStringSplitSQL("i\u0307", "i", "UTF8_BINARY", array_dot); - assertStringSplitSQL("i\u0307", "i", "UTF8_LCASE", array_dot); - assertStringSplitSQL("i\u0307", "i", "UNICODE", array_Turkish_lowercase_dotted_i); - assertStringSplitSQL("i\u0307", "i", "UNICODE_CI", array_Turkish_lowercase_dotted_i); - assertStringSplitSQL("i\u0307", "\u0307", "UTF8_BINARY", array_i); - assertStringSplitSQL("i\u0307", "\u0307", "UTF8_LCASE", array_i); - assertStringSplitSQL("i\u0307", "\u0307", "UNICODE", array_Turkish_lowercase_dotted_i); - assertStringSplitSQL("i\u0307", "\u0307", "UNICODE_CI", array_Turkish_lowercase_dotted_i); - assertStringSplitSQL("AİB", "İ", "UTF8_BINARY", array_A_B); - assertStringSplitSQL("AİB", "İ", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("AİB", "İ", "UNICODE", array_A_B); - assertStringSplitSQL("AİB", "İ", "UNICODE_CI", array_A_B); - assertStringSplitSQL("AİB", "i\u0307", "UTF8_BINARY", array_AIB); - assertStringSplitSQL("AİB", "i\u0307", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("AİB", "i\u0307", "UNICODE", array_AIB); - assertStringSplitSQL("AİB", "i\u0307", "UNICODE_CI", array_A_B); - assertStringSplitSQL("Ai\u0307B", "İ", "UTF8_BINARY", array_AiB); - assertStringSplitSQL("Ai\u0307B", "İ", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("Ai\u0307B", "İ", "UNICODE", 
array_AiB); - assertStringSplitSQL("Ai\u0307B", "İ", "UNICODE_CI", array_A_B); - assertStringSplitSQL("Ai\u0307B", "i\u0307", "UTF8_BINARY", array_A_B); - assertStringSplitSQL("Ai\u0307B", "i\u0307", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("Ai\u0307B", "i\u0307", "UNICODE", array_A_B); - assertStringSplitSQL("Ai\u0307B", "i\u0307", "UNICODE_CI", array_A_B); + assertStringSplitSQL("İ", "i", UTF8_BINARY, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "i", UTF8_LCASE, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "i", UNICODE, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "i", UNICODE_CI, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "\u0307", UTF8_BINARY, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "\u0307", UTF8_LCASE, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "\u0307", UNICODE, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("İ", "\u0307", UNICODE_CI, array_Turkish_uppercase_dotted_I); + assertStringSplitSQL("i\u0307", "i", UTF8_BINARY, array_dot); + assertStringSplitSQL("i\u0307", "i", UTF8_LCASE, array_dot); + assertStringSplitSQL("i\u0307", "i", UNICODE, array_Turkish_lowercase_dotted_i); + assertStringSplitSQL("i\u0307", "i", UNICODE_CI, array_Turkish_lowercase_dotted_i); + assertStringSplitSQL("i\u0307", "\u0307", UTF8_BINARY, array_i); + assertStringSplitSQL("i\u0307", "\u0307", UTF8_LCASE, array_i); + assertStringSplitSQL("i\u0307", "\u0307", UNICODE, array_Turkish_lowercase_dotted_i); + assertStringSplitSQL("i\u0307", "\u0307", UNICODE_CI, array_Turkish_lowercase_dotted_i); + assertStringSplitSQL("AİB", "İ", UTF8_BINARY, array_A_B); + assertStringSplitSQL("AİB", "İ", UTF8_LCASE, array_A_B); + assertStringSplitSQL("AİB", "İ", UNICODE, array_A_B); + assertStringSplitSQL("AİB", "İ", UNICODE_CI, array_A_B); + assertStringSplitSQL("AİB", "i\u0307", UTF8_BINARY, array_AIB); + assertStringSplitSQL("AİB", "i\u0307", UTF8_LCASE, array_A_B); + 
assertStringSplitSQL("AİB", "i\u0307", UNICODE, array_AIB); + assertStringSplitSQL("AİB", "i\u0307", UNICODE_CI, array_A_B); + assertStringSplitSQL("Ai\u0307B", "İ", UTF8_BINARY, array_AiB); + assertStringSplitSQL("Ai\u0307B", "İ", UTF8_LCASE, array_A_B); + assertStringSplitSQL("Ai\u0307B", "İ", UNICODE, array_AiB); + assertStringSplitSQL("Ai\u0307B", "İ", UNICODE_CI, array_A_B); + assertStringSplitSQL("Ai\u0307B", "i\u0307", UTF8_BINARY, array_A_B); + assertStringSplitSQL("Ai\u0307B", "i\u0307", UTF8_LCASE, array_A_B); + assertStringSplitSQL("Ai\u0307B", "i\u0307", UNICODE, array_A_B); + assertStringSplitSQL("Ai\u0307B", "i\u0307", UNICODE_CI, array_A_B); // Conditional case mapping (e.g. Greek sigmas). - assertStringSplitSQL("σ", "σ", "UTF8_BINARY", full_match); - assertStringSplitSQL("σ", "σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("σ", "σ", "UNICODE", full_match); - assertStringSplitSQL("σ", "σ", "UNICODE_CI", full_match); - assertStringSplitSQL("σ", "ς", "UTF8_BINARY", array_small_nonfinal_sigma); - assertStringSplitSQL("σ", "ς", "UTF8_LCASE", full_match); - assertStringSplitSQL("σ", "ς", "UNICODE", array_small_nonfinal_sigma); - assertStringSplitSQL("σ", "ς", "UNICODE_CI", full_match); - assertStringSplitSQL("σ", "Σ", "UTF8_BINARY", array_small_nonfinal_sigma); - assertStringSplitSQL("σ", "Σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("σ", "Σ", "UNICODE", array_small_nonfinal_sigma); - assertStringSplitSQL("σ", "Σ", "UNICODE_CI", full_match); - assertStringSplitSQL("ς", "σ", "UTF8_BINARY", array_small_final_sigma); - assertStringSplitSQL("ς", "σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("ς", "σ", "UNICODE", array_small_final_sigma); - assertStringSplitSQL("ς", "σ", "UNICODE_CI", full_match); - assertStringSplitSQL("ς", "ς", "UTF8_BINARY", full_match); - assertStringSplitSQL("ς", "ς", "UTF8_LCASE", full_match); - assertStringSplitSQL("ς", "ς", "UNICODE", full_match); - assertStringSplitSQL("ς", "ς", "UNICODE_CI", full_match); - 
assertStringSplitSQL("ς", "Σ", "UTF8_BINARY", array_small_final_sigma); - assertStringSplitSQL("ς", "Σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("ς", "Σ", "UNICODE", array_small_final_sigma); - assertStringSplitSQL("ς", "Σ", "UNICODE_CI", full_match); - assertStringSplitSQL("Σ", "σ", "UTF8_BINARY", array_capital_sigma); - assertStringSplitSQL("Σ", "σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("Σ", "σ", "UNICODE", array_capital_sigma); - assertStringSplitSQL("Σ", "σ", "UNICODE_CI", full_match); - assertStringSplitSQL("Σ", "ς", "UTF8_BINARY", array_capital_sigma); - assertStringSplitSQL("Σ", "ς", "UTF8_LCASE", full_match); - assertStringSplitSQL("Σ", "ς", "UNICODE", array_capital_sigma); - assertStringSplitSQL("Σ", "ς", "UNICODE_CI", full_match); - assertStringSplitSQL("Σ", "Σ", "UTF8_BINARY", full_match); - assertStringSplitSQL("Σ", "Σ", "UTF8_LCASE", full_match); - assertStringSplitSQL("Σ", "Σ", "UNICODE", full_match); - assertStringSplitSQL("Σ", "Σ", "UNICODE_CI", full_match); + assertStringSplitSQL("σ", "σ", UTF8_BINARY, full_match); + assertStringSplitSQL("σ", "σ", UTF8_LCASE, full_match); + assertStringSplitSQL("σ", "σ", UNICODE, full_match); + assertStringSplitSQL("σ", "σ", UNICODE_CI, full_match); + assertStringSplitSQL("σ", "ς", UTF8_BINARY, array_small_nonfinal_sigma); + assertStringSplitSQL("σ", "ς", UTF8_LCASE, full_match); + assertStringSplitSQL("σ", "ς", UNICODE, array_small_nonfinal_sigma); + assertStringSplitSQL("σ", "ς", UNICODE_CI, full_match); + assertStringSplitSQL("σ", "Σ", UTF8_BINARY, array_small_nonfinal_sigma); + assertStringSplitSQL("σ", "Σ", UTF8_LCASE, full_match); + assertStringSplitSQL("σ", "Σ", UNICODE, array_small_nonfinal_sigma); + assertStringSplitSQL("σ", "Σ", UNICODE_CI, full_match); + assertStringSplitSQL("ς", "σ", UTF8_BINARY, array_small_final_sigma); + assertStringSplitSQL("ς", "σ", UTF8_LCASE, full_match); + assertStringSplitSQL("ς", "σ", UNICODE, array_small_final_sigma); + assertStringSplitSQL("ς", "σ", 
UNICODE_CI, full_match); + assertStringSplitSQL("ς", "ς", UTF8_BINARY, full_match); + assertStringSplitSQL("ς", "ς", UTF8_LCASE, full_match); + assertStringSplitSQL("ς", "ς", UNICODE, full_match); + assertStringSplitSQL("ς", "ς", UNICODE_CI, full_match); + assertStringSplitSQL("ς", "Σ", UTF8_BINARY, array_small_final_sigma); + assertStringSplitSQL("ς", "Σ", UTF8_LCASE, full_match); + assertStringSplitSQL("ς", "Σ", UNICODE, array_small_final_sigma); + assertStringSplitSQL("ς", "Σ", UNICODE_CI, full_match); + assertStringSplitSQL("Σ", "σ", UTF8_BINARY, array_capital_sigma); + assertStringSplitSQL("Σ", "σ", UTF8_LCASE, full_match); + assertStringSplitSQL("Σ", "σ", UNICODE, array_capital_sigma); + assertStringSplitSQL("Σ", "σ", UNICODE_CI, full_match); + assertStringSplitSQL("Σ", "ς", UTF8_BINARY, array_capital_sigma); + assertStringSplitSQL("Σ", "ς", UTF8_LCASE, full_match); + assertStringSplitSQL("Σ", "ς", UNICODE, array_capital_sigma); + assertStringSplitSQL("Σ", "ς", UNICODE_CI, full_match); + assertStringSplitSQL("Σ", "Σ", UTF8_BINARY, full_match); + assertStringSplitSQL("Σ", "Σ", UTF8_LCASE, full_match); + assertStringSplitSQL("Σ", "Σ", UNICODE, full_match); + assertStringSplitSQL("Σ", "Σ", UNICODE_CI, full_match); // Surrogate pairs. 
- assertStringSplitSQL("a🙃b🙃c", "🙃", "UTF8_BINARY", array_a_b_c); - assertStringSplitSQL("a🙃b🙃c", "🙃", "UTF8_LCASE", array_a_b_c); - assertStringSplitSQL("a🙃b🙃c", "🙃", "UNICODE", array_a_b_c); - assertStringSplitSQL("a🙃b🙃c", "🙃", "UNICODE_CI", array_a_b_c); - assertStringSplitSQL("😀😆😃😄", "😆😃", "UTF8_BINARY", array_emojis); - assertStringSplitSQL("😀😆😃😄", "😆😃", "UTF8_LCASE", array_emojis); - assertStringSplitSQL("😀😆😃😄", "😆😃", "UNICODE", array_emojis); - assertStringSplitSQL("😀😆😃😄", "😆😃", "UNICODE_CI", array_emojis); - assertStringSplitSQL("A𐐅B", "𐐅", "UTF8_BINARY", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐅", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐅", "UNICODE", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐅", "UNICODE_CI", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐭", "UTF8_BINARY", array_AOB); - assertStringSplitSQL("A𐐅B", "𐐭", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("A𐐅B", "𐐭", "UNICODE", array_AOB); - assertStringSplitSQL("A𐐅B", "𐐭", "UNICODE_CI", array_A_B); - assertStringSplitSQL("A𐐭B", "𐐅", "UTF8_BINARY", array_AoB); - assertStringSplitSQL("A𐐭B", "𐐅", "UTF8_LCASE", array_A_B); - assertStringSplitSQL("A𐐭B", "𐐅", "UNICODE", array_AoB); - assertStringSplitSQL("A𐐭B", "𐐅", "UNICODE_CI", array_A_B); + assertStringSplitSQL("a🙃b🙃c", "🙃", UTF8_BINARY, array_a_b_c); + assertStringSplitSQL("a🙃b🙃c", "🙃", UTF8_LCASE, array_a_b_c); + assertStringSplitSQL("a🙃b🙃c", "🙃", UNICODE, array_a_b_c); + assertStringSplitSQL("a🙃b🙃c", "🙃", UNICODE_CI, array_a_b_c); + assertStringSplitSQL("😀😆😃😄", "😆😃", UTF8_BINARY, array_emojis); + assertStringSplitSQL("😀😆😃😄", "😆😃", UTF8_LCASE, array_emojis); + assertStringSplitSQL("😀😆😃😄", "😆😃", UNICODE, array_emojis); + assertStringSplitSQL("😀😆😃😄", "😆😃", UNICODE_CI, array_emojis); + assertStringSplitSQL("A𐐅B", "𐐅", UTF8_BINARY, array_A_B); + assertStringSplitSQL("A𐐅B", "𐐅", UTF8_LCASE, array_A_B); + assertStringSplitSQL("A𐐅B", "𐐅", UNICODE, array_A_B); + assertStringSplitSQL("A𐐅B", "𐐅", UNICODE_CI, array_A_B); + 
assertStringSplitSQL("A𐐅B", "𐐭", UTF8_BINARY, array_AOB); + assertStringSplitSQL("A𐐅B", "𐐭", UTF8_LCASE, array_A_B); + assertStringSplitSQL("A𐐅B", "𐐭", UNICODE, array_AOB); + assertStringSplitSQL("A𐐅B", "𐐭", UNICODE_CI, array_A_B); + assertStringSplitSQL("A𐐭B", "𐐅", UTF8_BINARY, array_AoB); + assertStringSplitSQL("A𐐭B", "𐐅", UTF8_LCASE, array_A_B); + assertStringSplitSQL("A𐐭B", "𐐅", UNICODE, array_AoB); + assertStringSplitSQL("A𐐭B", "𐐅", UNICODE_CI, array_A_B); } /** @@ -1391,156 +1392,156 @@ public void testInitCap() throws SparkException { assertInitCap("θαλασσινος", collationName, "Θαλασσινος"); } // Advanced tests. - assertInitCap("aBćDe", "UTF8_BINARY", "Abćde"); - assertInitCap("aBćDe", "UTF8_LCASE", "Abćde"); - assertInitCap("aBćDe", "UNICODE", "Abćde"); - assertInitCap("aBćDe", "UNICODE_CI", "Abćde"); - assertInitCap("ab世De", "UTF8_BINARY", "Ab世de"); - assertInitCap("ab世De", "UTF8_LCASE", "Ab世De"); - assertInitCap("ab世De", "UNICODE", "Ab世De"); - assertInitCap("ab世De", "UNICODE_CI", "Ab世De"); - assertInitCap("äbćδe", "UTF8_BINARY", "Äbćδe"); - assertInitCap("äbćδe", "UTF8_LCASE", "Äbćδe"); - assertInitCap("äbćδe", "UNICODE", "Äbćδe"); - assertInitCap("äbćδe", "UNICODE_CI", "Äbćδe"); - assertInitCap("ÄBĆΔE", "UTF8_BINARY", "Äbćδe"); - assertInitCap("ÄBĆΔE", "UTF8_LCASE", "Äbćδe"); - assertInitCap("ÄBĆΔE", "UNICODE", "Äbćδe"); - assertInitCap("ÄBĆΔE", "UNICODE_CI", "Äbćδe"); + assertInitCap("aBćDe", UTF8_BINARY, "Abćde"); + assertInitCap("aBćDe", UTF8_LCASE, "Abćde"); + assertInitCap("aBćDe", UNICODE, "Abćde"); + assertInitCap("aBćDe", UNICODE_CI, "Abćde"); + assertInitCap("ab世De", UTF8_BINARY, "Ab世de"); + assertInitCap("ab世De", UTF8_LCASE, "Ab世De"); + assertInitCap("ab世De", UNICODE, "Ab世De"); + assertInitCap("ab世De", UNICODE_CI, "Ab世De"); + assertInitCap("äbćδe", UTF8_BINARY, "Äbćδe"); + assertInitCap("äbćδe", UTF8_LCASE, "Äbćδe"); + assertInitCap("äbćδe", UNICODE, "Äbćδe"); + assertInitCap("äbćδe", UNICODE_CI, "Äbćδe"); + assertInitCap("ÄBĆΔE", UTF8_BINARY, 
"Äbćδe"); + assertInitCap("ÄBĆΔE", UTF8_LCASE, "Äbćδe"); + assertInitCap("ÄBĆΔE", UNICODE, "Äbćδe"); + assertInitCap("ÄBĆΔE", UNICODE_CI, "Äbćδe"); assertInitCap("êéfgh", "AF_CI_AI", "Êéfgh"); assertInitCap("öoAÄ", "DE_CI_AI", "Öoaä"); // Case-variable character length - assertInitCap("İo", "UTF8_BINARY", "İo", "I\u0307o"); - assertInitCap("İo", "UTF8_LCASE", "İo"); - assertInitCap("İo", "UNICODE", "İo"); - assertInitCap("İo", "UNICODE_CI", "İo"); - assertInitCap("i\u0307o", "UTF8_BINARY", "I\u0307o"); - assertInitCap("i\u0307o", "UTF8_LCASE", "I\u0307o"); - assertInitCap("i\u0307o", "UNICODE", "I\u0307o"); - assertInitCap("i\u0307o", "UNICODE_CI", "I\u0307o"); + assertInitCap("İo", UTF8_BINARY, "İo", "I\u0307o"); + assertInitCap("İo", UTF8_LCASE, "İo"); + assertInitCap("İo", UNICODE, "İo"); + assertInitCap("İo", UNICODE_CI, "İo"); + assertInitCap("i\u0307o", UTF8_BINARY, "I\u0307o"); + assertInitCap("i\u0307o", UTF8_LCASE, "I\u0307o"); + assertInitCap("i\u0307o", UNICODE, "I\u0307o"); + assertInitCap("i\u0307o", UNICODE_CI, "I\u0307o"); // Different possible word boundaries - assertInitCap("aB 世 de", "UTF8_BINARY", "Ab 世 De"); - assertInitCap("aB 世 de", "UTF8_LCASE", "Ab 世 De"); - assertInitCap("aB 世 de", "UNICODE", "Ab 世 De"); - assertInitCap("aB 世 de", "UNICODE_CI", "Ab 世 De"); + assertInitCap("aB 世 de", UTF8_BINARY, "Ab 世 De"); + assertInitCap("aB 世 de", UTF8_LCASE, "Ab 世 De"); + assertInitCap("aB 世 de", UNICODE, "Ab 世 De"); + assertInitCap("aB 世 de", UNICODE_CI, "Ab 世 De"); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertInitCap("İ", "UTF8_BINARY", "İ", "I\u0307"); - assertInitCap("İ", "UTF8_LCASE", "İ"); - assertInitCap("İ", "UNICODE", "İ"); - assertInitCap("İ", "UNICODE_CI", "İ"); - assertInitCap("I\u0307", "UTF8_BINARY","I\u0307"); - assertInitCap("I\u0307", "UTF8_LCASE","I\u0307"); - assertInitCap("I\u0307", "UNICODE","I\u0307"); - assertInitCap("I\u0307", "UNICODE_CI","I\u0307"); - assertInitCap("İonic", "UTF8_BINARY", "İonic", "I\u0307onic"); - assertInitCap("İonic", "UTF8_LCASE", "İonic"); - assertInitCap("İonic", "UNICODE", "İonic"); - assertInitCap("İonic", "UNICODE_CI", "İonic"); - assertInitCap("i\u0307onic", "UTF8_BINARY","I\u0307onic"); - assertInitCap("i\u0307onic", "UTF8_LCASE","I\u0307onic"); - assertInitCap("i\u0307onic", "UNICODE","I\u0307onic"); - assertInitCap("i\u0307onic", "UNICODE_CI","I\u0307onic"); - assertInitCap("FIDELİO", "UTF8_BINARY", "Fideli\u0307o"); - assertInitCap("FIDELİO", "UTF8_LCASE", "Fideli\u0307o"); - assertInitCap("FIDELİO", "UNICODE", "Fideli\u0307o"); - assertInitCap("FIDELİO", "UNICODE_CI", "Fideli\u0307o"); + assertInitCap("İ", UTF8_BINARY, "İ", "I\u0307"); + assertInitCap("İ", UTF8_LCASE, "İ"); + assertInitCap("İ", UNICODE, "İ"); + assertInitCap("İ", UNICODE_CI, "İ"); + assertInitCap("I\u0307", UTF8_BINARY,"I\u0307"); + assertInitCap("I\u0307", UTF8_LCASE,"I\u0307"); + assertInitCap("I\u0307", UNICODE,"I\u0307"); + assertInitCap("I\u0307", UNICODE_CI,"I\u0307"); + assertInitCap("İonic", UTF8_BINARY, "İonic", "I\u0307onic"); + assertInitCap("İonic", UTF8_LCASE, "İonic"); + assertInitCap("İonic", UNICODE, "İonic"); + assertInitCap("İonic", UNICODE_CI, "İonic"); + assertInitCap("i\u0307onic", UTF8_BINARY,"I\u0307onic"); + assertInitCap("i\u0307onic", UTF8_LCASE,"I\u0307onic"); + assertInitCap("i\u0307onic", UNICODE,"I\u0307onic"); + assertInitCap("i\u0307onic", UNICODE_CI,"I\u0307onic"); + assertInitCap("FIDELİO", UTF8_BINARY, "Fideli\u0307o"); + assertInitCap("FIDELİO", UTF8_LCASE, "Fideli\u0307o"); + assertInitCap("FIDELİO", 
UNICODE, "Fideli\u0307o"); + assertInitCap("FIDELİO", UNICODE_CI, "Fideli\u0307o"); // Surrogate pairs. - assertInitCap("a🙃B🙃c", "UTF8_BINARY", "A🙃b🙃c"); - assertInitCap("a🙃B🙃c", "UTF8_LCASE", "A🙃B🙃C"); - assertInitCap("a🙃B🙃c", "UNICODE", "A🙃B🙃C"); - assertInitCap("a🙃B🙃c", "UNICODE_CI", "A🙃B🙃C"); - assertInitCap("😄 😆", "UTF8_BINARY", "😄 😆"); - assertInitCap("😄 😆", "UTF8_LCASE", "😄 😆"); - assertInitCap("😄 😆", "UNICODE", "😄 😆"); - assertInitCap("😄 😆", "UNICODE_CI", "😄 😆"); - assertInitCap("😀😆😃😄", "UTF8_BINARY", "😀😆😃😄"); - assertInitCap("😀😆😃😄", "UTF8_LCASE", "😀😆😃😄"); - assertInitCap("😀😆😃😄", "UNICODE", "😀😆😃😄"); - assertInitCap("😀😆😃😄", "UNICODE_CI", "😀😆😃😄"); - assertInitCap("𝔸", "UTF8_BINARY", "𝔸"); - assertInitCap("𝔸", "UTF8_LCASE", "𝔸"); - assertInitCap("𝔸", "UNICODE", "𝔸"); - assertInitCap("𝔸", "UNICODE_CI", "𝔸"); - assertInitCap("𐐅", "UTF8_BINARY", "\uD801\uDC05", "𐐭"); - assertInitCap("𐐅", "UTF8_LCASE", "𐐅"); - assertInitCap("𐐅", "UNICODE", "𐐅"); - assertInitCap("𐐅", "UNICODE_CI", "𐐅"); - assertInitCap("𐐭", "UTF8_BINARY", "\uD801\uDC05", "𐐭"); - assertInitCap("𐐭", "UTF8_LCASE", "𐐅"); - assertInitCap("𐐭", "UNICODE", "𐐅"); - assertInitCap("𐐭", "UNICODE_CI", "𐐅"); - assertInitCap("𐐭𝔸", "UTF8_BINARY", "\uD801\uDC05\uD835\uDD38", "𐐭𝔸"); - assertInitCap("𐐭𝔸", "UTF8_LCASE", "𐐅𝔸"); - assertInitCap("𐐭𝔸", "UNICODE", "𐐅𝔸"); - assertInitCap("𐐭𝔸", "UNICODE_CI", "𐐅𝔸"); + assertInitCap("a🙃B🙃c", UTF8_BINARY, "A🙃b🙃c"); + assertInitCap("a🙃B🙃c", UTF8_LCASE, "A🙃B🙃C"); + assertInitCap("a🙃B🙃c", UNICODE, "A🙃B🙃C"); + assertInitCap("a🙃B🙃c", UNICODE_CI, "A🙃B🙃C"); + assertInitCap("😄 😆", UTF8_BINARY, "😄 😆"); + assertInitCap("😄 😆", UTF8_LCASE, "😄 😆"); + assertInitCap("😄 😆", UNICODE, "😄 😆"); + assertInitCap("😄 😆", UNICODE_CI, "😄 😆"); + assertInitCap("😀😆😃😄", UTF8_BINARY, "😀😆😃😄"); + assertInitCap("😀😆😃😄", UTF8_LCASE, "😀😆😃😄"); + assertInitCap("😀😆😃😄", UNICODE, "😀😆😃😄"); + assertInitCap("😀😆😃😄", UNICODE_CI, "😀😆😃😄"); + assertInitCap("𝔸", UTF8_BINARY, "𝔸"); + assertInitCap("𝔸", UTF8_LCASE, "𝔸"); + 
assertInitCap("𝔸", UNICODE, "𝔸"); + assertInitCap("𝔸", UNICODE_CI, "𝔸"); + assertInitCap("𐐅", UTF8_BINARY, "\uD801\uDC05", "𐐭"); + assertInitCap("𐐅", UTF8_LCASE, "𐐅"); + assertInitCap("𐐅", UNICODE, "𐐅"); + assertInitCap("𐐅", UNICODE_CI, "𐐅"); + assertInitCap("𐐭", UTF8_BINARY, "\uD801\uDC05", "𐐭"); + assertInitCap("𐐭", UTF8_LCASE, "𐐅"); + assertInitCap("𐐭", UNICODE, "𐐅"); + assertInitCap("𐐭", UNICODE_CI, "𐐅"); + assertInitCap("𐐭𝔸", UTF8_BINARY, "\uD801\uDC05\uD835\uDD38", "𐐭𝔸"); + assertInitCap("𐐭𝔸", UTF8_LCASE, "𐐅𝔸"); + assertInitCap("𐐭𝔸", UNICODE, "𐐅𝔸"); + assertInitCap("𐐭𝔸", UNICODE_CI, "𐐅𝔸"); // Ligatures. - assertInitCap("ß fi ffi ff st ῗ", "UTF8_BINARY", "Ss Fi Ffi Ff St Ϊ͂", "ß fi ffi ff st ῗ"); - assertInitCap("ß fi ffi ff st ῗ", "UTF8_LCASE", "Ss Fi Ffi Ff St \u0399\u0308\u0342"); - assertInitCap("ß fi ffi ff st ῗ", "UNICODE", "Ss Fi Ffi Ff St \u0399\u0308\u0342"); - assertInitCap("ß fi ffi ff st ῗ", "UNICODE", "Ss Fi Ffi Ff St \u0399\u0308\u0342"); - assertInitCap("œ ǽ", "UTF8_BINARY", "Œ Ǽ", "Œ Ǽ"); + assertInitCap("ß fi ffi ff st ῗ", UTF8_BINARY, "Ss Fi Ffi Ff St Ϊ͂", "ß fi ffi ff st ῗ"); + assertInitCap("ß fi ffi ff st ῗ", UTF8_LCASE, "Ss Fi Ffi Ff St \u0399\u0308\u0342"); + assertInitCap("ß fi ffi ff st ῗ", UNICODE, "Ss Fi Ffi Ff St \u0399\u0308\u0342"); + assertInitCap("ß fi ffi ff st ῗ", UNICODE_CI, "Ss Fi Ffi Ff St \u0399\u0308\u0342"); + assertInitCap("œ ǽ", UTF8_BINARY, "Œ Ǽ", "Œ Ǽ"); // Different possible word boundaries. - assertInitCap("a b c", "UTF8_BINARY", "A B C"); - assertInitCap("a b c", "UNICODE", "A B C"); - assertInitCap("a b c", "UTF8_LCASE", "A B C"); - assertInitCap("a b c", "UNICODE_CI", "A B C"); - assertInitCap("a.b,c", "UTF8_BINARY", "A.b,c"); - assertInitCap("a.b,c", "UNICODE", "A.b,C"); - assertInitCap("a.b,c", "UTF8_LCASE", "A.b,C"); - assertInitCap("a.b,c", "UNICODE_CI", "A.b,C"); - assertInitCap("a. b-c", "UTF8_BINARY", "A. B-c"); - assertInitCap("a. b-c", "UNICODE", "A. B-C"); - assertInitCap("a. b-c", "UTF8_LCASE", "A. 
B-C"); - assertInitCap("a. b-c", "UNICODE_CI", "A. B-C"); - assertInitCap("a?b世c", "UTF8_BINARY", "A?b世c"); - assertInitCap("a?b世c", "UNICODE", "A?B世C"); - assertInitCap("a?b世c", "UTF8_LCASE", "A?B世C"); - assertInitCap("a?b世c", "UNICODE_CI", "A?B世C"); + assertInitCap("a b c", UTF8_BINARY, "A B C"); + assertInitCap("a b c", UNICODE, "A B C"); + assertInitCap("a b c", UTF8_LCASE, "A B C"); + assertInitCap("a b c", UNICODE_CI, "A B C"); + assertInitCap("a.b,c", UTF8_BINARY, "A.b,c"); + assertInitCap("a.b,c", UNICODE, "A.b,C"); + assertInitCap("a.b,c", UTF8_LCASE, "A.b,C"); + assertInitCap("a.b,c", UNICODE_CI, "A.b,C"); + assertInitCap("a. b-c", UTF8_BINARY, "A. B-c"); + assertInitCap("a. b-c", UNICODE, "A. B-C"); + assertInitCap("a. b-c", UTF8_LCASE, "A. B-C"); + assertInitCap("a. b-c", UNICODE_CI, "A. B-C"); + assertInitCap("a?b世c", UTF8_BINARY, "A?b世c"); + assertInitCap("a?b世c", UNICODE, "A?B世C"); + assertInitCap("a?b世c", UTF8_LCASE, "A?B世C"); + assertInitCap("a?b世c", UNICODE_CI, "A?B世C"); // Titlecase characters that are different from uppercase characters. 
- assertInitCap("dzDZDz", "UTF8_BINARY", "Dzdzdz"); - assertInitCap("dzDZDz", "UNICODE", "Dzdzdz"); - assertInitCap("dzDZDz", "UTF8_LCASE", "Dzdzdz"); - assertInitCap("dzDZDz", "UNICODE_CI", "Dzdzdz"); - assertInitCap("džaba Ljubav NJegova", "UTF8_BINARY", "Džaba Ljubav Njegova"); - assertInitCap("džaba Ljubav NJegova", "UNICODE", "Džaba Ljubav Njegova"); - assertInitCap("džaba Ljubav NJegova", "UTF8_LCASE", "Džaba Ljubav Njegova"); - assertInitCap("džaba Ljubav NJegova", "UNICODE_CI", "Džaba Ljubav Njegova"); - assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UTF8_BINARY", + assertInitCap("dzDZDz", UTF8_BINARY, "Dzdzdz"); + assertInitCap("dzDZDz", UNICODE, "Dzdzdz"); + assertInitCap("dzDZDz", UTF8_LCASE, "Dzdzdz"); + assertInitCap("dzDZDz", UNICODE_CI, "Dzdzdz"); + assertInitCap("džaba Ljubav NJegova", UTF8_BINARY, "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", UNICODE, "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", UTF8_LCASE, "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", UNICODE_CI, "Džaba Ljubav Njegova"); + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", UTF8_BINARY, "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota","ß fi ffi ff st Σημερινος Ασημενιος I\u0307ota"); - assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UTF8_LCASE", + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", UTF8_LCASE, "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota"); - assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE", + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", UNICODE, "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota"); - assertInitCap("ß fi ffi ff st ΣΗΜΕΡςΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE_CI", + assertInitCap("ß fi ffi ff st ΣΗΜΕΡςΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", UNICODE_CI, "Ss Fi Ffi Ff St Σημερςινος Ασημενιος İota"); // Characters that map to multiple characters when titlecased and lowercased. 
- assertInitCap("ß fi ffi ff st İOTA", "UTF8_BINARY", "Ss Fi Ffi Ff St İota", "ß fi ffi ff st İota"); - assertInitCap("ß fi ffi ff st OİOTA", "UTF8_BINARY", + assertInitCap("ß fi ffi ff st İOTA", UTF8_BINARY, "Ss Fi Ffi Ff St İota", "ß fi ffi ff st İota"); + assertInitCap("ß fi ffi ff st OİOTA", UTF8_BINARY, "Ss Fi Ffi Ff St Oi\u0307ota", "ß fi ffi ff st Oi̇ota"); // Lowercasing Greek letter sigma ('Σ') when case-ignorable character present. - assertInitCap("`Σ", "UTF8_BINARY", "`σ", "`σ"); - assertInitCap("1`Σ`` AΣ", "UTF8_BINARY", "1`σ`` Aς", "1`σ`` Aς"); - assertInitCap("a1`Σ``", "UTF8_BINARY", "A1`σ``", "A1`σ``"); - assertInitCap("a`Σ``", "UTF8_BINARY", "A`ς``", "A`σ``"); - assertInitCap("a`Σ``1", "UTF8_BINARY", "A`ς``1", "A`σ``1"); - assertInitCap("a`Σ``A", "UTF8_BINARY", "A`σ``a", "A`σ``a"); - assertInitCap("ΘΑ�Σ�ΟΣ�", "UTF8_BINARY", "Θα�σ�ος�", "Θα�σ�ος�"); - assertInitCap("ΘΑᵩΣ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θαᵩς�οᵩςᵩ�", "Θαᵩς�οᵩςᵩ�"); - assertInitCap("ΘΑ�ᵩΣ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θα�ᵩσ�οᵩςᵩ�", "Θα�ᵩσ�οᵩςᵩ�"); - assertInitCap("ΘΑ�ᵩΣᵩ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θα�ᵩσᵩ�οᵩςᵩ�", "Θα�ᵩσᵩ�οᵩςᵩ�"); - assertInitCap("ΘΑ�Σ�Ο�Σ�", "UTF8_BINARY", "Θα�σ�ο�σ�", "Θα�σ�ο�σ�"); + assertInitCap("`Σ", UTF8_BINARY, "`σ", "`σ"); + assertInitCap("1`Σ`` AΣ", UTF8_BINARY, "1`σ`` Aς", "1`σ`` Aς"); + assertInitCap("a1`Σ``", UTF8_BINARY, "A1`σ``", "A1`σ``"); + assertInitCap("a`Σ``", UTF8_BINARY, "A`ς``", "A`σ``"); + assertInitCap("a`Σ``1", UTF8_BINARY, "A`ς``1", "A`σ``1"); + assertInitCap("a`Σ``A", UTF8_BINARY, "A`σ``a", "A`σ``a"); + assertInitCap("ΘΑ�Σ�ΟΣ�", UTF8_BINARY, "Θα�σ�ος�", "Θα�σ�ος�"); + assertInitCap("ΘΑᵩΣ�ΟᵩΣᵩ�", UTF8_BINARY, "Θαᵩς�οᵩςᵩ�", "Θαᵩς�οᵩςᵩ�"); + assertInitCap("ΘΑ�ᵩΣ�ΟᵩΣᵩ�", UTF8_BINARY, "Θα�ᵩσ�οᵩςᵩ�", "Θα�ᵩσ�οᵩςᵩ�"); + assertInitCap("ΘΑ�ᵩΣᵩ�ΟᵩΣᵩ�", UTF8_BINARY, "Θα�ᵩσᵩ�οᵩςᵩ�", "Θα�ᵩσᵩ�οᵩςᵩ�"); + assertInitCap("ΘΑ�Σ�Ο�Σ�", UTF8_BINARY, "Θα�σ�ο�σ�", "Θα�σ�ο�σ�"); // Disallowed bytes and invalid sequences. 
assertInitCap(UTF8String.fromBytes(new byte[] { (byte)0xC0, (byte)0xC1, (byte)0xF5}).toString(), - "UTF8_BINARY", "���", "���"); + UTF8_BINARY, "���", "���"); assertInitCap(UTF8String.fromBytes( new byte[]{(byte)0xC0, (byte)0xC1, (byte)0xF5, 0x20, 0x61, 0x41, (byte)0xC0}).toString(), - "UTF8_BINARY", + UTF8_BINARY, "��� Aa�", "��� Aa�"); assertInitCap(UTF8String.fromBytes(new byte[]{(byte)0xC2,(byte)0xC2}).toString(), - "UTF8_BINARY", "��", "��"); + UTF8_BINARY, "��", "��"); assertInitCap(UTF8String.fromBytes( new byte[]{0x61, 0x41, (byte)0xC2, (byte)0xC2, 0x41}).toString(), - "UTF8_BINARY", + UTF8_BINARY, "Aa��a", "Aa��a"); } @@ -1559,147 +1560,147 @@ private void assertStringInstr(String string, String substring, @Test public void testStringInstr() throws SparkException { // Empty strings. - assertStringInstr("", "", "UTF8_BINARY", 1); - assertStringInstr("", "", "UTF8_LCASE", 1); - assertStringInstr("", "", "UNICODE_CI", 1); - assertStringInstr("", "", "UNICODE", 1); - assertStringInstr("a", "", "UTF8_BINARY", 1); - assertStringInstr("a", "", "UTF8_LCASE", 1); - assertStringInstr("a", "", "UNICODE", 1); - assertStringInstr("a", "", "UNICODE_CI", 1); - assertStringInstr("", "x", "UTF8_BINARY", 0); - assertStringInstr("", "x", "UTF8_LCASE", 0); - assertStringInstr("", "x", "UNICODE", 0); - assertStringInstr("", "x", "UNICODE_CI", 0); + assertStringInstr("", "", UTF8_BINARY, 1); + assertStringInstr("", "", UTF8_LCASE, 1); + assertStringInstr("", "", UNICODE_CI, 1); + assertStringInstr("", "", UNICODE, 1); + assertStringInstr("a", "", UTF8_BINARY, 1); + assertStringInstr("a", "", UTF8_LCASE, 1); + assertStringInstr("a", "", UNICODE, 1); + assertStringInstr("a", "", UNICODE_CI, 1); + assertStringInstr("", "x", UTF8_BINARY, 0); + assertStringInstr("", "x", UTF8_LCASE, 0); + assertStringInstr("", "x", UNICODE, 0); + assertStringInstr("", "x", UNICODE_CI, 0); // Basic tests. 
- assertStringInstr("aaads", "aa", "UTF8_BINARY", 1); - assertStringInstr("aaads", "aa", "UTF8_LCASE", 1); - assertStringInstr("aaads", "aa", "UNICODE", 1); - assertStringInstr("aaads", "aa", "UNICODE_CI", 1); - assertStringInstr("aaads", "ds", "UTF8_BINARY", 4); - assertStringInstr("aaads", "ds", "UTF8_LCASE", 4); - assertStringInstr("aaads", "ds", "UNICODE", 4); - assertStringInstr("aaads", "ds", "UNICODE_CI", 4); - assertStringInstr("aaads", "Aa", "UTF8_BINARY", 0); - assertStringInstr("aaads", "Aa", "UTF8_LCASE", 1); - assertStringInstr("aaads", "Aa", "UNICODE", 0); - assertStringInstr("aaads", "Aa", "UNICODE_CI", 1); - assertStringInstr("aaaDs", "de", "UTF8_BINARY", 0); - assertStringInstr("aaaDs", "de", "UTF8_LCASE", 0); - assertStringInstr("aaaDs", "de", "UNICODE", 0); - assertStringInstr("aaaDs", "de", "UNICODE_CI", 0); - assertStringInstr("aaaDs", "ds", "UTF8_BINARY", 0); - assertStringInstr("aaaDs", "ds", "UTF8_LCASE", 4); - assertStringInstr("aaaDs", "ds", "UNICODE", 0); - assertStringInstr("aaaDs", "ds", "UNICODE_CI", 4); - assertStringInstr("aaadS", "Ds", "UTF8_BINARY", 0); - assertStringInstr("aaadS", "Ds", "UTF8_LCASE", 4); - assertStringInstr("aaadS", "Ds", "UNICODE", 0); - assertStringInstr("aaadS", "Ds", "UNICODE_CI", 4); + assertStringInstr("aaads", "aa", UTF8_BINARY, 1); + assertStringInstr("aaads", "aa", UTF8_LCASE, 1); + assertStringInstr("aaads", "aa", UNICODE, 1); + assertStringInstr("aaads", "aa", UNICODE_CI, 1); + assertStringInstr("aaads", "ds", UTF8_BINARY, 4); + assertStringInstr("aaads", "ds", UTF8_LCASE, 4); + assertStringInstr("aaads", "ds", UNICODE, 4); + assertStringInstr("aaads", "ds", UNICODE_CI, 4); + assertStringInstr("aaads", "Aa", UTF8_BINARY, 0); + assertStringInstr("aaads", "Aa", UTF8_LCASE, 1); + assertStringInstr("aaads", "Aa", UNICODE, 0); + assertStringInstr("aaads", "Aa", UNICODE_CI, 1); + assertStringInstr("aaaDs", "de", UTF8_BINARY, 0); + assertStringInstr("aaaDs", "de", UTF8_LCASE, 0); + assertStringInstr("aaaDs", 
"de", UNICODE, 0); + assertStringInstr("aaaDs", "de", UNICODE_CI, 0); + assertStringInstr("aaaDs", "ds", UTF8_BINARY, 0); + assertStringInstr("aaaDs", "ds", UTF8_LCASE, 4); + assertStringInstr("aaaDs", "ds", UNICODE, 0); + assertStringInstr("aaaDs", "ds", UNICODE_CI, 4); + assertStringInstr("aaadS", "Ds", UTF8_BINARY, 0); + assertStringInstr("aaadS", "Ds", UTF8_LCASE, 4); + assertStringInstr("aaadS", "Ds", UNICODE, 0); + assertStringInstr("aaadS", "Ds", UNICODE_CI, 4); assertStringInstr("aaaČŠčšcs", "cs", "SR", 8); assertStringInstr("aaaČŠčšcs", "cs", "SR_CI_AI", 4); // Advanced tests. - assertStringInstr("test大千世界X大千世界", "大千", "UTF8_BINARY", 5); - assertStringInstr("test大千世界X大千世界", "大千", "UTF8_LCASE", 5); - assertStringInstr("test大千世界X大千世界", "大千", "UNICODE", 5); - assertStringInstr("test大千世界X大千世界", "大千", "UNICODE_CI", 5); - assertStringInstr("test大千世界X大千世界", "界X", "UTF8_BINARY", 8); - assertStringInstr("test大千世界X大千世界", "界X", "UTF8_LCASE", 8); - assertStringInstr("test大千世界X大千世界", "界X", "UNICODE", 8); - assertStringInstr("test大千世界X大千世界", "界X", "UNICODE_CI", 8); - assertStringInstr("test大千世界X大千世界", "界x", "UTF8_BINARY", 0); - assertStringInstr("test大千世界X大千世界", "界x", "UTF8_LCASE", 8); - assertStringInstr("test大千世界X大千世界", "界x", "UNICODE", 0); - assertStringInstr("test大千世界X大千世界", "界x", "UNICODE_CI", 8); - assertStringInstr("test大千世界X大千世界", "界y", "UTF8_BINARY", 0); - assertStringInstr("test大千世界X大千世界", "界y", "UTF8_LCASE", 0); - assertStringInstr("test大千世界X大千世界", "界y", "UNICODE", 0); - assertStringInstr("test大千世界X大千世界", "界y", "UNICODE_CI", 0); + assertStringInstr("test大千世界X大千世界", "大千", UTF8_BINARY, 5); + assertStringInstr("test大千世界X大千世界", "大千", UTF8_LCASE, 5); + assertStringInstr("test大千世界X大千世界", "大千", UNICODE, 5); + assertStringInstr("test大千世界X大千世界", "大千", UNICODE_CI, 5); + assertStringInstr("test大千世界X大千世界", "界X", UTF8_BINARY, 8); + assertStringInstr("test大千世界X大千世界", "界X", UTF8_LCASE, 8); + assertStringInstr("test大千世界X大千世界", "界X", UNICODE, 8); + 
assertStringInstr("test大千世界X大千世界", "界X", UNICODE_CI, 8); + assertStringInstr("test大千世界X大千世界", "界x", UTF8_BINARY, 0); + assertStringInstr("test大千世界X大千世界", "界x", UTF8_LCASE, 8); + assertStringInstr("test大千世界X大千世界", "界x", UNICODE, 0); + assertStringInstr("test大千世界X大千世界", "界x", UNICODE_CI, 8); + assertStringInstr("test大千世界X大千世界", "界y", UTF8_BINARY, 0); + assertStringInstr("test大千世界X大千世界", "界y", UTF8_LCASE, 0); + assertStringInstr("test大千世界X大千世界", "界y", UNICODE, 0); + assertStringInstr("test大千世界X大千世界", "界y", UNICODE_CI, 0); // One-to-many case mapping (e.g. Turkish dotted I). - assertStringInstr("i\u0307", "i", "UNICODE_CI", 0); - assertStringInstr("i\u0307", "\u0307", "UNICODE_CI", 0); - assertStringInstr("i\u0307", "İ", "UNICODE_CI", 1); - assertStringInstr("İ", "i", "UNICODE_CI", 0); - assertStringInstr("İoi̇o12", "i\u0307o", "UNICODE_CI", 1); - assertStringInstr("i̇oİo12", "İo", "UNICODE_CI", 1); - assertStringInstr("abİoi̇o", "i\u0307o", "UNICODE_CI", 3); - assertStringInstr("abi̇oİo", "İo", "UNICODE_CI", 3); - assertStringInstr("ai̇oxXİo", "Xx", "UNICODE_CI", 5); - assertStringInstr("aİoi̇oxx", "XX", "UNICODE_CI", 7); - assertStringInstr("i\u0307", "i", "UTF8_LCASE", 1); // != UNICODE_CI - assertStringInstr("i\u0307", "\u0307", "UTF8_LCASE", 2); // != UNICODE_CI - assertStringInstr("i\u0307", "İ", "UTF8_LCASE", 1); - assertStringInstr("İ", "i", "UTF8_LCASE", 0); - assertStringInstr("İoi̇o12", "i\u0307o", "UTF8_LCASE", 1); - assertStringInstr("i̇oİo12", "İo", "UTF8_LCASE", 1); - assertStringInstr("abİoi̇o", "i\u0307o", "UTF8_LCASE", 3); - assertStringInstr("abi̇oİo", "İo", "UTF8_LCASE", 3); - assertStringInstr("abI\u0307oi̇o", "İo", "UTF8_LCASE", 3); - assertStringInstr("ai̇oxXİo", "Xx", "UTF8_LCASE", 5); - assertStringInstr("abİoi̇o", "\u0307o", "UTF8_LCASE", 6); - assertStringInstr("aİoi̇oxx", "XX", "UTF8_LCASE", 7); + assertStringInstr("i\u0307", "i", UNICODE_CI, 0); + assertStringInstr("i\u0307", "\u0307", UNICODE_CI, 0); + assertStringInstr("i\u0307", "İ", 
UNICODE_CI, 1); + assertStringInstr("İ", "i", UNICODE_CI, 0); + assertStringInstr("İoi̇o12", "i\u0307o", UNICODE_CI, 1); + assertStringInstr("i̇oİo12", "İo", UNICODE_CI, 1); + assertStringInstr("abİoi̇o", "i\u0307o", UNICODE_CI, 3); + assertStringInstr("abi̇oİo", "İo", UNICODE_CI, 3); + assertStringInstr("ai̇oxXİo", "Xx", UNICODE_CI, 5); + assertStringInstr("aİoi̇oxx", "XX", UNICODE_CI, 7); + assertStringInstr("i\u0307", "i", UTF8_LCASE, 1); // != UNICODE_CI + assertStringInstr("i\u0307", "\u0307", UTF8_LCASE, 2); // != UNICODE_CI + assertStringInstr("i\u0307", "İ", UTF8_LCASE, 1); + assertStringInstr("İ", "i", UTF8_LCASE, 0); + assertStringInstr("İoi̇o12", "i\u0307o", UTF8_LCASE, 1); + assertStringInstr("i̇oİo12", "İo", UTF8_LCASE, 1); + assertStringInstr("abİoi̇o", "i\u0307o", UTF8_LCASE, 3); + assertStringInstr("abi̇oİo", "İo", UTF8_LCASE, 3); + assertStringInstr("abI\u0307oi̇o", "İo", UTF8_LCASE, 3); + assertStringInstr("ai̇oxXİo", "Xx", UTF8_LCASE, 5); + assertStringInstr("abİoi̇o", "\u0307o", UTF8_LCASE, 6); + assertStringInstr("aİoi̇oxx", "XX", UTF8_LCASE, 7); // Conditional case mapping (e.g. Greek sigmas). 
- assertStringInstr("σ", "σ", "UTF8_BINARY", 1); - assertStringInstr("σ", "ς", "UTF8_BINARY", 0); - assertStringInstr("σ", "Σ", "UTF8_BINARY", 0); - assertStringInstr("ς", "σ", "UTF8_BINARY", 0); - assertStringInstr("ς", "ς", "UTF8_BINARY", 1); - assertStringInstr("ς", "Σ", "UTF8_BINARY", 0); - assertStringInstr("Σ", "σ", "UTF8_BINARY", 0); - assertStringInstr("Σ", "ς", "UTF8_BINARY", 0); - assertStringInstr("Σ", "Σ", "UTF8_BINARY", 1); - assertStringInstr("σ", "σ", "UTF8_LCASE", 1); - assertStringInstr("σ", "ς", "UTF8_LCASE", 1); - assertStringInstr("σ", "Σ", "UTF8_LCASE", 1); - assertStringInstr("ς", "σ", "UTF8_LCASE", 1); - assertStringInstr("ς", "ς", "UTF8_LCASE", 1); - assertStringInstr("ς", "Σ", "UTF8_LCASE", 1); - assertStringInstr("Σ", "σ", "UTF8_LCASE", 1); - assertStringInstr("Σ", "ς", "UTF8_LCASE", 1); - assertStringInstr("Σ", "Σ", "UTF8_LCASE", 1); - assertStringInstr("σ", "σ", "UNICODE", 1); - assertStringInstr("σ", "ς", "UNICODE", 0); - assertStringInstr("σ", "Σ", "UNICODE", 0); - assertStringInstr("ς", "σ", "UNICODE", 0); - assertStringInstr("ς", "ς", "UNICODE", 1); - assertStringInstr("ς", "Σ", "UNICODE", 0); - assertStringInstr("Σ", "σ", "UNICODE", 0); - assertStringInstr("Σ", "ς", "UNICODE", 0); - assertStringInstr("Σ", "Σ", "UNICODE", 1); - assertStringInstr("σ", "σ", "UNICODE_CI", 1); - assertStringInstr("σ", "ς", "UNICODE_CI", 1); - assertStringInstr("σ", "Σ", "UNICODE_CI", 1); - assertStringInstr("ς", "σ", "UNICODE_CI", 1); - assertStringInstr("ς", "ς", "UNICODE_CI", 1); - assertStringInstr("ς", "Σ", "UNICODE_CI", 1); - assertStringInstr("Σ", "σ", "UNICODE_CI", 1); - assertStringInstr("Σ", "ς", "UNICODE_CI", 1); - assertStringInstr("Σ", "Σ", "UNICODE_CI", 1); + assertStringInstr("σ", "σ", UTF8_BINARY, 1); + assertStringInstr("σ", "ς", UTF8_BINARY, 0); + assertStringInstr("σ", "Σ", UTF8_BINARY, 0); + assertStringInstr("ς", "σ", UTF8_BINARY, 0); + assertStringInstr("ς", "ς", UTF8_BINARY, 1); + assertStringInstr("ς", "Σ", UTF8_BINARY, 0); + 
assertStringInstr("Σ", "σ", UTF8_BINARY, 0); + assertStringInstr("Σ", "ς", UTF8_BINARY, 0); + assertStringInstr("Σ", "Σ", UTF8_BINARY, 1); + assertStringInstr("σ", "σ", UTF8_LCASE, 1); + assertStringInstr("σ", "ς", UTF8_LCASE, 1); + assertStringInstr("σ", "Σ", UTF8_LCASE, 1); + assertStringInstr("ς", "σ", UTF8_LCASE, 1); + assertStringInstr("ς", "ς", UTF8_LCASE, 1); + assertStringInstr("ς", "Σ", UTF8_LCASE, 1); + assertStringInstr("Σ", "σ", UTF8_LCASE, 1); + assertStringInstr("Σ", "ς", UTF8_LCASE, 1); + assertStringInstr("Σ", "Σ", UTF8_LCASE, 1); + assertStringInstr("σ", "σ", UNICODE, 1); + assertStringInstr("σ", "ς", UNICODE, 0); + assertStringInstr("σ", "Σ", UNICODE, 0); + assertStringInstr("ς", "σ", UNICODE, 0); + assertStringInstr("ς", "ς", UNICODE, 1); + assertStringInstr("ς", "Σ", UNICODE, 0); + assertStringInstr("Σ", "σ", UNICODE, 0); + assertStringInstr("Σ", "ς", UNICODE, 0); + assertStringInstr("Σ", "Σ", UNICODE, 1); + assertStringInstr("σ", "σ", UNICODE_CI, 1); + assertStringInstr("σ", "ς", UNICODE_CI, 1); + assertStringInstr("σ", "Σ", UNICODE_CI, 1); + assertStringInstr("ς", "σ", UNICODE_CI, 1); + assertStringInstr("ς", "ς", UNICODE_CI, 1); + assertStringInstr("ς", "Σ", UNICODE_CI, 1); + assertStringInstr("Σ", "σ", UNICODE_CI, 1); + assertStringInstr("Σ", "ς", UNICODE_CI, 1); + assertStringInstr("Σ", "Σ", UNICODE_CI, 1); // Surrogate pairs. 
- assertStringInstr("a🙃b", "a", "UTF8_BINARY", 1); - assertStringInstr("a🙃b", "a", "UTF8_LCASE", 1); - assertStringInstr("a🙃b", "a", "UNICODE", 1); - assertStringInstr("a🙃b", "a", "UNICODE_CI", 1); - assertStringInstr("a🙃b", "🙃", "UTF8_BINARY", 2); - assertStringInstr("a🙃b", "🙃", "UTF8_LCASE", 2); - assertStringInstr("a🙃b", "🙃", "UNICODE", 2); - assertStringInstr("a🙃b", "🙃", "UNICODE_CI", 2); - assertStringInstr("a🙃b", "b", "UTF8_BINARY", 3); - assertStringInstr("a🙃b", "b", "UTF8_LCASE", 3); - assertStringInstr("a🙃b", "b", "UNICODE", 3); - assertStringInstr("a🙃b", "b", "UNICODE_CI", 3); - assertStringInstr("a🙃🙃b", "🙃", "UTF8_BINARY", 2); - assertStringInstr("a🙃🙃b", "🙃", "UTF8_LCASE", 2); - assertStringInstr("a🙃🙃b", "🙃", "UNICODE", 2); - assertStringInstr("a🙃🙃b", "🙃", "UNICODE_CI", 2); - assertStringInstr("a🙃🙃b", "b", "UTF8_BINARY", 4); - assertStringInstr("a🙃🙃b", "b", "UTF8_LCASE", 4); - assertStringInstr("a🙃🙃b", "b", "UNICODE", 4); - assertStringInstr("a🙃🙃b", "b", "UNICODE_CI", 4); - assertStringInstr("a🙃x🙃b", "b", "UTF8_BINARY", 5); - assertStringInstr("a🙃x🙃b", "b", "UTF8_LCASE", 5); - assertStringInstr("a🙃x🙃b", "b", "UNICODE", 5); - assertStringInstr("a🙃x🙃b", "b", "UNICODE_CI", 5); + assertStringInstr("a🙃b", "a", UTF8_BINARY, 1); + assertStringInstr("a🙃b", "a", UTF8_LCASE, 1); + assertStringInstr("a🙃b", "a", UNICODE, 1); + assertStringInstr("a🙃b", "a", UNICODE_CI, 1); + assertStringInstr("a🙃b", "🙃", UTF8_BINARY, 2); + assertStringInstr("a🙃b", "🙃", UTF8_LCASE, 2); + assertStringInstr("a🙃b", "🙃", UNICODE, 2); + assertStringInstr("a🙃b", "🙃", UNICODE_CI, 2); + assertStringInstr("a🙃b", "b", UTF8_BINARY, 3); + assertStringInstr("a🙃b", "b", UTF8_LCASE, 3); + assertStringInstr("a🙃b", "b", UNICODE, 3); + assertStringInstr("a🙃b", "b", UNICODE_CI, 3); + assertStringInstr("a🙃🙃b", "🙃", UTF8_BINARY, 2); + assertStringInstr("a🙃🙃b", "🙃", UTF8_LCASE, 2); + assertStringInstr("a🙃🙃b", "🙃", UNICODE, 2); + assertStringInstr("a🙃🙃b", "🙃", UNICODE_CI, 2); + assertStringInstr("a🙃🙃b", 
"b", UTF8_BINARY, 4); + assertStringInstr("a🙃🙃b", "b", UTF8_LCASE, 4); + assertStringInstr("a🙃🙃b", "b", UNICODE, 4); + assertStringInstr("a🙃🙃b", "b", UNICODE_CI, 4); + assertStringInstr("a🙃x🙃b", "b", UTF8_BINARY, 5); + assertStringInstr("a🙃x🙃b", "b", UTF8_LCASE, 5); + assertStringInstr("a🙃x🙃b", "b", UNICODE, 5); + assertStringInstr("a🙃x🙃b", "b", UNICODE_CI, 5); } /** @@ -1717,256 +1718,256 @@ private void assertFindInSet(String word, UTF8String set, String collationName, @Test public void testFindInSet() throws SparkException { // Empty strings. - assertFindInSet("", UTF8String.fromString(""), "UTF8_BINARY", 1); - assertFindInSet("", UTF8String.fromString(""), "UTF8_LCASE", 1); - assertFindInSet("", UTF8String.fromString(""), "UNICODE", 1); - assertFindInSet("", UTF8String.fromString(""), "UNICODE_CI", 1); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_BINARY", 1); - assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_LCASE", 1); - assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE", 1); - assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE_CI", 1); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_BINARY", 6); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_LCASE", 6); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE", 6); - assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE_CI", 6); - assertFindInSet("", UTF8String.fromString("abc"), "UTF8_BINARY", 0); - assertFindInSet("", UTF8String.fromString("abc"), "UTF8_LCASE", 0); - assertFindInSet("", UTF8String.fromString("abc"), "UNICODE", 0); 
- assertFindInSet("", UTF8String.fromString("abc"), "UNICODE_CI", 0); + assertFindInSet("", UTF8String.fromString(""), UTF8_BINARY, 1); + assertFindInSet("", UTF8String.fromString(""), UTF8_LCASE, 1); + assertFindInSet("", UTF8String.fromString(""), UNICODE, 1); + assertFindInSet("", UTF8String.fromString(""), UNICODE_CI, 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), UTF8_BINARY, 1); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), UTF8_LCASE, 1); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), UNICODE, 1); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), UNICODE_CI, 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), UTF8_BINARY, 6); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), UTF8_LCASE, 6); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), UNICODE, 6); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), UNICODE_CI, 6); + assertFindInSet("", UTF8String.fromString("abc"), UTF8_BINARY, 0); + assertFindInSet("", UTF8String.fromString("abc"), UTF8_LCASE, 0); + assertFindInSet("", UTF8String.fromString("abc"), UNICODE, 0); + assertFindInSet("", UTF8String.fromString("abc"), UNICODE_CI, 0); // Basic tests. 
- assertFindInSet("xx", UTF8String.fromString("xx"), "UTF8_BINARY", 1); - assertFindInSet("xx", UTF8String.fromString("xx"), "UTF8_LCASE", 1); - assertFindInSet("xx", UTF8String.fromString("xx"), "UNICODE", 1); - assertFindInSet("xx", UTF8String.fromString("xx"), "UNICODE_CI", 1); - assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 1); - assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 1); - assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 1); - assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 1); - assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 5); - assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 5); - assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 5); - assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 5); - assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("Ab", 
UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 3); - assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 3); - assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); - assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); - assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); - assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 4); - assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); - assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 4); + assertFindInSet("xx", UTF8String.fromString("xx"), UTF8_BINARY, 1); + assertFindInSet("xx", UTF8String.fromString("xx"), UTF8_LCASE, 1); + assertFindInSet("xx", UTF8String.fromString("xx"), UNICODE, 1); + assertFindInSet("xx", UTF8String.fromString("xx"), UNICODE_CI, 1); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 1); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 1); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 1); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 1); + assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("abcd", 
UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 5); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 5); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 5); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 5); + assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 3); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 3); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 0); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 0); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), UTF8_BINARY, 0); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), UTF8_LCASE, 4); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), UNICODE, 0); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), UNICODE_CI, 4); // Advanced tests. 
- assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_BINARY", 5); - assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_LCASE", 5); - assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 5); - assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 5); - assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_BINARY", 0); - assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_LCASE", 4); - assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 0); - assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 4); - assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UTF8_BINARY", 0); - assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UTF8_LCASE", 5); - assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UNICODE", 0); - assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UNICODE_CI", 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UTF8_BINARY, 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UTF8_LCASE, 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UNICODE, 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UNICODE_CI, 5); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UTF8_BINARY, 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UTF8_LCASE, 4); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UNICODE, 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), UNICODE_CI, 4); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), UTF8_BINARY, 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), UTF8_LCASE, 5); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), 
UNICODE, 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), UNICODE_CI, 5); // One-to-many case mapping (e.g. Turkish dotted I). - assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UTF8_LCASE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UNICODE", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UNICODE_CI", 1); - assertFindInSet("i", UTF8String.fromString("İ"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("İ"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("İ"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("İ"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UTF8_BINARY", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UTF8_LCASE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UNICODE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UNICODE_CI", 1); - assertFindInSet("i", UTF8String.fromString("i\u0307"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UTF8_LCASE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UNICODE", 0); - assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UNICODE_CI", 1); - assertFindInSet("i", UTF8String.fromString("İ,"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("İ,"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("İ,"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("İ,"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), 
"UTF8_BINARY", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UTF8_LCASE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UNICODE", 1); - assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UNICODE_CI", 1); - assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UNICODE", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UNICODE_CI", 2); - assertFindInSet("i", UTF8String.fromString("ab,İ"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UNICODE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 2); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 0); - assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 0); - assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 2); - assertFindInSet("İ", 
UTF8String.fromString("ab,i\u0307"), "UNICODE", 0); - assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UNICODE", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 2); - assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 0); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UTF8_BINARY", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UNICODE", 2); - assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UNICODE_CI", 2); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UTF8_BINARY", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UTF8_LCASE", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UNICODE", 0); - assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UNICODE_CI", 0); - assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UTF8_BINARY", 0); - assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UTF8_LCASE", 2); - assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UNICODE", 0); - assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UNICODE_CI", 2); - assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UTF8_BINARY", 0); - assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UTF8_LCASE", 2); - assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UNICODE", 0); - 
assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UNICODE_CI", 2); + assertFindInSet("i\u0307", UTF8String.fromString("İ"), UTF8_BINARY, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ"), UTF8_LCASE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("İ"), UNICODE, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ"), UNICODE_CI, 1); + assertFindInSet("i", UTF8String.fromString("İ"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("İ"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("İ"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("İ"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), UTF8_BINARY, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), UTF8_LCASE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), UNICODE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), UNICODE_CI, 1); + assertFindInSet("i", UTF8String.fromString("i\u0307"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ,"), UTF8_BINARY, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ,"), UTF8_LCASE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("İ,"), UNICODE, 0); + assertFindInSet("i\u0307", UTF8String.fromString("İ,"), UNICODE_CI, 1); + assertFindInSet("i", UTF8String.fromString("İ,"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("İ,"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("İ,"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("İ,"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), UTF8_BINARY, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), UTF8_LCASE, 1); + assertFindInSet("i\u0307", 
UTF8String.fromString("i\u0307,"), UNICODE, 1); + assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), UNICODE_CI, 1); + assertFindInSet("i", UTF8String.fromString("i\u0307,"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307,"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307,"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("i\u0307,"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), UTF8_BINARY, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), UTF8_LCASE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), UNICODE, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), UNICODE_CI, 2); + assertFindInSet("i", UTF8String.fromString("ab,İ"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), UTF8_BINARY, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), UTF8_LCASE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), UNICODE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), UNICODE_CI, 2); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), UNICODE_CI, 0); + assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), UTF8_BINARY, 0); + assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), UTF8_LCASE, 2); + assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), UNICODE, 0); + assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), UNICODE_CI, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), 
UTF8_BINARY, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), UTF8_LCASE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), UNICODE, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), UNICODE_CI, 2); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), UNICODE_CI, 0); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), UTF8_BINARY, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), UTF8_LCASE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), UNICODE, 2); + assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), UNICODE_CI, 2); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), UTF8_BINARY, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), UTF8_LCASE, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), UNICODE, 0); + assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), UNICODE_CI, 0); + assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), UTF8_BINARY, 0); + assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), UTF8_LCASE, 2); + assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), UNICODE, 0); + assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), UNICODE_CI, 2); + assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), UTF8_BINARY, 0); + assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), UTF8_LCASE, 2); + assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), UNICODE, 0); + assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), UNICODE_CI, 2); // Conditional case mapping (e.g. Greek sigmas). 
- assertFindInSet("σ", UTF8String.fromString("σ"), "UTF8_BINARY", 1); - assertFindInSet("σ", UTF8String.fromString("ς"), "UTF8_BINARY", 0); - assertFindInSet("σ", UTF8String.fromString("Σ"), "UTF8_BINARY", 0); - assertFindInSet("ς", UTF8String.fromString("σ"), "UTF8_BINARY", 0); - assertFindInSet("ς", UTF8String.fromString("ς"), "UTF8_BINARY", 1); - assertFindInSet("ς", UTF8String.fromString("Σ"), "UTF8_BINARY", 0); - assertFindInSet("Σ", UTF8String.fromString("σ"), "UTF8_BINARY", 0); - assertFindInSet("Σ", UTF8String.fromString("ς"), "UTF8_BINARY", 0); - assertFindInSet("Σ", UTF8String.fromString("Σ"), "UTF8_BINARY", 1); - assertFindInSet("σ", UTF8String.fromString("σ"), "UTF8_LCASE", 1); - assertFindInSet("σ", UTF8String.fromString("ς"), "UTF8_LCASE", 1); - assertFindInSet("σ", UTF8String.fromString("Σ"), "UTF8_LCASE", 1); - assertFindInSet("ς", UTF8String.fromString("σ"), "UTF8_LCASE", 1); - assertFindInSet("ς", UTF8String.fromString("ς"), "UTF8_LCASE", 1); - assertFindInSet("ς", UTF8String.fromString("Σ"), "UTF8_LCASE", 1); - assertFindInSet("Σ", UTF8String.fromString("σ"), "UTF8_LCASE", 1); - assertFindInSet("Σ", UTF8String.fromString("ς"), "UTF8_LCASE", 1); - assertFindInSet("Σ", UTF8String.fromString("Σ"), "UTF8_LCASE", 1); - assertFindInSet("σ", UTF8String.fromString("σ"), "UNICODE", 1); - assertFindInSet("σ", UTF8String.fromString("ς"), "UNICODE", 0); - assertFindInSet("σ", UTF8String.fromString("Σ"), "UNICODE", 0); - assertFindInSet("ς", UTF8String.fromString("σ"), "UNICODE", 0); - assertFindInSet("ς", UTF8String.fromString("ς"), "UNICODE", 1); - assertFindInSet("ς", UTF8String.fromString("Σ"), "UNICODE", 0); - assertFindInSet("Σ", UTF8String.fromString("σ"), "UNICODE", 0); - assertFindInSet("Σ", UTF8String.fromString("ς"), "UNICODE", 0); - assertFindInSet("Σ", UTF8String.fromString("Σ"), "UNICODE", 1); - assertFindInSet("σ", UTF8String.fromString("σ"), "UNICODE_CI", 1); - assertFindInSet("σ", UTF8String.fromString("ς"), "UNICODE_CI", 1); - 
assertFindInSet("σ", UTF8String.fromString("Σ"), "UNICODE_CI", 1); - assertFindInSet("ς", UTF8String.fromString("σ"), "UNICODE_CI", 1); - assertFindInSet("ς", UTF8String.fromString("ς"), "UNICODE_CI", 1); - assertFindInSet("ς", UTF8String.fromString("Σ"), "UNICODE_CI", 1); - assertFindInSet("Σ", UTF8String.fromString("σ"), "UNICODE_CI", 1); - assertFindInSet("Σ", UTF8String.fromString("ς"), "UNICODE_CI", 1); - assertFindInSet("Σ", UTF8String.fromString("Σ"), "UNICODE_CI", 1); + assertFindInSet("σ", UTF8String.fromString("σ"), UTF8_BINARY, 1); + assertFindInSet("σ", UTF8String.fromString("ς"), UTF8_BINARY, 0); + assertFindInSet("σ", UTF8String.fromString("Σ"), UTF8_BINARY, 0); + assertFindInSet("ς", UTF8String.fromString("σ"), UTF8_BINARY, 0); + assertFindInSet("ς", UTF8String.fromString("ς"), UTF8_BINARY, 1); + assertFindInSet("ς", UTF8String.fromString("Σ"), UTF8_BINARY, 0); + assertFindInSet("Σ", UTF8String.fromString("σ"), UTF8_BINARY, 0); + assertFindInSet("Σ", UTF8String.fromString("ς"), UTF8_BINARY, 0); + assertFindInSet("Σ", UTF8String.fromString("Σ"), UTF8_BINARY, 1); + assertFindInSet("σ", UTF8String.fromString("σ"), UTF8_LCASE, 1); + assertFindInSet("σ", UTF8String.fromString("ς"), UTF8_LCASE, 1); + assertFindInSet("σ", UTF8String.fromString("Σ"), UTF8_LCASE, 1); + assertFindInSet("ς", UTF8String.fromString("σ"), UTF8_LCASE, 1); + assertFindInSet("ς", UTF8String.fromString("ς"), UTF8_LCASE, 1); + assertFindInSet("ς", UTF8String.fromString("Σ"), UTF8_LCASE, 1); + assertFindInSet("Σ", UTF8String.fromString("σ"), UTF8_LCASE, 1); + assertFindInSet("Σ", UTF8String.fromString("ς"), UTF8_LCASE, 1); + assertFindInSet("Σ", UTF8String.fromString("Σ"), UTF8_LCASE, 1); + assertFindInSet("σ", UTF8String.fromString("σ"), UNICODE, 1); + assertFindInSet("σ", UTF8String.fromString("ς"), UNICODE, 0); + assertFindInSet("σ", UTF8String.fromString("Σ"), UNICODE, 0); + assertFindInSet("ς", UTF8String.fromString("σ"), UNICODE, 0); + assertFindInSet("ς", 
UTF8String.fromString("ς"), UNICODE, 1); + assertFindInSet("ς", UTF8String.fromString("Σ"), UNICODE, 0); + assertFindInSet("Σ", UTF8String.fromString("σ"), UNICODE, 0); + assertFindInSet("Σ", UTF8String.fromString("ς"), UNICODE, 0); + assertFindInSet("Σ", UTF8String.fromString("Σ"), UNICODE, 1); + assertFindInSet("σ", UTF8String.fromString("σ"), UNICODE_CI, 1); + assertFindInSet("σ", UTF8String.fromString("ς"), UNICODE_CI, 1); + assertFindInSet("σ", UTF8String.fromString("Σ"), UNICODE_CI, 1); + assertFindInSet("ς", UTF8String.fromString("σ"), UNICODE_CI, 1); + assertFindInSet("ς", UTF8String.fromString("ς"), UNICODE_CI, 1); + assertFindInSet("ς", UTF8String.fromString("Σ"), UNICODE_CI, 1); + assertFindInSet("Σ", UTF8String.fromString("σ"), UNICODE_CI, 1); + assertFindInSet("Σ", UTF8String.fromString("ς"), UNICODE_CI, 1); + assertFindInSet("Σ", UTF8String.fromString("Σ"), UNICODE_CI, 1); // Surrogate pairs. - assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 0); - assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 0); - assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 0); - assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 0); - assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 1); - assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 1); - assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 1); - assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 1); - assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 2); - assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 2); - assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 2); - assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 2); - assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 3); - assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 3); - assertFindInSet("🙃c", 
UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 3); - assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 3); - assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 0); - assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 0); - assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 0); - assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 0); - assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 1); - assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 1); - assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 1); - assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 1); - assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 2); - assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 2); - assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 2); - assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 2); - assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0); - assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 0); - assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0); - assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 0); - assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 1); - assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 1); - assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 1); - assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1); - assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0); - assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 1); - assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0); - assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1); - assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 3); - assertFindInSet("𝔸", 
UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 3); - assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 3); - assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1); - assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 2); - assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 2); - assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 2); - assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 2); - assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0); - assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 2); - assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0); - assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 2); + assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), UTF8_BINARY, 0); + assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), UTF8_LCASE, 0); + assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), UNICODE, 0); + assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), UNICODE_CI, 0); + assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), UTF8_BINARY, 1); + assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), UTF8_LCASE, 1); + assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), UNICODE, 1); + assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), UNICODE_CI, 1); + assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), UTF8_BINARY, 2); + assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), UTF8_LCASE, 2); + assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), UNICODE, 2); + assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), UNICODE_CI, 2); + assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), UTF8_BINARY, 3); + assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), UTF8_LCASE, 3); + assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), UNICODE, 3); + assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), UNICODE_CI, 3); + assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), 
UTF8_BINARY, 0); + assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), UTF8_LCASE, 0); + assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), UNICODE, 0); + assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), UNICODE_CI, 0); + assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), UTF8_BINARY, 1); + assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), UTF8_LCASE, 1); + assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), UNICODE, 1); + assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), UNICODE_CI, 1); + assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), UTF8_BINARY, 2); + assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), UTF8_LCASE, 2); + assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), UNICODE, 2); + assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), UNICODE_CI, 2); + assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 0); + assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 0); + assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 0); + assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 0); + assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 1); + assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 1); + assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 1); + assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 1); + assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 0); + assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 1); + assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 0); + assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 1); + assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 3); + assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 3); + assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 3); + assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 1); + assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 
2); + assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 2); + assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 2); + assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 2); + assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), UTF8_BINARY, 0); + assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), UTF8_LCASE, 2); + assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), UNICODE, 0); + assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), UNICODE_CI, 2); // Invalid UTF8 strings assertFindInSet("C", UTF8String.fromBytes( new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), - "UTF8_BINARY", 3); + UTF8_BINARY, 3); assertFindInSet("c", UTF8String.fromBytes( new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), - "UTF8_LCASE", 2); + UTF8_LCASE, 2); assertFindInSet("C", UTF8String.fromBytes( new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), - "UNICODE", 2); + UNICODE, 2); assertFindInSet("c", UTF8String.fromBytes( new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), - "UNICODE_CI", 2); + UNICODE_CI, 2); } /** @@ -1986,145 +1987,145 @@ private void assertStringReplace(String source, String search, String replace, @Test public void testStringReplace() throws SparkException { // Empty strings. 
- assertStringReplace("", "", "", "UTF8_BINARY", ""); - assertStringReplace("", "", "", "UTF8_LCASE", ""); - assertStringReplace("", "", "", "UNICODE", ""); - assertStringReplace("", "", "", "UNICODE_CI", ""); - assertStringReplace("abc", "", "", "UTF8_BINARY", "abc"); - assertStringReplace("abc", "", "", "UTF8_LCASE", "abc"); - assertStringReplace("abc", "", "", "UNICODE", "abc"); - assertStringReplace("abc", "", "", "UNICODE_CI", "abc"); - assertStringReplace("", "x", "", "UTF8_BINARY", ""); - assertStringReplace("", "x", "", "UTF8_LCASE", ""); - assertStringReplace("", "x", "", "UNICODE", ""); - assertStringReplace("", "x", "", "UNICODE_CI", ""); - assertStringReplace("", "", "x", "UTF8_BINARY", ""); - assertStringReplace("", "", "x", "UTF8_LCASE", ""); - assertStringReplace("", "", "x", "UNICODE", ""); - assertStringReplace("", "", "x", "UNICODE_CI", ""); - assertStringReplace("", "b", "x", "UTF8_BINARY", ""); - assertStringReplace("", "b", "x", "UTF8_LCASE", ""); - assertStringReplace("", "b", "x", "UNICODE", ""); - assertStringReplace("", "b", "x", "UNICODE_CI", ""); - assertStringReplace("abc", "b", "", "UTF8_BINARY", "ac"); - assertStringReplace("abc", "b", "", "UTF8_LCASE", "ac"); - assertStringReplace("abc", "b", "", "UNICODE", "ac"); - assertStringReplace("abc", "b", "", "UNICODE_CI", "ac"); - assertStringReplace("abc", "", "x", "UTF8_BINARY", "abc"); - assertStringReplace("abc", "", "x", "UTF8_LCASE", "abc"); - assertStringReplace("abc", "", "x", "UNICODE", "abc"); - assertStringReplace("abc", "", "x", "UNICODE_CI", "abc"); + assertStringReplace("", "", "", UTF8_BINARY, ""); + assertStringReplace("", "", "", UTF8_LCASE, ""); + assertStringReplace("", "", "", UNICODE, ""); + assertStringReplace("", "", "", UNICODE_CI, ""); + assertStringReplace("abc", "", "", UTF8_BINARY, "abc"); + assertStringReplace("abc", "", "", UTF8_LCASE, "abc"); + assertStringReplace("abc", "", "", UNICODE, "abc"); + assertStringReplace("abc", "", "", UNICODE_CI, "abc"); + 
assertStringReplace("", "x", "", UTF8_BINARY, ""); + assertStringReplace("", "x", "", UTF8_LCASE, ""); + assertStringReplace("", "x", "", UNICODE, ""); + assertStringReplace("", "x", "", UNICODE_CI, ""); + assertStringReplace("", "", "x", UTF8_BINARY, ""); + assertStringReplace("", "", "x", UTF8_LCASE, ""); + assertStringReplace("", "", "x", UNICODE, ""); + assertStringReplace("", "", "x", UNICODE_CI, ""); + assertStringReplace("", "b", "x", UTF8_BINARY, ""); + assertStringReplace("", "b", "x", UTF8_LCASE, ""); + assertStringReplace("", "b", "x", UNICODE, ""); + assertStringReplace("", "b", "x", UNICODE_CI, ""); + assertStringReplace("abc", "b", "", UTF8_BINARY, "ac"); + assertStringReplace("abc", "b", "", UTF8_LCASE, "ac"); + assertStringReplace("abc", "b", "", UNICODE, "ac"); + assertStringReplace("abc", "b", "", UNICODE_CI, "ac"); + assertStringReplace("abc", "", "x", UTF8_BINARY, "abc"); + assertStringReplace("abc", "", "x", UTF8_LCASE, "abc"); + assertStringReplace("abc", "", "x", UNICODE, "abc"); + assertStringReplace("abc", "", "x", UNICODE_CI, "abc"); // Basic tests. 
- assertStringReplace("replace", "pl", "", "UTF8_BINARY", "reace"); - assertStringReplace("replace", "pl", "", "UTF8_LCASE", "reace"); - assertStringReplace("replace", "pl", "", "UNICODE", "reace"); - assertStringReplace("replace", "pl", "", "UNICODE_CI", "reace"); - assertStringReplace("replace", "", "123", "UTF8_BINARY", "replace"); - assertStringReplace("replace", "", "123", "UTF8_LCASE", "replace"); - assertStringReplace("replace", "", "123", "UNICODE", "replace"); - assertStringReplace("replace", "", "123", "UNICODE_CI", "replace"); - assertStringReplace("abcabc", "b", "12", "UTF8_BINARY", "a12ca12c"); - assertStringReplace("abcabc", "b", "12", "UTF8_LCASE", "a12ca12c"); - assertStringReplace("abcabc", "b", "12", "UNICODE", "a12ca12c"); - assertStringReplace("abcabc", "b", "12", "UNICODE_CI", "a12ca12c"); - assertStringReplace("replace", "plx", "123", "UTF8_BINARY", "replace"); - assertStringReplace("replace", "plx", "123", "UTF8_LCASE", "replace"); - assertStringReplace("replace", "plx", "123", "UNICODE", "replace"); - assertStringReplace("replace", "plx", "123", "UNICODE_CI", "replace"); - assertStringReplace("Replace", "re", "", "UTF8_BINARY", "Replace"); - assertStringReplace("Replace", "re", "", "UTF8_LCASE", "place"); - assertStringReplace("Replace", "re", "", "UNICODE", "Replace"); - assertStringReplace("Replace", "re", "", "UNICODE_CI", "place"); - assertStringReplace("abcdabcd", "Bc", "", "UTF8_BINARY", "abcdabcd"); - assertStringReplace("abcdabcd", "Bc", "", "UTF8_LCASE", "adad"); - assertStringReplace("abcdabcd", "Bc", "", "UNICODE", "abcdabcd"); - assertStringReplace("abcdabcd", "Bc", "", "UNICODE_CI", "adad"); - assertStringReplace("AbcdabCd", "Bc", "", "UTF8_BINARY", "AbcdabCd"); - assertStringReplace("AbcdabCd", "Bc", "", "UTF8_LCASE", "Adad"); - assertStringReplace("AbcdabCd", "Bc", "", "UNICODE", "AbcdabCd"); - assertStringReplace("AbcdabCd", "Bc", "", "UNICODE_CI", "Adad"); + assertStringReplace("replace", "pl", "", UTF8_BINARY, "reace"); + 
assertStringReplace("replace", "pl", "", UTF8_LCASE, "reace"); + assertStringReplace("replace", "pl", "", UNICODE, "reace"); + assertStringReplace("replace", "pl", "", UNICODE_CI, "reace"); + assertStringReplace("replace", "", "123", UTF8_BINARY, "replace"); + assertStringReplace("replace", "", "123", UTF8_LCASE, "replace"); + assertStringReplace("replace", "", "123", UNICODE, "replace"); + assertStringReplace("replace", "", "123", UNICODE_CI, "replace"); + assertStringReplace("abcabc", "b", "12", UTF8_BINARY, "a12ca12c"); + assertStringReplace("abcabc", "b", "12", UTF8_LCASE, "a12ca12c"); + assertStringReplace("abcabc", "b", "12", UNICODE, "a12ca12c"); + assertStringReplace("abcabc", "b", "12", UNICODE_CI, "a12ca12c"); + assertStringReplace("replace", "plx", "123", UTF8_BINARY, "replace"); + assertStringReplace("replace", "plx", "123", UTF8_LCASE, "replace"); + assertStringReplace("replace", "plx", "123", UNICODE, "replace"); + assertStringReplace("replace", "plx", "123", UNICODE_CI, "replace"); + assertStringReplace("Replace", "re", "", UTF8_BINARY, "Replace"); + assertStringReplace("Replace", "re", "", UTF8_LCASE, "place"); + assertStringReplace("Replace", "re", "", UNICODE, "Replace"); + assertStringReplace("Replace", "re", "", UNICODE_CI, "place"); + assertStringReplace("abcdabcd", "Bc", "", UTF8_BINARY, "abcdabcd"); + assertStringReplace("abcdabcd", "Bc", "", UTF8_LCASE, "adad"); + assertStringReplace("abcdabcd", "Bc", "", UNICODE, "abcdabcd"); + assertStringReplace("abcdabcd", "Bc", "", UNICODE_CI, "adad"); + assertStringReplace("AbcdabCd", "Bc", "", UTF8_BINARY, "AbcdabCd"); + assertStringReplace("AbcdabCd", "Bc", "", UTF8_LCASE, "Adad"); + assertStringReplace("AbcdabCd", "Bc", "", UNICODE, "AbcdabCd"); + assertStringReplace("AbcdabCd", "Bc", "", UNICODE_CI, "Adad"); // Advanced tests. 
- assertStringReplace("abcdabcd", "bc", "", "UTF8_BINARY", "adad"); - assertStringReplace("r世eplace", "pl", "123", "UTF8_BINARY", "r世e123ace"); - assertStringReplace("世Replace", "re", "", "UTF8_BINARY", "世Replace"); - assertStringReplace("r世eplace", "pl", "xx", "UTF8_LCASE", "r世exxace"); - assertStringReplace("repl世ace", "PL", "AB", "UTF8_LCASE", "reAB世ace"); - assertStringReplace("re世place", "世", "x", "UTF8_LCASE", "rexplace"); - assertStringReplace("re世place", "plx", "123", "UNICODE", "re世place"); - assertStringReplace("replace世", "", "123", "UNICODE", "replace世"); - assertStringReplace("aBc世abc", "b", "12", "UNICODE", "aBc世a12c"); - assertStringReplace("aBc世abc", "b", "12", "UNICODE_CI", "a12c世a12c"); - assertStringReplace("a世Bcdabcd", "bC", "", "UNICODE_CI", "a世dad"); - assertStringReplace("repl世ace", "Pl", "", "UNICODE_CI", "re世ace"); + assertStringReplace("abcdabcd", "bc", "", UTF8_BINARY, "adad"); + assertStringReplace("r世eplace", "pl", "123", UTF8_BINARY, "r世e123ace"); + assertStringReplace("世Replace", "re", "", UTF8_BINARY, "世Replace"); + assertStringReplace("r世eplace", "pl", "xx", UTF8_LCASE, "r世exxace"); + assertStringReplace("repl世ace", "PL", "AB", UTF8_LCASE, "reAB世ace"); + assertStringReplace("re世place", "世", "x", UTF8_LCASE, "rexplace"); + assertStringReplace("re世place", "plx", "123", UNICODE, "re世place"); + assertStringReplace("replace世", "", "123", UNICODE, "replace世"); + assertStringReplace("aBc世abc", "b", "12", UNICODE, "aBc世a12c"); + assertStringReplace("aBc世abc", "b", "12", UNICODE_CI, "a12c世a12c"); + assertStringReplace("a世Bcdabcd", "bC", "", UNICODE_CI, "a世dad"); + assertStringReplace("repl世ace", "Pl", "", UNICODE_CI, "re世ace"); assertStringReplace("abcčšdabĆŠscd", "cs", "", "SR_CI_AI", "abcdabscd"); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertStringReplace("abi̇12", "i", "X", "UNICODE_CI", "abi̇12"); - assertStringReplace("abi̇12", "\u0307", "X", "UNICODE_CI", "abi̇12"); - assertStringReplace("abi̇12", "İ", "X", "UNICODE_CI", "abX12"); - assertStringReplace("abİ12", "i", "X", "UNICODE_CI", "abİ12"); - assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", "UNICODE_CI", "xxxxxx"); - assertStringReplace("İi̇İi̇İi̇", "i", "x", "UNICODE_CI", "İi̇İi̇İi̇"); - assertStringReplace("abİo12i̇o", "i\u0307o", "xx", "UNICODE_CI", "abxx12xx"); - assertStringReplace("abi̇o12i̇o", "İo", "yy", "UNICODE_CI", "abyy12yy"); - assertStringReplace("abi̇12", "i", "X", "UTF8_LCASE", "abX\u030712"); // != UNICODE_CI - assertStringReplace("abi̇12", "\u0307", "X", "UTF8_LCASE", "abiX12"); // != UNICODE_CI - assertStringReplace("abi̇12", "İ", "X", "UTF8_LCASE", "abX12"); - assertStringReplace("abİ12", "i", "X", "UTF8_LCASE", "abİ12"); - assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", "UTF8_LCASE", "xxxxxx"); - assertStringReplace("İi̇İi̇İi̇", "i", "x", "UTF8_LCASE", + assertStringReplace("abi̇12", "i", "X", UNICODE_CI, "abi̇12"); + assertStringReplace("abi̇12", "\u0307", "X", UNICODE_CI, "abi̇12"); + assertStringReplace("abi̇12", "İ", "X", UNICODE_CI, "abX12"); + assertStringReplace("abİ12", "i", "X", UNICODE_CI, "abİ12"); + assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", UNICODE_CI, "xxxxxx"); + assertStringReplace("İi̇İi̇İi̇", "i", "x", UNICODE_CI, "İi̇İi̇İi̇"); + assertStringReplace("abİo12i̇o", "i\u0307o", "xx", UNICODE_CI, "abxx12xx"); + assertStringReplace("abi̇o12i̇o", "İo", "yy", UNICODE_CI, "abyy12yy"); + assertStringReplace("abi̇12", "i", "X", UTF8_LCASE, "abX\u030712"); // != UNICODE_CI + assertStringReplace("abi̇12", "\u0307", "X", UTF8_LCASE, "abiX12"); // != UNICODE_CI + assertStringReplace("abi̇12", "İ", "X", UTF8_LCASE, "abX12"); + assertStringReplace("abİ12", "i", "X", UTF8_LCASE, "abİ12"); + assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", UTF8_LCASE, "xxxxxx"); + assertStringReplace("İi̇İi̇İi̇", "i", "x", 
UTF8_LCASE, "İx\u0307İx\u0307İx\u0307"); // != UNICODE_CI - assertStringReplace("abİo12i̇o", "i\u0307o", "xx", "UTF8_LCASE", "abxx12xx"); - assertStringReplace("abi̇o12i̇o", "İo", "yy", "UTF8_LCASE", "abyy12yy"); + assertStringReplace("abİo12i̇o", "i\u0307o", "xx", UTF8_LCASE, "abxx12xx"); + assertStringReplace("abi̇o12i̇o", "İo", "yy", UTF8_LCASE, "abyy12yy"); // Conditional case mapping (e.g. Greek sigmas). - assertStringReplace("σ", "σ", "x", "UTF8_BINARY", "x"); - assertStringReplace("σ", "ς", "x", "UTF8_BINARY", "σ"); - assertStringReplace("σ", "Σ", "x", "UTF8_BINARY", "σ"); - assertStringReplace("ς", "σ", "x", "UTF8_BINARY", "ς"); - assertStringReplace("ς", "ς", "x", "UTF8_BINARY", "x"); - assertStringReplace("ς", "Σ", "x", "UTF8_BINARY", "ς"); - assertStringReplace("Σ", "σ", "x", "UTF8_BINARY", "Σ"); - assertStringReplace("Σ", "ς", "x", "UTF8_BINARY", "Σ"); - assertStringReplace("Σ", "Σ", "x", "UTF8_BINARY", "x"); - assertStringReplace("σ", "σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("σ", "ς", "x", "UTF8_LCASE", "x"); - assertStringReplace("σ", "Σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("ς", "σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("ς", "ς", "x", "UTF8_LCASE", "x"); - assertStringReplace("ς", "Σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("Σ", "σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("Σ", "ς", "x", "UTF8_LCASE", "x"); - assertStringReplace("Σ", "Σ", "x", "UTF8_LCASE", "x"); - assertStringReplace("σ", "σ", "x", "UNICODE", "x"); - assertStringReplace("σ", "ς", "x", "UNICODE", "σ"); - assertStringReplace("σ", "Σ", "x", "UNICODE", "σ"); - assertStringReplace("ς", "σ", "x", "UNICODE", "ς"); - assertStringReplace("ς", "ς", "x", "UNICODE", "x"); - assertStringReplace("ς", "Σ", "x", "UNICODE", "ς"); - assertStringReplace("Σ", "σ", "x", "UNICODE", "Σ"); - assertStringReplace("Σ", "ς", "x", "UNICODE", "Σ"); - assertStringReplace("Σ", "Σ", "x", "UNICODE", "x"); - assertStringReplace("σ", "σ", "x", "UNICODE_CI", "x"); - 
assertStringReplace("σ", "ς", "x", "UNICODE_CI", "x"); - assertStringReplace("σ", "Σ", "x", "UNICODE_CI", "x"); - assertStringReplace("ς", "σ", "x", "UNICODE_CI", "x"); - assertStringReplace("ς", "ς", "x", "UNICODE_CI", "x"); - assertStringReplace("ς", "Σ", "x", "UNICODE_CI", "x"); - assertStringReplace("Σ", "σ", "x", "UNICODE_CI", "x"); - assertStringReplace("Σ", "ς", "x", "UNICODE_CI", "x"); - assertStringReplace("Σ", "Σ", "x", "UNICODE_CI", "x"); + assertStringReplace("σ", "σ", "x", UTF8_BINARY, "x"); + assertStringReplace("σ", "ς", "x", UTF8_BINARY, "σ"); + assertStringReplace("σ", "Σ", "x", UTF8_BINARY, "σ"); + assertStringReplace("ς", "σ", "x", UTF8_BINARY, "ς"); + assertStringReplace("ς", "ς", "x", UTF8_BINARY, "x"); + assertStringReplace("ς", "Σ", "x", UTF8_BINARY, "ς"); + assertStringReplace("Σ", "σ", "x", UTF8_BINARY, "Σ"); + assertStringReplace("Σ", "ς", "x", UTF8_BINARY, "Σ"); + assertStringReplace("Σ", "Σ", "x", UTF8_BINARY, "x"); + assertStringReplace("σ", "σ", "x", UTF8_LCASE, "x"); + assertStringReplace("σ", "ς", "x", UTF8_LCASE, "x"); + assertStringReplace("σ", "Σ", "x", UTF8_LCASE, "x"); + assertStringReplace("ς", "σ", "x", UTF8_LCASE, "x"); + assertStringReplace("ς", "ς", "x", UTF8_LCASE, "x"); + assertStringReplace("ς", "Σ", "x", UTF8_LCASE, "x"); + assertStringReplace("Σ", "σ", "x", UTF8_LCASE, "x"); + assertStringReplace("Σ", "ς", "x", UTF8_LCASE, "x"); + assertStringReplace("Σ", "Σ", "x", UTF8_LCASE, "x"); + assertStringReplace("σ", "σ", "x", UNICODE, "x"); + assertStringReplace("σ", "ς", "x", UNICODE, "σ"); + assertStringReplace("σ", "Σ", "x", UNICODE, "σ"); + assertStringReplace("ς", "σ", "x", UNICODE, "ς"); + assertStringReplace("ς", "ς", "x", UNICODE, "x"); + assertStringReplace("ς", "Σ", "x", UNICODE, "ς"); + assertStringReplace("Σ", "σ", "x", UNICODE, "Σ"); + assertStringReplace("Σ", "ς", "x", UNICODE, "Σ"); + assertStringReplace("Σ", "Σ", "x", UNICODE, "x"); + assertStringReplace("σ", "σ", "x", UNICODE_CI, "x"); + 
assertStringReplace("σ", "ς", "x", UNICODE_CI, "x"); + assertStringReplace("σ", "Σ", "x", UNICODE_CI, "x"); + assertStringReplace("ς", "σ", "x", UNICODE_CI, "x"); + assertStringReplace("ς", "ς", "x", UNICODE_CI, "x"); + assertStringReplace("ς", "Σ", "x", UNICODE_CI, "x"); + assertStringReplace("Σ", "σ", "x", UNICODE_CI, "x"); + assertStringReplace("Σ", "ς", "x", UNICODE_CI, "x"); + assertStringReplace("Σ", "Σ", "x", UNICODE_CI, "x"); // Surrogate pairs. - assertStringReplace("a🙃b", "a", "x", "UTF8_BINARY", "x🙃b"); - assertStringReplace("a🙃b", "b", "x", "UTF8_BINARY", "a🙃x"); - assertStringReplace("a🙃b", "🙃", "x", "UTF8_BINARY", "axb"); - assertStringReplace("a🙃b", "b", "c", "UTF8_LCASE", "a🙃c"); - assertStringReplace("a🙃b", "b", "x", "UTF8_LCASE", "a🙃x"); - assertStringReplace("a🙃b", "🙃", "x", "UTF8_LCASE", "axb"); - assertStringReplace("a🙃b", "b", "c", "UNICODE", "a🙃c"); - assertStringReplace("a🙃b", "b", "x", "UNICODE", "a🙃x"); - assertStringReplace("a🙃b", "🙃", "x", "UNICODE", "axb"); - assertStringReplace("a🙃b", "b", "c", "UNICODE_CI", "a🙃c"); - assertStringReplace("a🙃b", "b", "x", "UNICODE_CI", "a🙃x"); - assertStringReplace("a🙃b", "🙃", "x", "UNICODE_CI", "axb"); + assertStringReplace("a🙃b", "a", "x", UTF8_BINARY, "x🙃b"); + assertStringReplace("a🙃b", "b", "x", UTF8_BINARY, "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", UTF8_BINARY, "axb"); + assertStringReplace("a🙃b", "b", "c", UTF8_LCASE, "a🙃c"); + assertStringReplace("a🙃b", "b", "x", UTF8_LCASE, "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", UTF8_LCASE, "axb"); + assertStringReplace("a🙃b", "b", "c", UNICODE, "a🙃c"); + assertStringReplace("a🙃b", "b", "x", UNICODE, "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", UNICODE, "axb"); + assertStringReplace("a🙃b", "b", "c", UNICODE_CI, "a🙃c"); + assertStringReplace("a🙃b", "b", "x", UNICODE_CI, "a🙃x"); + assertStringReplace("a🙃b", "🙃", "x", UNICODE_CI, "axb"); } /** @@ -2145,293 +2146,293 @@ private void assertStringLocate(String substring, String string, int start, @Test 
public void testStringLocate() throws SparkException { // Empty strings. - assertStringLocate("", "", -1, "UTF8_BINARY", 1); - assertStringLocate("", "", -1, "UTF8_LCASE", 1); - assertStringLocate("", "", -1, "UNICODE", 1); - assertStringLocate("", "", -1, "UNICODE_CI", 1); - assertStringLocate("", "", 0, "UTF8_BINARY", 1); - assertStringLocate("", "", 0, "UTF8_LCASE", 1); - assertStringLocate("", "", 0, "UNICODE", 1); - assertStringLocate("", "", 0, "UNICODE_CI", 1); - assertStringLocate("", "", 1, "UTF8_BINARY", 1); - assertStringLocate("", "", 1, "UTF8_LCASE", 1); - assertStringLocate("", "", 1, "UNICODE", 1); - assertStringLocate("", "", 1, "UNICODE_CI", 1); - assertStringLocate("a", "", -1, "UTF8_BINARY", 0); - assertStringLocate("a", "", -1, "UTF8_LCASE", 0); - assertStringLocate("a", "", -1, "UNICODE", 0); - assertStringLocate("a", "", -1, "UNICODE_CI", 0); - assertStringLocate("a", "", 0, "UTF8_BINARY", 0); - assertStringLocate("a", "", 0, "UTF8_LCASE", 0); - assertStringLocate("a", "", 0, "UNICODE", 0); - assertStringLocate("a", "", 0, "UNICODE_CI", 0); - assertStringLocate("a", "", 1, "UTF8_BINARY", 0); - assertStringLocate("a", "", 1, "UTF8_LCASE", 0); - assertStringLocate("a", "", 1, "UNICODE", 0); - assertStringLocate("a", "", 1, "UNICODE_CI", 0); - assertStringLocate("", "x", -1, "UTF8_BINARY", 1); - assertStringLocate("", "x", -1, "UTF8_LCASE", 1); - assertStringLocate("", "x", -1, "UNICODE", 1); - assertStringLocate("", "x", -1, "UNICODE_CI", 1); - assertStringLocate("", "x", 0, "UTF8_BINARY", 1); - assertStringLocate("", "x", 0, "UTF8_LCASE", 1); - assertStringLocate("", "x", 0, "UNICODE", 1); - assertStringLocate("", "x", 0, "UNICODE_CI", 1); - assertStringLocate("", "x", 1, "UTF8_BINARY", 1); - assertStringLocate("", "x", 1, "UTF8_LCASE", 1); - assertStringLocate("", "x", 1, "UNICODE", 1); - assertStringLocate("", "x", 1, "UNICODE_CI", 1); + assertStringLocate("", "", -1, UTF8_BINARY, 1); + assertStringLocate("", "", -1, UTF8_LCASE, 1); + 
assertStringLocate("", "", -1, UNICODE, 1); + assertStringLocate("", "", -1, UNICODE_CI, 1); + assertStringLocate("", "", 0, UTF8_BINARY, 1); + assertStringLocate("", "", 0, UTF8_LCASE, 1); + assertStringLocate("", "", 0, UNICODE, 1); + assertStringLocate("", "", 0, UNICODE_CI, 1); + assertStringLocate("", "", 1, UTF8_BINARY, 1); + assertStringLocate("", "", 1, UTF8_LCASE, 1); + assertStringLocate("", "", 1, UNICODE, 1); + assertStringLocate("", "", 1, UNICODE_CI, 1); + assertStringLocate("a", "", -1, UTF8_BINARY, 0); + assertStringLocate("a", "", -1, UTF8_LCASE, 0); + assertStringLocate("a", "", -1, UNICODE, 0); + assertStringLocate("a", "", -1, UNICODE_CI, 0); + assertStringLocate("a", "", 0, UTF8_BINARY, 0); + assertStringLocate("a", "", 0, UTF8_LCASE, 0); + assertStringLocate("a", "", 0, UNICODE, 0); + assertStringLocate("a", "", 0, UNICODE_CI, 0); + assertStringLocate("a", "", 1, UTF8_BINARY, 0); + assertStringLocate("a", "", 1, UTF8_LCASE, 0); + assertStringLocate("a", "", 1, UNICODE, 0); + assertStringLocate("a", "", 1, UNICODE_CI, 0); + assertStringLocate("", "x", -1, UTF8_BINARY, 1); + assertStringLocate("", "x", -1, UTF8_LCASE, 1); + assertStringLocate("", "x", -1, UNICODE, 1); + assertStringLocate("", "x", -1, UNICODE_CI, 1); + assertStringLocate("", "x", 0, UTF8_BINARY, 1); + assertStringLocate("", "x", 0, UTF8_LCASE, 1); + assertStringLocate("", "x", 0, UNICODE, 1); + assertStringLocate("", "x", 0, UNICODE_CI, 1); + assertStringLocate("", "x", 1, UTF8_BINARY, 1); + assertStringLocate("", "x", 1, UTF8_LCASE, 1); + assertStringLocate("", "x", 1, UNICODE, 1); + assertStringLocate("", "x", 1, UNICODE_CI, 1); // Basic tests. 
- assertStringLocate("aa", "aaads", 1, "UTF8_BINARY", 1); - assertStringLocate("aa", "aaads", 1, "UTF8_LCASE", 1); - assertStringLocate("aa", "aaads", 1, "UNICODE", 1); - assertStringLocate("aa", "aaads", 1, "UNICODE_CI", 1); - assertStringLocate("aa", "aaads", 2, "UTF8_BINARY", 2); - assertStringLocate("aa", "aaads", 2, "UTF8_LCASE", 2); - assertStringLocate("aa", "aaads", 2, "UNICODE", 2); - assertStringLocate("aa", "aaads", 2, "UNICODE_CI", 2); - assertStringLocate("aa", "aaads", 3, "UTF8_BINARY", 0); - assertStringLocate("aa", "aaads", 3, "UTF8_LCASE", 0); - assertStringLocate("aa", "aaads", 3, "UNICODE", 0); - assertStringLocate("aa", "aaads", 3, "UNICODE_CI", 0); - assertStringLocate("Aa", "aaads", 1, "UTF8_BINARY", 0); - assertStringLocate("Aa", "aaads", 1, "UTF8_LCASE", 1); - assertStringLocate("Aa", "aaads", 1, "UNICODE", 0); - assertStringLocate("Aa", "aaads", 1, "UNICODE_CI", 1); - assertStringLocate("Aa", "aaads", 2, "UTF8_BINARY", 0); - assertStringLocate("Aa", "aaads", 2, "UTF8_LCASE", 2); - assertStringLocate("Aa", "aaads", 2, "UNICODE", 0); - assertStringLocate("Aa", "aaads", 2, "UNICODE_CI", 2); - assertStringLocate("Aa", "aaads", 3, "UTF8_BINARY", 0); - assertStringLocate("Aa", "aaads", 3, "UTF8_LCASE", 0); - assertStringLocate("Aa", "aaads", 3, "UNICODE", 0); - assertStringLocate("Aa", "aaads", 3, "UNICODE_CI", 0); - assertStringLocate("Aa", "aAads", 1, "UTF8_BINARY", 2); - assertStringLocate("Aa", "aAads", 1, "UTF8_LCASE", 1); - assertStringLocate("Aa", "aAads", 1, "UNICODE", 2); - assertStringLocate("Aa", "aAads", 1, "UNICODE_CI", 1); - assertStringLocate("AA", "aaads", 1, "UTF8_BINARY", 0); - assertStringLocate("AA", "aaads", 1, "UTF8_LCASE", 1); - assertStringLocate("AA", "aaads", 1, "UNICODE", 0); - assertStringLocate("AA", "aaads", 1, "UNICODE_CI", 1); - assertStringLocate("aa", "aAads", 2, "UTF8_BINARY", 0); - assertStringLocate("aa", "aAads", 2, "UTF8_LCASE", 2); - assertStringLocate("aa", "aAads", 2, "UNICODE", 0); - 
assertStringLocate("aa", "aAads", 2, "UNICODE_CI", 2); - assertStringLocate("aa", "aaAds", 3, "UTF8_BINARY", 0); - assertStringLocate("aa", "aaAds", 3, "UTF8_LCASE", 0); - assertStringLocate("aa", "aaAds", 3, "UNICODE", 0); - assertStringLocate("aa", "aaAds", 3, "UNICODE_CI", 0); - assertStringLocate("abC", "abcabc", 1, "UTF8_BINARY", 0); - assertStringLocate("abC", "abcabc", 1, "UTF8_LCASE", 1); - assertStringLocate("abC", "abcabc", 1, "UNICODE", 0); - assertStringLocate("abC", "abcabc", 1, "UNICODE_CI", 1); - assertStringLocate("abC", "abCabc", 2, "UTF8_BINARY", 0); - assertStringLocate("abC", "abCabc", 2, "UTF8_LCASE", 4); - assertStringLocate("abC", "abCabc", 2, "UNICODE", 0); - assertStringLocate("abC", "abCabc", 2, "UNICODE_CI", 4); - assertStringLocate("abc", "abcabc", 1, "UTF8_BINARY", 1); - assertStringLocate("abc", "abcabc", 1, "UTF8_LCASE", 1); - assertStringLocate("abc", "abcabc", 1, "UNICODE", 1); - assertStringLocate("abc", "abcabc", 1, "UNICODE_CI", 1); - assertStringLocate("abc", "abcabc", 2, "UTF8_BINARY", 4); - assertStringLocate("abc", "abcabc", 2, "UTF8_LCASE", 4); - assertStringLocate("abc", "abcabc", 2, "UNICODE", 4); - assertStringLocate("abc", "abcabc", 2, "UNICODE_CI", 4); - assertStringLocate("abc", "abcabc", 3, "UTF8_BINARY", 4); - assertStringLocate("abc", "abcabc", 3, "UTF8_LCASE", 4); - assertStringLocate("abc", "abcabc", 3, "UNICODE", 4); - assertStringLocate("abc", "abcabc", 3, "UNICODE_CI", 4); - assertStringLocate("abc", "abcabc", 4, "UTF8_BINARY", 4); - assertStringLocate("abc", "abcabc", 4, "UTF8_LCASE", 4); - assertStringLocate("abc", "abcabc", 4, "UNICODE", 4); - assertStringLocate("abc", "abcabc", 4, "UNICODE_CI", 4); - assertStringLocate("aa", "Aaads", 1, "UTF8_BINARY", 2); - assertStringLocate("aa", "Aaads", 1, "UTF8_LCASE", 1); - assertStringLocate("aa", "Aaads", 1, "UNICODE", 2); - assertStringLocate("aa", "Aaads", 1, "UNICODE_CI", 1); + assertStringLocate("aa", "aaads", 1, UTF8_BINARY, 1); + assertStringLocate("aa", 
"aaads", 1, UTF8_LCASE, 1); + assertStringLocate("aa", "aaads", 1, UNICODE, 1); + assertStringLocate("aa", "aaads", 1, UNICODE_CI, 1); + assertStringLocate("aa", "aaads", 2, UTF8_BINARY, 2); + assertStringLocate("aa", "aaads", 2, UTF8_LCASE, 2); + assertStringLocate("aa", "aaads", 2, UNICODE, 2); + assertStringLocate("aa", "aaads", 2, UNICODE_CI, 2); + assertStringLocate("aa", "aaads", 3, UTF8_BINARY, 0); + assertStringLocate("aa", "aaads", 3, UTF8_LCASE, 0); + assertStringLocate("aa", "aaads", 3, UNICODE, 0); + assertStringLocate("aa", "aaads", 3, UNICODE_CI, 0); + assertStringLocate("Aa", "aaads", 1, UTF8_BINARY, 0); + assertStringLocate("Aa", "aaads", 1, UTF8_LCASE, 1); + assertStringLocate("Aa", "aaads", 1, UNICODE, 0); + assertStringLocate("Aa", "aaads", 1, UNICODE_CI, 1); + assertStringLocate("Aa", "aaads", 2, UTF8_BINARY, 0); + assertStringLocate("Aa", "aaads", 2, UTF8_LCASE, 2); + assertStringLocate("Aa", "aaads", 2, UNICODE, 0); + assertStringLocate("Aa", "aaads", 2, UNICODE_CI, 2); + assertStringLocate("Aa", "aaads", 3, UTF8_BINARY, 0); + assertStringLocate("Aa", "aaads", 3, UTF8_LCASE, 0); + assertStringLocate("Aa", "aaads", 3, UNICODE, 0); + assertStringLocate("Aa", "aaads", 3, UNICODE_CI, 0); + assertStringLocate("Aa", "aAads", 1, UTF8_BINARY, 2); + assertStringLocate("Aa", "aAads", 1, UTF8_LCASE, 1); + assertStringLocate("Aa", "aAads", 1, UNICODE, 2); + assertStringLocate("Aa", "aAads", 1, UNICODE_CI, 1); + assertStringLocate("AA", "aaads", 1, UTF8_BINARY, 0); + assertStringLocate("AA", "aaads", 1, UTF8_LCASE, 1); + assertStringLocate("AA", "aaads", 1, UNICODE, 0); + assertStringLocate("AA", "aaads", 1, UNICODE_CI, 1); + assertStringLocate("aa", "aAads", 2, UTF8_BINARY, 0); + assertStringLocate("aa", "aAads", 2, UTF8_LCASE, 2); + assertStringLocate("aa", "aAads", 2, UNICODE, 0); + assertStringLocate("aa", "aAads", 2, UNICODE_CI, 2); + assertStringLocate("aa", "aaAds", 3, UTF8_BINARY, 0); + assertStringLocate("aa", "aaAds", 3, UTF8_LCASE, 0); + 
assertStringLocate("aa", "aaAds", 3, UNICODE, 0); + assertStringLocate("aa", "aaAds", 3, UNICODE_CI, 0); + assertStringLocate("abC", "abcabc", 1, UTF8_BINARY, 0); + assertStringLocate("abC", "abcabc", 1, UTF8_LCASE, 1); + assertStringLocate("abC", "abcabc", 1, UNICODE, 0); + assertStringLocate("abC", "abcabc", 1, UNICODE_CI, 1); + assertStringLocate("abC", "abCabc", 2, UTF8_BINARY, 0); + assertStringLocate("abC", "abCabc", 2, UTF8_LCASE, 4); + assertStringLocate("abC", "abCabc", 2, UNICODE, 0); + assertStringLocate("abC", "abCabc", 2, UNICODE_CI, 4); + assertStringLocate("abc", "abcabc", 1, UTF8_BINARY, 1); + assertStringLocate("abc", "abcabc", 1, UTF8_LCASE, 1); + assertStringLocate("abc", "abcabc", 1, UNICODE, 1); + assertStringLocate("abc", "abcabc", 1, UNICODE_CI, 1); + assertStringLocate("abc", "abcabc", 2, UTF8_BINARY, 4); + assertStringLocate("abc", "abcabc", 2, UTF8_LCASE, 4); + assertStringLocate("abc", "abcabc", 2, UNICODE, 4); + assertStringLocate("abc", "abcabc", 2, UNICODE_CI, 4); + assertStringLocate("abc", "abcabc", 3, UTF8_BINARY, 4); + assertStringLocate("abc", "abcabc", 3, UTF8_LCASE, 4); + assertStringLocate("abc", "abcabc", 3, UNICODE, 4); + assertStringLocate("abc", "abcabc", 3, UNICODE_CI, 4); + assertStringLocate("abc", "abcabc", 4, UTF8_BINARY, 4); + assertStringLocate("abc", "abcabc", 4, UTF8_LCASE, 4); + assertStringLocate("abc", "abcabc", 4, UNICODE, 4); + assertStringLocate("abc", "abcabc", 4, UNICODE_CI, 4); + assertStringLocate("aa", "Aaads", 1, UTF8_BINARY, 2); + assertStringLocate("aa", "Aaads", 1, UTF8_LCASE, 1); + assertStringLocate("aa", "Aaads", 1, UNICODE, 2); + assertStringLocate("aa", "Aaads", 1, UNICODE_CI, 1); assertStringLocate("ćČ", "CćČČćCČĆČcČcććČč", 3, "SR", 14); assertStringLocate("ćČ", "CćČČćCČĆČcČcććČč", 3, "SR_CI_AI", 3); // Advanced tests. 
- assertStringLocate("界x", "test大千世界X大千世界", 1, "UTF8_BINARY", 0); - assertStringLocate("界X", "test大千世界X大千世界", 1, "UTF8_BINARY", 8); - assertStringLocate("界", "test大千世界X大千世界", 13, "UTF8_BINARY", 13); - assertStringLocate("界x", "test大千世界X大千世界", 1, "UTF8_LCASE", 8); - assertStringLocate("界X", "test大千世界Xtest大千世界", 1, "UTF8_LCASE", 8); - assertStringLocate("界", "test大千世界X大千世界", 13, "UTF8_LCASE", 13); - assertStringLocate("大千", "test大千世界大千世界", 1, "UTF8_LCASE", 5); - assertStringLocate("大千", "test大千世界大千世界", 9, "UTF8_LCASE", 9); - assertStringLocate("大千", "大千世界大千世界", 1, "UTF8_LCASE", 1); - assertStringLocate("界x", "test大千世界X大千世界", 1, "UNICODE", 0); - assertStringLocate("界X", "test大千世界X大千世界", 1, "UNICODE", 8); - assertStringLocate("界", "test大千世界X大千世界", 13, "UNICODE", 13); - assertStringLocate("界x", "test大千世界X大千世界", 1, "UNICODE_CI", 8); - assertStringLocate("界", "test大千世界X大千世界", 13, "UNICODE_CI", 13); - assertStringLocate("大千", "test大千世界大千世界", 1, "UNICODE_CI", 5); - assertStringLocate("大千", "test大千世界大千世界", 9, "UNICODE_CI", 9); - assertStringLocate("大千", "大千世界大千世界", 1, "UNICODE_CI", 1); + assertStringLocate("界x", "test大千世界X大千世界", 1, UTF8_BINARY, 0); + assertStringLocate("界X", "test大千世界X大千世界", 1, UTF8_BINARY, 8); + assertStringLocate("界", "test大千世界X大千世界", 13, UTF8_BINARY, 13); + assertStringLocate("界x", "test大千世界X大千世界", 1, UTF8_LCASE, 8); + assertStringLocate("界X", "test大千世界Xtest大千世界", 1, UTF8_LCASE, 8); + assertStringLocate("界", "test大千世界X大千世界", 13, UTF8_LCASE, 13); + assertStringLocate("大千", "test大千世界大千世界", 1, UTF8_LCASE, 5); + assertStringLocate("大千", "test大千世界大千世界", 9, UTF8_LCASE, 9); + assertStringLocate("大千", "大千世界大千世界", 1, UTF8_LCASE, 1); + assertStringLocate("界x", "test大千世界X大千世界", 1, UNICODE, 0); + assertStringLocate("界X", "test大千世界X大千世界", 1, UNICODE, 8); + assertStringLocate("界", "test大千世界X大千世界", 13, UNICODE, 13); + assertStringLocate("界x", "test大千世界X大千世界", 1, UNICODE_CI, 8); + assertStringLocate("界", "test大千世界X大千世界", 13, UNICODE_CI, 13); + assertStringLocate("大千", 
"test大千世界大千世界", 1, UNICODE_CI, 5); + assertStringLocate("大千", "test大千世界大千世界", 9, UNICODE_CI, 9); + assertStringLocate("大千", "大千世界大千世界", 1, UNICODE_CI, 1); // One-to-many case mapping (e.g. Turkish dotted I). - assertStringLocate("\u0307", "i\u0307", 1, "UTF8_BINARY", 2); - assertStringLocate("\u0307", "İ", 1, "UTF8_LCASE", 0); // != UTF8_BINARY - assertStringLocate("i", "i\u0307", 1, "UNICODE_CI", 0); - assertStringLocate("\u0307", "i\u0307", 1, "UNICODE_CI", 0); - assertStringLocate("i\u0307", "i", 1, "UNICODE_CI", 0); - assertStringLocate("İ", "i\u0307", 1, "UNICODE_CI", 1); - assertStringLocate("İ", "i", 1, "UNICODE_CI", 0); - assertStringLocate("i", "i\u0307", 1, "UTF8_LCASE", 1); // != UNICODE_CI - assertStringLocate("\u0307", "i\u0307", 1, "UTF8_LCASE", 2); // != UNICODE_CI - assertStringLocate("i\u0307", "i", 1, "UTF8_LCASE", 0); - assertStringLocate("İ", "i\u0307", 1, "UTF8_LCASE", 1); - assertStringLocate("İ", "i", 1, "UTF8_LCASE", 0); - assertStringLocate("i\u0307o", "İo世界大千世界", 1, "UNICODE_CI", 1); - assertStringLocate("i\u0307o", "大千İo世界大千世界", 1, "UNICODE_CI", 3); - assertStringLocate("i\u0307o", "世界İo大千世界大千İo", 4, "UNICODE_CI", 11); - assertStringLocate("İo", "i̇o世界大千世界", 1, "UNICODE_CI", 1); - assertStringLocate("İo", "大千i̇o世界大千世界", 1, "UNICODE_CI", 3); - assertStringLocate("İo", "世界i̇o大千世界大千i̇o", 4, "UNICODE_CI", 12); + assertStringLocate("\u0307", "i\u0307", 1, UTF8_BINARY, 2); + assertStringLocate("\u0307", "İ", 1, UTF8_LCASE, 0); // != UTF8_BINARY + assertStringLocate("i", "i\u0307", 1, UNICODE_CI, 0); + assertStringLocate("\u0307", "i\u0307", 1, UNICODE_CI, 0); + assertStringLocate("i\u0307", "i", 1, UNICODE_CI, 0); + assertStringLocate("İ", "i\u0307", 1, UNICODE_CI, 1); + assertStringLocate("İ", "i", 1, UNICODE_CI, 0); + assertStringLocate("i", "i\u0307", 1, UTF8_LCASE, 1); // != UNICODE_CI + assertStringLocate("\u0307", "i\u0307", 1, UTF8_LCASE, 2); // != UNICODE_CI + assertStringLocate("i\u0307", "i", 1, UTF8_LCASE, 0); + 
assertStringLocate("İ", "i\u0307", 1, UTF8_LCASE, 1); + assertStringLocate("İ", "i", 1, UTF8_LCASE, 0); + assertStringLocate("i\u0307o", "İo世界大千世界", 1, UNICODE_CI, 1); + assertStringLocate("i\u0307o", "大千İo世界大千世界", 1, UNICODE_CI, 3); + assertStringLocate("i\u0307o", "世界İo大千世界大千İo", 4, UNICODE_CI, 11); + assertStringLocate("İo", "i̇o世界大千世界", 1, UNICODE_CI, 1); + assertStringLocate("İo", "大千i̇o世界大千世界", 1, UNICODE_CI, 3); + assertStringLocate("İo", "世界i̇o大千世界大千i̇o", 4, UNICODE_CI, 12); // Conditional case mapping (e.g. Greek sigmas). - assertStringLocate("σ", "σ", 1, "UTF8_BINARY", 1); - assertStringLocate("σ", "ς", 1, "UTF8_BINARY", 0); - assertStringLocate("σ", "Σ", 1, "UTF8_BINARY", 0); - assertStringLocate("ς", "σ", 1, "UTF8_BINARY", 0); - assertStringLocate("ς", "ς", 1, "UTF8_BINARY", 1); - assertStringLocate("ς", "Σ", 1, "UTF8_BINARY", 0); - assertStringLocate("Σ", "σ", 1, "UTF8_BINARY", 0); - assertStringLocate("Σ", "ς", 1, "UTF8_BINARY", 0); - assertStringLocate("Σ", "Σ", 1, "UTF8_BINARY", 1); - assertStringLocate("σ", "σ", 1, "UTF8_LCASE", 1); - assertStringLocate("σ", "ς", 1, "UTF8_LCASE", 1); - assertStringLocate("σ", "Σ", 1, "UTF8_LCASE", 1); - assertStringLocate("ς", "σ", 1, "UTF8_LCASE", 1); - assertStringLocate("ς", "ς", 1, "UTF8_LCASE", 1); - assertStringLocate("ς", "Σ", 1, "UTF8_LCASE", 1); - assertStringLocate("Σ", "σ", 1, "UTF8_LCASE", 1); - assertStringLocate("Σ", "ς", 1, "UTF8_LCASE", 1); - assertStringLocate("Σ", "Σ", 1, "UTF8_LCASE", 1); - assertStringLocate("σ", "σ", 1, "UNICODE", 1); - assertStringLocate("σ", "ς", 1, "UNICODE", 0); - assertStringLocate("σ", "Σ", 1, "UNICODE", 0); - assertStringLocate("ς", "σ", 1, "UNICODE", 0); - assertStringLocate("ς", "ς", 1, "UNICODE", 1); - assertStringLocate("ς", "Σ", 1, "UNICODE", 0); - assertStringLocate("Σ", "σ", 1, "UNICODE", 0); - assertStringLocate("Σ", "ς", 1, "UNICODE", 0); - assertStringLocate("Σ", "Σ", 1, "UNICODE", 1); - assertStringLocate("σ", "σ", 1, "UNICODE_CI", 1); - 
assertStringLocate("σ", "ς", 1, "UNICODE_CI", 1); - assertStringLocate("σ", "Σ", 1, "UNICODE_CI", 1); - assertStringLocate("ς", "σ", 1, "UNICODE_CI", 1); - assertStringLocate("ς", "ς", 1, "UNICODE_CI", 1); - assertStringLocate("ς", "Σ", 1, "UNICODE_CI", 1); - assertStringLocate("Σ", "σ", 1, "UNICODE_CI", 1); - assertStringLocate("Σ", "ς", 1, "UNICODE_CI", 1); - assertStringLocate("Σ", "Σ", 1, "UNICODE_CI", 1); + assertStringLocate("σ", "σ", 1, UTF8_BINARY, 1); + assertStringLocate("σ", "ς", 1, UTF8_BINARY, 0); + assertStringLocate("σ", "Σ", 1, UTF8_BINARY, 0); + assertStringLocate("ς", "σ", 1, UTF8_BINARY, 0); + assertStringLocate("ς", "ς", 1, UTF8_BINARY, 1); + assertStringLocate("ς", "Σ", 1, UTF8_BINARY, 0); + assertStringLocate("Σ", "σ", 1, UTF8_BINARY, 0); + assertStringLocate("Σ", "ς", 1, UTF8_BINARY, 0); + assertStringLocate("Σ", "Σ", 1, UTF8_BINARY, 1); + assertStringLocate("σ", "σ", 1, UTF8_LCASE, 1); + assertStringLocate("σ", "ς", 1, UTF8_LCASE, 1); + assertStringLocate("σ", "Σ", 1, UTF8_LCASE, 1); + assertStringLocate("ς", "σ", 1, UTF8_LCASE, 1); + assertStringLocate("ς", "ς", 1, UTF8_LCASE, 1); + assertStringLocate("ς", "Σ", 1, UTF8_LCASE, 1); + assertStringLocate("Σ", "σ", 1, UTF8_LCASE, 1); + assertStringLocate("Σ", "ς", 1, UTF8_LCASE, 1); + assertStringLocate("Σ", "Σ", 1, UTF8_LCASE, 1); + assertStringLocate("σ", "σ", 1, UNICODE, 1); + assertStringLocate("σ", "ς", 1, UNICODE, 0); + assertStringLocate("σ", "Σ", 1, UNICODE, 0); + assertStringLocate("ς", "σ", 1, UNICODE, 0); + assertStringLocate("ς", "ς", 1, UNICODE, 1); + assertStringLocate("ς", "Σ", 1, UNICODE, 0); + assertStringLocate("Σ", "σ", 1, UNICODE, 0); + assertStringLocate("Σ", "ς", 1, UNICODE, 0); + assertStringLocate("Σ", "Σ", 1, UNICODE, 1); + assertStringLocate("σ", "σ", 1, UNICODE_CI, 1); + assertStringLocate("σ", "ς", 1, UNICODE_CI, 1); + assertStringLocate("σ", "Σ", 1, UNICODE_CI, 1); + assertStringLocate("ς", "σ", 1, UNICODE_CI, 1); + assertStringLocate("ς", "ς", 1, UNICODE_CI, 1); + 
assertStringLocate("ς", "Σ", 1, UNICODE_CI, 1); + assertStringLocate("Σ", "σ", 1, UNICODE_CI, 1); + assertStringLocate("Σ", "ς", 1, UNICODE_CI, 1); + assertStringLocate("Σ", "Σ", 1, UNICODE_CI, 1); // Surrogate pairs. - assertStringLocate("a", "a🙃b", 1, "UTF8_BINARY", 1); - assertStringLocate("a", "a🙃b", 1, "UTF8_LCASE", 1); - assertStringLocate("a", "a🙃b", 1, "UNICODE", 1); - assertStringLocate("a", "a🙃b", 1, "UNICODE_CI", 1); - assertStringLocate("a", "a🙃b", 2, "UTF8_BINARY", 0); - assertStringLocate("a", "a🙃b", 2, "UTF8_LCASE", 0); - assertStringLocate("a", "a🙃b", 2, "UNICODE", 0); - assertStringLocate("a", "a🙃b", 2, "UNICODE_CI", 0); - assertStringLocate("a", "a🙃b", 3, "UTF8_BINARY", 0); - assertStringLocate("a", "a🙃b", 3, "UTF8_LCASE", 0); - assertStringLocate("a", "a🙃b", 3, "UNICODE", 0); - assertStringLocate("a", "a🙃b", 3, "UNICODE_CI", 0); - assertStringLocate("🙃", "a🙃b", 1, "UTF8_BINARY", 2); - assertStringLocate("🙃", "a🙃b", 1, "UTF8_LCASE", 2); - assertStringLocate("🙃", "a🙃b", 1, "UNICODE", 2); - assertStringLocate("🙃", "a🙃b", 1, "UNICODE_CI", 2); - assertStringLocate("🙃", "a🙃b", 2, "UTF8_BINARY", 2); - assertStringLocate("🙃", "a🙃b", 2, "UTF8_LCASE", 2); - assertStringLocate("🙃", "a🙃b", 2, "UNICODE", 2); - assertStringLocate("🙃", "a🙃b", 2, "UNICODE_CI", 2); - assertStringLocate("🙃", "a🙃b", 3, "UTF8_BINARY", 0); - assertStringLocate("🙃", "a🙃b", 3, "UTF8_LCASE", 0); - assertStringLocate("🙃", "a🙃b", 3, "UNICODE", 0); - assertStringLocate("🙃", "a🙃b", 3, "UNICODE_CI", 0); - assertStringLocate("b", "a🙃b", 1, "UTF8_BINARY", 3); - assertStringLocate("b", "a🙃b", 1, "UTF8_LCASE", 3); - assertStringLocate("b", "a🙃b", 1, "UNICODE", 3); - assertStringLocate("b", "a🙃b", 1, "UNICODE_CI", 3); - assertStringLocate("b", "a🙃b", 2, "UTF8_BINARY", 3); - assertStringLocate("b", "a🙃b", 2, "UTF8_LCASE", 3); - assertStringLocate("b", "a🙃b", 2, "UNICODE", 3); - assertStringLocate("b", "a🙃b", 2, "UNICODE_CI", 3); - assertStringLocate("b", "a🙃b", 3, "UTF8_BINARY", 3); - 
assertStringLocate("b", "a🙃b", 3, "UTF8_LCASE", 3); - assertStringLocate("b", "a🙃b", 3, "UNICODE", 3); - assertStringLocate("b", "a🙃b", 3, "UNICODE_CI", 3); - assertStringLocate("🙃", "a🙃🙃b", 1, "UTF8_BINARY", 2); - assertStringLocate("🙃", "a🙃🙃b", 1, "UTF8_LCASE", 2); - assertStringLocate("🙃", "a🙃🙃b", 1, "UNICODE", 2); - assertStringLocate("🙃", "a🙃🙃b", 1, "UNICODE_CI", 2); - assertStringLocate("🙃", "a🙃🙃b", 2, "UTF8_BINARY", 2); - assertStringLocate("🙃", "a🙃🙃b", 2, "UTF8_LCASE", 2); - assertStringLocate("🙃", "a🙃🙃b", 2, "UNICODE", 2); - assertStringLocate("🙃", "a🙃🙃b", 2, "UNICODE_CI", 2); - assertStringLocate("🙃", "a🙃🙃b", 3, "UTF8_BINARY", 3); - assertStringLocate("🙃", "a🙃🙃b", 3, "UTF8_LCASE", 3); - assertStringLocate("🙃", "a🙃🙃b", 3, "UNICODE", 3); - assertStringLocate("🙃", "a🙃🙃b", 3, "UNICODE_CI", 3); - assertStringLocate("🙃", "a🙃🙃b", 4, "UTF8_BINARY", 0); - assertStringLocate("🙃", "a🙃🙃b", 4, "UTF8_LCASE", 0); - assertStringLocate("🙃", "a🙃🙃b", 4, "UNICODE", 0); - assertStringLocate("🙃", "a🙃🙃b", 4, "UNICODE_CI", 0); - assertStringLocate("b", "a🙃🙃b", 1, "UTF8_BINARY", 4); - assertStringLocate("b", "a🙃🙃b", 1, "UTF8_LCASE", 4); - assertStringLocate("b", "a🙃🙃b", 1, "UNICODE", 4); - assertStringLocate("b", "a🙃🙃b", 1, "UNICODE_CI", 4); - assertStringLocate("b", "a🙃🙃b", 2, "UTF8_BINARY", 4); - assertStringLocate("b", "a🙃🙃b", 2, "UTF8_LCASE", 4); - assertStringLocate("b", "a🙃🙃b", 2, "UNICODE", 4); - assertStringLocate("b", "a🙃🙃b", 2, "UNICODE_CI", 4); - assertStringLocate("b", "a🙃🙃b", 3, "UTF8_BINARY", 4); - assertStringLocate("b", "a🙃🙃b", 3, "UTF8_LCASE", 4); - assertStringLocate("b", "a🙃🙃b", 3, "UNICODE", 4); - assertStringLocate("b", "a🙃🙃b", 3, "UNICODE_CI", 4); - assertStringLocate("b", "a🙃🙃b", 4, "UTF8_BINARY", 4); - assertStringLocate("b", "a🙃🙃b", 4, "UTF8_LCASE", 4); - assertStringLocate("b", "a🙃🙃b", 4, "UNICODE", 4); - assertStringLocate("b", "a🙃🙃b", 4, "UNICODE_CI", 4); - assertStringLocate("b", "a🙃x🙃b", 1, "UTF8_BINARY", 5); - assertStringLocate("b", "a🙃x🙃b", 1, 
"UTF8_LCASE", 5); - assertStringLocate("b", "a🙃x🙃b", 1, "UNICODE", 5); - assertStringLocate("b", "a🙃x🙃b", 1, "UNICODE_CI", 5); - assertStringLocate("b", "a🙃x🙃b", 2, "UTF8_BINARY", 5); - assertStringLocate("b", "a🙃x🙃b", 2, "UTF8_LCASE", 5); - assertStringLocate("b", "a🙃x🙃b", 2, "UNICODE", 5); - assertStringLocate("b", "a🙃x🙃b", 2, "UNICODE_CI", 5); - assertStringLocate("b", "a🙃x🙃b", 3, "UTF8_BINARY", 5); - assertStringLocate("b", "a🙃x🙃b", 3, "UTF8_LCASE", 5); - assertStringLocate("b", "a🙃x🙃b", 3, "UNICODE", 5); - assertStringLocate("b", "a🙃x🙃b", 3, "UNICODE_CI", 5); - assertStringLocate("b", "a🙃x🙃b", 4, "UTF8_BINARY", 5); - assertStringLocate("b", "a🙃x🙃b", 4, "UTF8_LCASE", 5); - assertStringLocate("b", "a🙃x🙃b", 4, "UNICODE", 5); - assertStringLocate("b", "a🙃x🙃b", 4, "UNICODE_CI", 5); + assertStringLocate("a", "a🙃b", 1, UTF8_BINARY, 1); + assertStringLocate("a", "a🙃b", 1, UTF8_LCASE, 1); + assertStringLocate("a", "a🙃b", 1, UNICODE, 1); + assertStringLocate("a", "a🙃b", 1, UNICODE_CI, 1); + assertStringLocate("a", "a🙃b", 2, UTF8_BINARY, 0); + assertStringLocate("a", "a🙃b", 2, UTF8_LCASE, 0); + assertStringLocate("a", "a🙃b", 2, UNICODE, 0); + assertStringLocate("a", "a🙃b", 2, UNICODE_CI, 0); + assertStringLocate("a", "a🙃b", 3, UTF8_BINARY, 0); + assertStringLocate("a", "a🙃b", 3, UTF8_LCASE, 0); + assertStringLocate("a", "a🙃b", 3, UNICODE, 0); + assertStringLocate("a", "a🙃b", 3, UNICODE_CI, 0); + assertStringLocate("🙃", "a🙃b", 1, UTF8_BINARY, 2); + assertStringLocate("🙃", "a🙃b", 1, UTF8_LCASE, 2); + assertStringLocate("🙃", "a🙃b", 1, UNICODE, 2); + assertStringLocate("🙃", "a🙃b", 1, UNICODE_CI, 2); + assertStringLocate("🙃", "a🙃b", 2, UTF8_BINARY, 2); + assertStringLocate("🙃", "a🙃b", 2, UTF8_LCASE, 2); + assertStringLocate("🙃", "a🙃b", 2, UNICODE, 2); + assertStringLocate("🙃", "a🙃b", 2, UNICODE_CI, 2); + assertStringLocate("🙃", "a🙃b", 3, UTF8_BINARY, 0); + assertStringLocate("🙃", "a🙃b", 3, UTF8_LCASE, 0); + assertStringLocate("🙃", "a🙃b", 3, UNICODE, 0); + 
assertStringLocate("🙃", "a🙃b", 3, UNICODE_CI, 0); + assertStringLocate("b", "a🙃b", 1, UTF8_BINARY, 3); + assertStringLocate("b", "a🙃b", 1, UTF8_LCASE, 3); + assertStringLocate("b", "a🙃b", 1, UNICODE, 3); + assertStringLocate("b", "a🙃b", 1, UNICODE_CI, 3); + assertStringLocate("b", "a🙃b", 2, UTF8_BINARY, 3); + assertStringLocate("b", "a🙃b", 2, UTF8_LCASE, 3); + assertStringLocate("b", "a🙃b", 2, UNICODE, 3); + assertStringLocate("b", "a🙃b", 2, UNICODE_CI, 3); + assertStringLocate("b", "a🙃b", 3, UTF8_BINARY, 3); + assertStringLocate("b", "a🙃b", 3, UTF8_LCASE, 3); + assertStringLocate("b", "a🙃b", 3, UNICODE, 3); + assertStringLocate("b", "a🙃b", 3, UNICODE_CI, 3); + assertStringLocate("🙃", "a🙃🙃b", 1, UTF8_BINARY, 2); + assertStringLocate("🙃", "a🙃🙃b", 1, UTF8_LCASE, 2); + assertStringLocate("🙃", "a🙃🙃b", 1, UNICODE, 2); + assertStringLocate("🙃", "a🙃🙃b", 1, UNICODE_CI, 2); + assertStringLocate("🙃", "a🙃🙃b", 2, UTF8_BINARY, 2); + assertStringLocate("🙃", "a🙃🙃b", 2, UTF8_LCASE, 2); + assertStringLocate("🙃", "a🙃🙃b", 2, UNICODE, 2); + assertStringLocate("🙃", "a🙃🙃b", 2, UNICODE_CI, 2); + assertStringLocate("🙃", "a🙃🙃b", 3, UTF8_BINARY, 3); + assertStringLocate("🙃", "a🙃🙃b", 3, UTF8_LCASE, 3); + assertStringLocate("🙃", "a🙃🙃b", 3, UNICODE, 3); + assertStringLocate("🙃", "a🙃🙃b", 3, UNICODE_CI, 3); + assertStringLocate("🙃", "a🙃🙃b", 4, UTF8_BINARY, 0); + assertStringLocate("🙃", "a🙃🙃b", 4, UTF8_LCASE, 0); + assertStringLocate("🙃", "a🙃🙃b", 4, UNICODE, 0); + assertStringLocate("🙃", "a🙃🙃b", 4, UNICODE_CI, 0); + assertStringLocate("b", "a🙃🙃b", 1, UTF8_BINARY, 4); + assertStringLocate("b", "a🙃🙃b", 1, UTF8_LCASE, 4); + assertStringLocate("b", "a🙃🙃b", 1, UNICODE, 4); + assertStringLocate("b", "a🙃🙃b", 1, UNICODE_CI, 4); + assertStringLocate("b", "a🙃🙃b", 2, UTF8_BINARY, 4); + assertStringLocate("b", "a🙃🙃b", 2, UTF8_LCASE, 4); + assertStringLocate("b", "a🙃🙃b", 2, UNICODE, 4); + assertStringLocate("b", "a🙃🙃b", 2, UNICODE_CI, 4); + assertStringLocate("b", "a🙃🙃b", 3, UTF8_BINARY, 4); + 
assertStringLocate("b", "a🙃🙃b", 3, UTF8_LCASE, 4); + assertStringLocate("b", "a🙃🙃b", 3, UNICODE, 4); + assertStringLocate("b", "a🙃🙃b", 3, UNICODE_CI, 4); + assertStringLocate("b", "a🙃🙃b", 4, UTF8_BINARY, 4); + assertStringLocate("b", "a🙃🙃b", 4, UTF8_LCASE, 4); + assertStringLocate("b", "a🙃🙃b", 4, UNICODE, 4); + assertStringLocate("b", "a🙃🙃b", 4, UNICODE_CI, 4); + assertStringLocate("b", "a🙃x🙃b", 1, UTF8_BINARY, 5); + assertStringLocate("b", "a🙃x🙃b", 1, UTF8_LCASE, 5); + assertStringLocate("b", "a🙃x🙃b", 1, UNICODE, 5); + assertStringLocate("b", "a🙃x🙃b", 1, UNICODE_CI, 5); + assertStringLocate("b", "a🙃x🙃b", 2, UTF8_BINARY, 5); + assertStringLocate("b", "a🙃x🙃b", 2, UTF8_LCASE, 5); + assertStringLocate("b", "a🙃x🙃b", 2, UNICODE, 5); + assertStringLocate("b", "a🙃x🙃b", 2, UNICODE_CI, 5); + assertStringLocate("b", "a🙃x🙃b", 3, UTF8_BINARY, 5); + assertStringLocate("b", "a🙃x🙃b", 3, UTF8_LCASE, 5); + assertStringLocate("b", "a🙃x🙃b", 3, UNICODE, 5); + assertStringLocate("b", "a🙃x🙃b", 3, UNICODE_CI, 5); + assertStringLocate("b", "a🙃x🙃b", 4, UTF8_BINARY, 5); + assertStringLocate("b", "a🙃x🙃b", 4, UTF8_LCASE, 5); + assertStringLocate("b", "a🙃x🙃b", 4, UNICODE, 5); + assertStringLocate("b", "a🙃x🙃b", 4, UNICODE_CI, 5); // Out of bounds test cases. 
- assertStringLocate("a", "asd", 4, "UTF8_BINARY", 0); - assertStringLocate("a", "asd", 4, "UTF8_LCASE", 0); - assertStringLocate("a", "asd", 4, "UNICODE", 0); - assertStringLocate("a", "asd", 4, "UNICODE_CI", 0); - assertStringLocate("a", "asd", 100, "UTF8_BINARY", 0); - assertStringLocate("a", "asd", 100, "UTF8_LCASE", 0); - assertStringLocate("a", "asd", 100, "UNICODE", 0); - assertStringLocate("a", "asd", 100, "UNICODE_CI", 0); - assertStringLocate("a", "🙃🙃", 4, "UTF8_BINARY", 0); - assertStringLocate("a", "🙃🙃", 4, "UTF8_LCASE", 0); - assertStringLocate("a", "🙃🙃", 4, "UNICODE", 0); - assertStringLocate("a", "🙃🙃", 4, "UNICODE_CI", 0); - assertStringLocate("", "asd", 100, "UTF8_BINARY", 1); - assertStringLocate("", "asd", 100, "UTF8_LCASE", 1); - assertStringLocate("", "asd", 100, "UNICODE", 1); - assertStringLocate("", "asd", 100, "UNICODE_CI", 1); - assertStringLocate("asd", "", 100, "UTF8_BINARY", 0); - assertStringLocate("asd", "", 100, "UTF8_LCASE", 0); - assertStringLocate("asd", "", 100, "UNICODE", 0); - assertStringLocate("asd", "", 100, "UNICODE_CI", 0); + assertStringLocate("a", "asd", 4, UTF8_BINARY, 0); + assertStringLocate("a", "asd", 4, UTF8_LCASE, 0); + assertStringLocate("a", "asd", 4, UNICODE, 0); + assertStringLocate("a", "asd", 4, UNICODE_CI, 0); + assertStringLocate("a", "asd", 100, UTF8_BINARY, 0); + assertStringLocate("a", "asd", 100, UTF8_LCASE, 0); + assertStringLocate("a", "asd", 100, UNICODE, 0); + assertStringLocate("a", "asd", 100, UNICODE_CI, 0); + assertStringLocate("a", "🙃🙃", 4, UTF8_BINARY, 0); + assertStringLocate("a", "🙃🙃", 4, UTF8_LCASE, 0); + assertStringLocate("a", "🙃🙃", 4, UNICODE, 0); + assertStringLocate("a", "🙃🙃", 4, UNICODE_CI, 0); + assertStringLocate("", "asd", 100, UTF8_BINARY, 1); + assertStringLocate("", "asd", 100, UTF8_LCASE, 1); + assertStringLocate("", "asd", 100, UNICODE, 1); + assertStringLocate("", "asd", 100, UNICODE_CI, 1); + assertStringLocate("asd", "", 100, UTF8_BINARY, 0); + assertStringLocate("asd", "", 
100, UTF8_LCASE, 0); + assertStringLocate("asd", "", 100, UNICODE, 0); + assertStringLocate("asd", "", 100, UNICODE_CI, 0); } /** @@ -2450,292 +2451,292 @@ private void assertSubstringIndex(String string, String delimiter, int count, @Test public void testSubstringIndex() throws SparkException { // Empty strings. - assertSubstringIndex("", "", 0, "UTF8_BINARY", ""); - assertSubstringIndex("", "", 0, "UTF8_LCASE", ""); - assertSubstringIndex("", "", 0, "UNICODE", ""); - assertSubstringIndex("", "", 0, "UNICODE_CI", ""); - assertSubstringIndex("", "", 1, "UTF8_BINARY", ""); - assertSubstringIndex("", "", 1, "UTF8_LCASE", ""); - assertSubstringIndex("", "", 1, "UNICODE", ""); - assertSubstringIndex("", "", 1, "UNICODE_CI", ""); - assertSubstringIndex("", "", -1, "UTF8_BINARY", ""); - assertSubstringIndex("", "", -1, "UTF8_LCASE", ""); - assertSubstringIndex("", "", -1, "UNICODE", ""); - assertSubstringIndex("", "", -1, "UNICODE_CI", ""); - assertSubstringIndex("", "x", 0, "UTF8_BINARY", ""); - assertSubstringIndex("", "x", 0, "UTF8_LCASE", ""); - assertSubstringIndex("", "x", 0, "UNICODE", ""); - assertSubstringIndex("", "x", 0, "UNICODE_CI", ""); - assertSubstringIndex("", "x", 1, "UTF8_BINARY", ""); - assertSubstringIndex("", "x", 1, "UTF8_LCASE", ""); - assertSubstringIndex("", "x", 1, "UNICODE", ""); - assertSubstringIndex("", "x", 1, "UNICODE_CI", ""); - assertSubstringIndex("", "x", -1, "UTF8_BINARY", ""); - assertSubstringIndex("", "x", -1, "UTF8_LCASE", ""); - assertSubstringIndex("", "x", -1, "UNICODE", ""); - assertSubstringIndex("", "x", -1, "UNICODE_CI", ""); - assertSubstringIndex("abc", "", 0, "UTF8_BINARY", ""); - assertSubstringIndex("abc", "", 0, "UTF8_LCASE", ""); - assertSubstringIndex("abc", "", 0, "UNICODE", ""); - assertSubstringIndex("abc", "", 0, "UNICODE_CI", ""); - assertSubstringIndex("abc", "", 1, "UTF8_BINARY", ""); - assertSubstringIndex("abc", "", 1, "UTF8_LCASE", ""); - assertSubstringIndex("abc", "", 1, "UNICODE", ""); - 
assertSubstringIndex("abc", "", 1, "UNICODE_CI", ""); - assertSubstringIndex("abc", "", -1, "UTF8_BINARY", ""); - assertSubstringIndex("abc", "", -1, "UTF8_LCASE", ""); - assertSubstringIndex("abc", "", -1, "UNICODE", ""); - assertSubstringIndex("abc", "", -1, "UNICODE_CI", ""); + assertSubstringIndex("", "", 0, UTF8_BINARY, ""); + assertSubstringIndex("", "", 0, UTF8_LCASE, ""); + assertSubstringIndex("", "", 0, UNICODE, ""); + assertSubstringIndex("", "", 0, UNICODE_CI, ""); + assertSubstringIndex("", "", 1, UTF8_BINARY, ""); + assertSubstringIndex("", "", 1, UTF8_LCASE, ""); + assertSubstringIndex("", "", 1, UNICODE, ""); + assertSubstringIndex("", "", 1, UNICODE_CI, ""); + assertSubstringIndex("", "", -1, UTF8_BINARY, ""); + assertSubstringIndex("", "", -1, UTF8_LCASE, ""); + assertSubstringIndex("", "", -1, UNICODE, ""); + assertSubstringIndex("", "", -1, UNICODE_CI, ""); + assertSubstringIndex("", "x", 0, UTF8_BINARY, ""); + assertSubstringIndex("", "x", 0, UTF8_LCASE, ""); + assertSubstringIndex("", "x", 0, UNICODE, ""); + assertSubstringIndex("", "x", 0, UNICODE_CI, ""); + assertSubstringIndex("", "x", 1, UTF8_BINARY, ""); + assertSubstringIndex("", "x", 1, UTF8_LCASE, ""); + assertSubstringIndex("", "x", 1, UNICODE, ""); + assertSubstringIndex("", "x", 1, UNICODE_CI, ""); + assertSubstringIndex("", "x", -1, UTF8_BINARY, ""); + assertSubstringIndex("", "x", -1, UTF8_LCASE, ""); + assertSubstringIndex("", "x", -1, UNICODE, ""); + assertSubstringIndex("", "x", -1, UNICODE_CI, ""); + assertSubstringIndex("abc", "", 0, UTF8_BINARY, ""); + assertSubstringIndex("abc", "", 0, UTF8_LCASE, ""); + assertSubstringIndex("abc", "", 0, UNICODE, ""); + assertSubstringIndex("abc", "", 0, UNICODE_CI, ""); + assertSubstringIndex("abc", "", 1, UTF8_BINARY, ""); + assertSubstringIndex("abc", "", 1, UTF8_LCASE, ""); + assertSubstringIndex("abc", "", 1, UNICODE, ""); + assertSubstringIndex("abc", "", 1, UNICODE_CI, ""); + assertSubstringIndex("abc", "", -1, UTF8_BINARY, ""); + 
assertSubstringIndex("abc", "", -1, UTF8_LCASE, ""); + assertSubstringIndex("abc", "", -1, UNICODE, ""); + assertSubstringIndex("abc", "", -1, UNICODE_CI, ""); // Basic tests. - assertSubstringIndex("axbxc", "a", 1, "UTF8_BINARY", ""); - assertSubstringIndex("axbxc", "a", 1, "UTF8_LCASE", ""); - assertSubstringIndex("axbxc", "a", 1, "UNICODE", ""); - assertSubstringIndex("axbxc", "a", 1, "UNICODE_CI", ""); - assertSubstringIndex("axbxc", "x", 1, "UTF8_BINARY", "a"); - assertSubstringIndex("axbxc", "x", 1, "UTF8_LCASE", "a"); - assertSubstringIndex("axbxc", "x", 1, "UNICODE", "a"); - assertSubstringIndex("axbxc", "x", 1, "UNICODE_CI", "a"); - assertSubstringIndex("axbxc", "b", 1, "UTF8_BINARY", "ax"); - assertSubstringIndex("axbxc", "b", 1, "UTF8_LCASE", "ax"); - assertSubstringIndex("axbxc", "b", 1, "UNICODE", "ax"); - assertSubstringIndex("axbxc", "b", 1, "UNICODE_CI", "ax"); - assertSubstringIndex("axbxc", "x", 2, "UTF8_BINARY", "axb"); - assertSubstringIndex("axbxc", "x", 2, "UTF8_LCASE", "axb"); - assertSubstringIndex("axbxc", "x", 2, "UNICODE", "axb"); - assertSubstringIndex("axbxc", "x", 2, "UNICODE_CI", "axb"); - assertSubstringIndex("axbxc", "c", 1, "UTF8_BINARY", "axbx"); - assertSubstringIndex("axbxc", "c", 1, "UTF8_LCASE", "axbx"); - assertSubstringIndex("axbxc", "c", 1, "UNICODE", "axbx"); - assertSubstringIndex("axbxc", "c", 1, "UNICODE_CI", "axbx"); - assertSubstringIndex("axbxc", "x", 3, "UTF8_BINARY", "axbxc"); - assertSubstringIndex("axbxc", "x", 3, "UTF8_LCASE", "axbxc"); - assertSubstringIndex("axbxc", "x", 3, "UNICODE", "axbxc"); - assertSubstringIndex("axbxc", "x", 3, "UNICODE_CI", "axbxc"); - assertSubstringIndex("axbxc", "d", 1, "UTF8_BINARY", "axbxc"); - assertSubstringIndex("axbxc", "d", 1, "UTF8_LCASE", "axbxc"); - assertSubstringIndex("axbxc", "d", 1, "UNICODE", "axbxc"); - assertSubstringIndex("axbxc", "d", 1, "UNICODE_CI", "axbxc"); - assertSubstringIndex("axbxc", "c", -1, "UTF8_BINARY", ""); - assertSubstringIndex("axbxc", "c", -1, 
"UTF8_LCASE", ""); - assertSubstringIndex("axbxc", "c", -1, "UNICODE", ""); - assertSubstringIndex("axbxc", "c", -1, "UNICODE_CI", ""); - assertSubstringIndex("axbxc", "x", -1, "UTF8_BINARY", "c"); - assertSubstringIndex("axbxc", "x", -1, "UTF8_LCASE", "c"); - assertSubstringIndex("axbxc", "x", -1, "UNICODE", "c"); - assertSubstringIndex("axbxc", "x", -1, "UNICODE_CI", "c"); - assertSubstringIndex("axbxc", "b", -1, "UTF8_BINARY", "xc"); - assertSubstringIndex("axbxc", "b", -1, "UTF8_LCASE", "xc"); - assertSubstringIndex("axbxc", "b", -1, "UNICODE", "xc"); - assertSubstringIndex("axbxc", "b", -1, "UNICODE_CI", "xc"); - assertSubstringIndex("axbxc", "x", -2, "UTF8_BINARY", "bxc"); - assertSubstringIndex("axbxc", "x", -2, "UTF8_LCASE", "bxc"); - assertSubstringIndex("axbxc", "x", -2, "UNICODE", "bxc"); - assertSubstringIndex("axbxc", "x", -2, "UNICODE_CI", "bxc"); - assertSubstringIndex("axbxc", "a", -1, "UTF8_BINARY", "xbxc"); - assertSubstringIndex("axbxc", "a", -1, "UTF8_LCASE", "xbxc"); - assertSubstringIndex("axbxc", "a", -1, "UNICODE", "xbxc"); - assertSubstringIndex("axbxc", "a", -1, "UNICODE_CI", "xbxc"); - assertSubstringIndex("axbxc", "x", -3, "UTF8_BINARY", "axbxc"); - assertSubstringIndex("axbxc", "x", -3, "UTF8_LCASE", "axbxc"); - assertSubstringIndex("axbxc", "x", -3, "UNICODE", "axbxc"); - assertSubstringIndex("axbxc", "x", -3, "UNICODE_CI", "axbxc"); - assertSubstringIndex("axbxc", "d", -1, "UTF8_BINARY", "axbxc"); - assertSubstringIndex("axbxc", "d", -1, "UTF8_LCASE", "axbxc"); - assertSubstringIndex("axbxc", "d", -1, "UNICODE", "axbxc"); - assertSubstringIndex("axbxc", "d", -1, "UNICODE_CI", "axbxc"); + assertSubstringIndex("axbxc", "a", 1, UTF8_BINARY, ""); + assertSubstringIndex("axbxc", "a", 1, UTF8_LCASE, ""); + assertSubstringIndex("axbxc", "a", 1, UNICODE, ""); + assertSubstringIndex("axbxc", "a", 1, UNICODE_CI, ""); + assertSubstringIndex("axbxc", "x", 1, UTF8_BINARY, "a"); + assertSubstringIndex("axbxc", "x", 1, UTF8_LCASE, "a"); + 
assertSubstringIndex("axbxc", "x", 1, UNICODE, "a"); + assertSubstringIndex("axbxc", "x", 1, UNICODE_CI, "a"); + assertSubstringIndex("axbxc", "b", 1, UTF8_BINARY, "ax"); + assertSubstringIndex("axbxc", "b", 1, UTF8_LCASE, "ax"); + assertSubstringIndex("axbxc", "b", 1, UNICODE, "ax"); + assertSubstringIndex("axbxc", "b", 1, UNICODE_CI, "ax"); + assertSubstringIndex("axbxc", "x", 2, UTF8_BINARY, "axb"); + assertSubstringIndex("axbxc", "x", 2, UTF8_LCASE, "axb"); + assertSubstringIndex("axbxc", "x", 2, UNICODE, "axb"); + assertSubstringIndex("axbxc", "x", 2, UNICODE_CI, "axb"); + assertSubstringIndex("axbxc", "c", 1, UTF8_BINARY, "axbx"); + assertSubstringIndex("axbxc", "c", 1, UTF8_LCASE, "axbx"); + assertSubstringIndex("axbxc", "c", 1, UNICODE, "axbx"); + assertSubstringIndex("axbxc", "c", 1, UNICODE_CI, "axbx"); + assertSubstringIndex("axbxc", "x", 3, UTF8_BINARY, "axbxc"); + assertSubstringIndex("axbxc", "x", 3, UTF8_LCASE, "axbxc"); + assertSubstringIndex("axbxc", "x", 3, UNICODE, "axbxc"); + assertSubstringIndex("axbxc", "x", 3, UNICODE_CI, "axbxc"); + assertSubstringIndex("axbxc", "d", 1, UTF8_BINARY, "axbxc"); + assertSubstringIndex("axbxc", "d", 1, UTF8_LCASE, "axbxc"); + assertSubstringIndex("axbxc", "d", 1, UNICODE, "axbxc"); + assertSubstringIndex("axbxc", "d", 1, UNICODE_CI, "axbxc"); + assertSubstringIndex("axbxc", "c", -1, UTF8_BINARY, ""); + assertSubstringIndex("axbxc", "c", -1, UTF8_LCASE, ""); + assertSubstringIndex("axbxc", "c", -1, UNICODE, ""); + assertSubstringIndex("axbxc", "c", -1, UNICODE_CI, ""); + assertSubstringIndex("axbxc", "x", -1, UTF8_BINARY, "c"); + assertSubstringIndex("axbxc", "x", -1, UTF8_LCASE, "c"); + assertSubstringIndex("axbxc", "x", -1, UNICODE, "c"); + assertSubstringIndex("axbxc", "x", -1, UNICODE_CI, "c"); + assertSubstringIndex("axbxc", "b", -1, UTF8_BINARY, "xc"); + assertSubstringIndex("axbxc", "b", -1, UTF8_LCASE, "xc"); + assertSubstringIndex("axbxc", "b", -1, UNICODE, "xc"); + assertSubstringIndex("axbxc", "b", -1, 
UNICODE_CI, "xc"); + assertSubstringIndex("axbxc", "x", -2, UTF8_BINARY, "bxc"); + assertSubstringIndex("axbxc", "x", -2, UTF8_LCASE, "bxc"); + assertSubstringIndex("axbxc", "x", -2, UNICODE, "bxc"); + assertSubstringIndex("axbxc", "x", -2, UNICODE_CI, "bxc"); + assertSubstringIndex("axbxc", "a", -1, UTF8_BINARY, "xbxc"); + assertSubstringIndex("axbxc", "a", -1, UTF8_LCASE, "xbxc"); + assertSubstringIndex("axbxc", "a", -1, UNICODE, "xbxc"); + assertSubstringIndex("axbxc", "a", -1, UNICODE_CI, "xbxc"); + assertSubstringIndex("axbxc", "x", -3, UTF8_BINARY, "axbxc"); + assertSubstringIndex("axbxc", "x", -3, UTF8_LCASE, "axbxc"); + assertSubstringIndex("axbxc", "x", -3, UNICODE, "axbxc"); + assertSubstringIndex("axbxc", "x", -3, UNICODE_CI, "axbxc"); + assertSubstringIndex("axbxc", "d", -1, UTF8_BINARY, "axbxc"); + assertSubstringIndex("axbxc", "d", -1, UTF8_LCASE, "axbxc"); + assertSubstringIndex("axbxc", "d", -1, UNICODE, "axbxc"); + assertSubstringIndex("axbxc", "d", -1, UNICODE_CI, "axbxc"); // Advanced tests. 
- assertSubstringIndex("wwwgapachegorg", "g", -3, "UTF8_BINARY", "apachegorg"); - assertSubstringIndex("www||apache||org", "||", 2, "UTF8_BINARY", "www||apache"); - assertSubstringIndex("aaaaaaaaaa", "aa", 2, "UTF8_BINARY", "a"); - assertSubstringIndex("AaAaAaAaAa", "aa", 2, "UTF8_LCASE", "A"); - assertSubstringIndex("www.apache.org", ".", 3, "UTF8_LCASE", "www.apache.org"); - assertSubstringIndex("wwwXapacheXorg", "x", 2, "UTF8_LCASE", "wwwXapache"); - assertSubstringIndex("wwwxapachexorg", "X", 1, "UTF8_LCASE", "www"); - assertSubstringIndex("www.apache.org", ".", 0, "UTF8_LCASE", ""); - assertSubstringIndex("www.apache.ORG", ".", -3, "UTF8_LCASE", "www.apache.ORG"); - assertSubstringIndex("wwwGapacheGorg", "g", 1, "UTF8_LCASE", "www"); - assertSubstringIndex("wwwGapacheGorg", "g", 3, "UTF8_LCASE", "wwwGapacheGor"); - assertSubstringIndex("gwwwGapacheGorg", "g", 3, "UTF8_LCASE", "gwwwGapache"); - assertSubstringIndex("wwwGapacheGorg", "g", -3, "UTF8_LCASE", "apacheGorg"); - assertSubstringIndex("wwwmapacheMorg", "M", -2, "UTF8_LCASE", "apacheMorg"); - assertSubstringIndex("www.apache.org", ".", -1, "UTF8_LCASE", "org"); - assertSubstringIndex("www.apache.org.", ".", -1, "UTF8_LCASE", ""); - assertSubstringIndex("", ".", -2, "UTF8_LCASE", ""); - assertSubstringIndex("test大千世界X大千世界", "x", -1, "UTF8_LCASE", "大千世界"); - assertSubstringIndex("test大千世界X大千世界", "X", 1, "UTF8_LCASE", "test大千世界"); - assertSubstringIndex("test大千世界大千世界", "千", 2, "UTF8_LCASE", "test大千世界大"); - assertSubstringIndex("www||APACHE||org", "||", 2, "UTF8_LCASE", "www||APACHE"); - assertSubstringIndex("www||APACHE||org", "||", -1, "UTF8_LCASE", "org"); - assertSubstringIndex("AaAaAaAaAa", "Aa", 2, "UNICODE", "Aa"); - assertSubstringIndex("wwwYapacheyorg", "y", 3, "UNICODE", "wwwYapacheyorg"); - assertSubstringIndex("www.apache.org", ".", 2, "UNICODE", "www.apache"); - assertSubstringIndex("wwwYapacheYorg", "Y", 1, "UNICODE", "www"); - assertSubstringIndex("wwwYapacheYorg", "y", 1, "UNICODE", 
"wwwYapacheYorg"); - assertSubstringIndex("wwwGapacheGorg", "g", 1, "UNICODE", "wwwGapacheGor"); - assertSubstringIndex("GwwwGapacheGorG", "G", 3, "UNICODE", "GwwwGapache"); - assertSubstringIndex("wwwGapacheGorG", "G", -3, "UNICODE", "apacheGorG"); - assertSubstringIndex("www.apache.org", ".", 0, "UNICODE", ""); - assertSubstringIndex("www.apache.org", ".", -3, "UNICODE", "www.apache.org"); - assertSubstringIndex("www.apache.org", ".", -2, "UNICODE", "apache.org"); - assertSubstringIndex("www.apache.org", ".", -1, "UNICODE", "org"); - assertSubstringIndex("", ".", -2, "UNICODE", ""); - assertSubstringIndex("test大千世界X大千世界", "X", -1, "UNICODE", "大千世界"); - assertSubstringIndex("test大千世界X大千世界", "X", 1, "UNICODE", "test大千世界"); - assertSubstringIndex("大x千世界大千世x界", "x", 1, "UNICODE", "大"); - assertSubstringIndex("大x千世界大千世x界", "x", -1, "UNICODE", "界"); - assertSubstringIndex("大x千世界大千世x界", "x", -2, "UNICODE", "千世界大千世x界"); - assertSubstringIndex("大千世界大千世界", "千", 2, "UNICODE", "大千世界大"); - assertSubstringIndex("www||apache||org", "||", 2, "UNICODE", "www||apache"); - assertSubstringIndex("AaAaAaAaAa", "aa", 2, "UNICODE_CI", "A"); - assertSubstringIndex("www.apache.org", ".", 3, "UNICODE_CI", "www.apache.org"); - assertSubstringIndex("wwwXapacheXorg", "x", 2, "UNICODE_CI", "wwwXapache"); - assertSubstringIndex("wwwxapacheXorg", "X", 1, "UNICODE_CI", "www"); - assertSubstringIndex("www.apache.org", ".", 0, "UNICODE_CI", ""); - assertSubstringIndex("wwwGapacheGorg", "G", 3, "UNICODE_CI", "wwwGapacheGor"); - assertSubstringIndex("gwwwGapacheGorg", "g", 3, "UNICODE_CI", "gwwwGapache"); - assertSubstringIndex("gwwwGapacheGorg", "g", -3, "UNICODE_CI", "apacheGorg"); - assertSubstringIndex("www.apache.ORG", ".", -3, "UNICODE_CI", "www.apache.ORG"); - assertSubstringIndex("wwwmapacheMorg", "M", -2, "UNICODE_CI", "apacheMorg"); - assertSubstringIndex("www.apache.org", ".", -1, "UNICODE_CI", "org"); - assertSubstringIndex("", ".", -2, "UNICODE_CI", ""); - 
assertSubstringIndex("test大千世界X大千世界", "X", -1, "UNICODE_CI", "大千世界"); - assertSubstringIndex("test大千世界X大千世界", "X", 1, "UNICODE_CI", "test大千世界"); - assertSubstringIndex("test大千世界大千世界", "千", 2, "UNICODE_CI", "test大千世界大"); - assertSubstringIndex("www||APACHE||org", "||", 2, "UNICODE_CI", "www||APACHE"); + assertSubstringIndex("wwwgapachegorg", "g", -3, UTF8_BINARY, "apachegorg"); + assertSubstringIndex("www||apache||org", "||", 2, UTF8_BINARY, "www||apache"); + assertSubstringIndex("aaaaaaaaaa", "aa", 2, UTF8_BINARY, "a"); + assertSubstringIndex("AaAaAaAaAa", "aa", 2, UTF8_LCASE, "A"); + assertSubstringIndex("www.apache.org", ".", 3, UTF8_LCASE, "www.apache.org"); + assertSubstringIndex("wwwXapacheXorg", "x", 2, UTF8_LCASE, "wwwXapache"); + assertSubstringIndex("wwwxapachexorg", "X", 1, UTF8_LCASE, "www"); + assertSubstringIndex("www.apache.org", ".", 0, UTF8_LCASE, ""); + assertSubstringIndex("www.apache.ORG", ".", -3, UTF8_LCASE, "www.apache.ORG"); + assertSubstringIndex("wwwGapacheGorg", "g", 1, UTF8_LCASE, "www"); + assertSubstringIndex("wwwGapacheGorg", "g", 3, UTF8_LCASE, "wwwGapacheGor"); + assertSubstringIndex("gwwwGapacheGorg", "g", 3, UTF8_LCASE, "gwwwGapache"); + assertSubstringIndex("wwwGapacheGorg", "g", -3, UTF8_LCASE, "apacheGorg"); + assertSubstringIndex("wwwmapacheMorg", "M", -2, UTF8_LCASE, "apacheMorg"); + assertSubstringIndex("www.apache.org", ".", -1, UTF8_LCASE, "org"); + assertSubstringIndex("www.apache.org.", ".", -1, UTF8_LCASE, ""); + assertSubstringIndex("", ".", -2, UTF8_LCASE, ""); + assertSubstringIndex("test大千世界X大千世界", "x", -1, UTF8_LCASE, "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, UTF8_LCASE, "test大千世界"); + assertSubstringIndex("test大千世界大千世界", "千", 2, UTF8_LCASE, "test大千世界大"); + assertSubstringIndex("www||APACHE||org", "||", 2, UTF8_LCASE, "www||APACHE"); + assertSubstringIndex("www||APACHE||org", "||", -1, UTF8_LCASE, "org"); + assertSubstringIndex("AaAaAaAaAa", "Aa", 2, UNICODE, "Aa"); + 
assertSubstringIndex("wwwYapacheyorg", "y", 3, UNICODE, "wwwYapacheyorg"); + assertSubstringIndex("www.apache.org", ".", 2, UNICODE, "www.apache"); + assertSubstringIndex("wwwYapacheYorg", "Y", 1, UNICODE, "www"); + assertSubstringIndex("wwwYapacheYorg", "y", 1, UNICODE, "wwwYapacheYorg"); + assertSubstringIndex("wwwGapacheGorg", "g", 1, UNICODE, "wwwGapacheGor"); + assertSubstringIndex("GwwwGapacheGorG", "G", 3, UNICODE, "GwwwGapache"); + assertSubstringIndex("wwwGapacheGorG", "G", -3, UNICODE, "apacheGorG"); + assertSubstringIndex("www.apache.org", ".", 0, UNICODE, ""); + assertSubstringIndex("www.apache.org", ".", -3, UNICODE, "www.apache.org"); + assertSubstringIndex("www.apache.org", ".", -2, UNICODE, "apache.org"); + assertSubstringIndex("www.apache.org", ".", -1, UNICODE, "org"); + assertSubstringIndex("", ".", -2, UNICODE, ""); + assertSubstringIndex("test大千世界X大千世界", "X", -1, UNICODE, "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, UNICODE, "test大千世界"); + assertSubstringIndex("大x千世界大千世x界", "x", 1, UNICODE, "大"); + assertSubstringIndex("大x千世界大千世x界", "x", -1, UNICODE, "界"); + assertSubstringIndex("大x千世界大千世x界", "x", -2, UNICODE, "千世界大千世x界"); + assertSubstringIndex("大千世界大千世界", "千", 2, UNICODE, "大千世界大"); + assertSubstringIndex("www||apache||org", "||", 2, UNICODE, "www||apache"); + assertSubstringIndex("AaAaAaAaAa", "aa", 2, UNICODE_CI, "A"); + assertSubstringIndex("www.apache.org", ".", 3, UNICODE_CI, "www.apache.org"); + assertSubstringIndex("wwwXapacheXorg", "x", 2, UNICODE_CI, "wwwXapache"); + assertSubstringIndex("wwwxapacheXorg", "X", 1, UNICODE_CI, "www"); + assertSubstringIndex("www.apache.org", ".", 0, UNICODE_CI, ""); + assertSubstringIndex("wwwGapacheGorg", "G", 3, UNICODE_CI, "wwwGapacheGor"); + assertSubstringIndex("gwwwGapacheGorg", "g", 3, UNICODE_CI, "gwwwGapache"); + assertSubstringIndex("gwwwGapacheGorg", "g", -3, UNICODE_CI, "apacheGorg"); + assertSubstringIndex("www.apache.ORG", ".", -3, UNICODE_CI, "www.apache.ORG"); + 
assertSubstringIndex("wwwmapacheMorg", "M", -2, UNICODE_CI, "apacheMorg"); + assertSubstringIndex("www.apache.org", ".", -1, UNICODE_CI, "org"); + assertSubstringIndex("", ".", -2, UNICODE_CI, ""); + assertSubstringIndex("test大千世界X大千世界", "X", -1, UNICODE_CI, "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, UNICODE_CI, "test大千世界"); + assertSubstringIndex("test大千世界大千世界", "千", 2, UNICODE_CI, "test大千世界大"); + assertSubstringIndex("www||APACHE||org", "||", 2, UNICODE_CI, "www||APACHE"); assertSubstringIndex("wwwèapacheËorg", "Ê", -3, "AF_CI_AI", "apacheËorg"); // One-to-many case mapping (e.g. Turkish dotted I). - assertSubstringIndex("abİo12", "i\u0307o", 1, "UNICODE_CI", "ab"); - assertSubstringIndex("abİo12", "i\u0307o", -1, "UNICODE_CI", "12"); - assertSubstringIndex("abi̇o12", "İo", 1, "UNICODE_CI", "ab"); - assertSubstringIndex("abi̇o12", "İo", -1, "UNICODE_CI", "12"); - assertSubstringIndex("ai̇bi̇o12", "İo", 1, "UNICODE_CI", "ai̇b"); - assertSubstringIndex("ai̇bi̇o12i̇o", "İo", 2, "UNICODE_CI", "ai̇bi̇o12"); - assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -1, "UNICODE_CI", ""); - assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -2, "UNICODE_CI", "12i̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UNICODE_CI", "İo12İoi̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UNICODE_CI", "İo12İoi̇o"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UNICODE_CI", "i̇o12i̇oİo"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UNICODE_CI", "i̇o12i̇oİo"); - assertSubstringIndex("abi̇12", "i", 1, "UNICODE_CI", "abi̇12"); - assertSubstringIndex("abi̇12", "\u0307", 1, "UNICODE_CI", "abi̇12"); - assertSubstringIndex("abi̇12", "İ", 1, "UNICODE_CI", "ab"); - assertSubstringIndex("abİ12", "i", 1, "UNICODE_CI", "abİ12"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UNICODE_CI", "İo12İoi̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UNICODE_CI", "İo12İoi̇o"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, 
"UNICODE_CI", "i̇o12i̇oİo"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UNICODE_CI", "i̇o12i̇oİo"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, "UNICODE_CI", "ai̇bi̇oİo12"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, "UNICODE_CI", "ai̇bi̇oİo12"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, "UNICODE_CI", "ai̇bİoi̇o12"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, "UNICODE_CI", "ai̇bİoi̇o12"); - assertSubstringIndex("abi̇12", "i", 1, "UTF8_LCASE", "ab"); // != UNICODE_CI - assertSubstringIndex("abi̇12", "\u0307", 1, "UTF8_LCASE", "abi"); // != UNICODE_CI - assertSubstringIndex("abi̇12", "İ", 1, "UTF8_LCASE", "ab"); - assertSubstringIndex("abİ12", "i", 1, "UTF8_LCASE", "abİ12"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UTF8_LCASE", "İo12İoi̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UTF8_LCASE", "İo12İoi̇o"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UTF8_LCASE", "i̇o12i̇oİo"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UTF8_LCASE", "i̇o12i̇oİo"); - assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, "UTF8_LCASE", "bİoi̇o12i̇o"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, "UTF8_LCASE", "ai̇bi̇oİo12"); - assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, "UTF8_LCASE", "ai̇bi̇oİo12"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, "UTF8_LCASE", "ai̇bİoi̇o12"); - assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, "UTF8_LCASE", "ai̇bİoi̇o12"); - assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, "UTF8_LCASE", "bİoi̇o12i̇o"); + assertSubstringIndex("abİo12", "i\u0307o", 1, UNICODE_CI, "ab"); + assertSubstringIndex("abİo12", "i\u0307o", -1, UNICODE_CI, "12"); + assertSubstringIndex("abi̇o12", "İo", 1, UNICODE_CI, "ab"); + assertSubstringIndex("abi̇o12", "İo", -1, UNICODE_CI, "12"); + assertSubstringIndex("ai̇bi̇o12", "İo", 1, UNICODE_CI, "ai̇b"); + assertSubstringIndex("ai̇bi̇o12i̇o", "İo", 2, UNICODE_CI, "ai̇bi̇o12"); + 
assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -1, UNICODE_CI, ""); + assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -2, UNICODE_CI, "12i̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, UNICODE_CI, "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, UNICODE_CI, "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, UNICODE_CI, "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, UNICODE_CI, "i̇o12i̇oİo"); + assertSubstringIndex("abi̇12", "i", 1, UNICODE_CI, "abi̇12"); + assertSubstringIndex("abi̇12", "\u0307", 1, UNICODE_CI, "abi̇12"); + assertSubstringIndex("abi̇12", "İ", 1, UNICODE_CI, "ab"); + assertSubstringIndex("abİ12", "i", 1, UNICODE_CI, "abİ12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, UNICODE_CI, "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, UNICODE_CI, "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, UNICODE_CI, "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, UNICODE_CI, "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, UNICODE_CI, "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, UNICODE_CI, "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, UNICODE_CI, "ai̇bİoi̇o12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, UNICODE_CI, "ai̇bİoi̇o12"); + assertSubstringIndex("abi̇12", "i", 1, UTF8_LCASE, "ab"); // != UNICODE_CI + assertSubstringIndex("abi̇12", "\u0307", 1, UTF8_LCASE, "abi"); // != UNICODE_CI + assertSubstringIndex("abi̇12", "İ", 1, UTF8_LCASE, "ab"); + assertSubstringIndex("abİ12", "i", 1, UTF8_LCASE, "abİ12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, UTF8_LCASE, "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, UTF8_LCASE, "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, UTF8_LCASE, "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, UTF8_LCASE, "i̇o12i̇oİo"); + 
assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, UTF8_LCASE, "bİoi̇o12i̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, UTF8_LCASE, "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, UTF8_LCASE, "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, UTF8_LCASE, "ai̇bİoi̇o12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, UTF8_LCASE, "ai̇bİoi̇o12"); + assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, UTF8_LCASE, "bİoi̇o12i̇o"); // Conditional case mapping (e.g. Greek sigmas). - assertSubstringIndex("σ", "σ", 1, "UTF8_BINARY", ""); - assertSubstringIndex("σ", "ς", 1, "UTF8_BINARY", "σ"); - assertSubstringIndex("σ", "Σ", 1, "UTF8_BINARY", "σ"); - assertSubstringIndex("ς", "σ", 1, "UTF8_BINARY", "ς"); - assertSubstringIndex("ς", "ς", 1, "UTF8_BINARY", ""); - assertSubstringIndex("ς", "Σ", 1, "UTF8_BINARY", "ς"); - assertSubstringIndex("Σ", "σ", 1, "UTF8_BINARY", "Σ"); - assertSubstringIndex("Σ", "ς", 1, "UTF8_BINARY", "Σ"); - assertSubstringIndex("Σ", "Σ", 1, "UTF8_BINARY", ""); - assertSubstringIndex("σ", "σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("σ", "ς", 1, "UTF8_LCASE", ""); - assertSubstringIndex("σ", "Σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("ς", "σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("ς", "ς", 1, "UTF8_LCASE", ""); - assertSubstringIndex("ς", "Σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("Σ", "σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("Σ", "ς", 1, "UTF8_LCASE", ""); - assertSubstringIndex("Σ", "Σ", 1, "UTF8_LCASE", ""); - assertSubstringIndex("σ", "σ", 1, "UNICODE", ""); - assertSubstringIndex("σ", "ς", 1, "UNICODE", "σ"); - assertSubstringIndex("σ", "Σ", 1, "UNICODE", "σ"); - assertSubstringIndex("ς", "σ", 1, "UNICODE", "ς"); - assertSubstringIndex("ς", "ς", 1, "UNICODE", ""); - assertSubstringIndex("ς", "Σ", 1, "UNICODE", "ς"); - assertSubstringIndex("Σ", "σ", 1, "UNICODE", "Σ"); - assertSubstringIndex("Σ", "ς", 1, "UNICODE", "Σ"); - assertSubstringIndex("Σ", 
"Σ", 1, "UNICODE", ""); - assertSubstringIndex("σ", "σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("σ", "ς", 1, "UNICODE_CI", ""); - assertSubstringIndex("σ", "Σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("ς", "σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("ς", "ς", 1, "UNICODE_CI", ""); - assertSubstringIndex("ς", "Σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("Σ", "σ", 1, "UNICODE_CI", ""); - assertSubstringIndex("Σ", "ς", 1, "UNICODE_CI", ""); - assertSubstringIndex("Σ", "Σ", 1, "UNICODE_CI", ""); + assertSubstringIndex("σ", "σ", 1, UTF8_BINARY, ""); + assertSubstringIndex("σ", "ς", 1, UTF8_BINARY, "σ"); + assertSubstringIndex("σ", "Σ", 1, UTF8_BINARY, "σ"); + assertSubstringIndex("ς", "σ", 1, UTF8_BINARY, "ς"); + assertSubstringIndex("ς", "ς", 1, UTF8_BINARY, ""); + assertSubstringIndex("ς", "Σ", 1, UTF8_BINARY, "ς"); + assertSubstringIndex("Σ", "σ", 1, UTF8_BINARY, "Σ"); + assertSubstringIndex("Σ", "ς", 1, UTF8_BINARY, "Σ"); + assertSubstringIndex("Σ", "Σ", 1, UTF8_BINARY, ""); + assertSubstringIndex("σ", "σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("σ", "ς", 1, UTF8_LCASE, ""); + assertSubstringIndex("σ", "Σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("ς", "σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("ς", "ς", 1, UTF8_LCASE, ""); + assertSubstringIndex("ς", "Σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("Σ", "σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("Σ", "ς", 1, UTF8_LCASE, ""); + assertSubstringIndex("Σ", "Σ", 1, UTF8_LCASE, ""); + assertSubstringIndex("σ", "σ", 1, UNICODE, ""); + assertSubstringIndex("σ", "ς", 1, UNICODE, "σ"); + assertSubstringIndex("σ", "Σ", 1, UNICODE, "σ"); + assertSubstringIndex("ς", "σ", 1, UNICODE, "ς"); + assertSubstringIndex("ς", "ς", 1, UNICODE, ""); + assertSubstringIndex("ς", "Σ", 1, UNICODE, "ς"); + assertSubstringIndex("Σ", "σ", 1, UNICODE, "Σ"); + assertSubstringIndex("Σ", "ς", 1, UNICODE, "Σ"); + assertSubstringIndex("Σ", "Σ", 1, UNICODE, ""); + assertSubstringIndex("σ", "σ", 1, UNICODE_CI, ""); + 
assertSubstringIndex("σ", "ς", 1, UNICODE_CI, ""); + assertSubstringIndex("σ", "Σ", 1, UNICODE_CI, ""); + assertSubstringIndex("ς", "σ", 1, UNICODE_CI, ""); + assertSubstringIndex("ς", "ς", 1, UNICODE_CI, ""); + assertSubstringIndex("ς", "Σ", 1, UNICODE_CI, ""); + assertSubstringIndex("Σ", "σ", 1, UNICODE_CI, ""); + assertSubstringIndex("Σ", "ς", 1, UNICODE_CI, ""); + assertSubstringIndex("Σ", "Σ", 1, UNICODE_CI, ""); // Surrogate pairs. - assertSubstringIndex("a🙃b🙃c", "a", 1, "UTF8_BINARY", ""); - assertSubstringIndex("a🙃b🙃c", "a", 1, "UTF8_LCASE", ""); - assertSubstringIndex("a🙃b🙃c", "a", 1, "UNICODE", ""); - assertSubstringIndex("a🙃b🙃c", "a", 1, "UNICODE_CI", ""); - assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UTF8_BINARY", "a"); - assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UTF8_LCASE", "a"); - assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UNICODE", "a"); - assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UNICODE_CI", "a"); - assertSubstringIndex("a🙃b🙃c", "b", 1, "UTF8_BINARY", "a🙃"); - assertSubstringIndex("a🙃b🙃c", "b", 1, "UTF8_LCASE", "a🙃"); - assertSubstringIndex("a🙃b🙃c", "b", 1, "UNICODE", "a🙃"); - assertSubstringIndex("a🙃b🙃c", "b", 1, "UNICODE_CI", "a🙃"); - assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UTF8_BINARY", "a🙃b"); - assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UTF8_LCASE", "a🙃b"); - assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UNICODE", "a🙃b"); - assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UNICODE_CI", "a🙃b"); - assertSubstringIndex("a🙃b🙃c", "c", 1, "UTF8_BINARY", "a🙃b🙃"); - assertSubstringIndex("a🙃b🙃c", "c", 1, "UTF8_LCASE", "a🙃b🙃"); - assertSubstringIndex("a🙃b🙃c", "c", 1, "UNICODE", "a🙃b🙃"); - assertSubstringIndex("a🙃b🙃c", "c", 1, "UNICODE_CI", "a🙃b🙃"); - assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UTF8_BINARY", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UTF8_LCASE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UNICODE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UNICODE_CI", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", 1, "UTF8_BINARY", "a🙃b🙃c"); - 
assertSubstringIndex("a🙃b🙃c", "d", 1, "UTF8_LCASE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", 1, "UNICODE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", 1, "UNICODE_CI", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "c", -1, "UTF8_BINARY", ""); - assertSubstringIndex("a🙃b🙃c", "c", -1, "UTF8_LCASE", ""); - assertSubstringIndex("a🙃b🙃c", "c", -1, "UNICODE", ""); - assertSubstringIndex("a🙃b🙃c", "c", -1, "UNICODE_CI", ""); - assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UTF8_BINARY", "c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UTF8_LCASE", "c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UNICODE", "c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UNICODE_CI", "c"); - assertSubstringIndex("a🙃b🙃c", "b", -1, "UTF8_BINARY", "🙃c"); - assertSubstringIndex("a🙃b🙃c", "b", -1, "UTF8_LCASE", "🙃c"); - assertSubstringIndex("a🙃b🙃c", "b", -1, "UNICODE", "🙃c"); - assertSubstringIndex("a🙃b🙃c", "b", -1, "UNICODE_CI", "🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UTF8_BINARY", "b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UTF8_LCASE", "b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UNICODE", "b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UNICODE_CI", "b🙃c"); - assertSubstringIndex("a🙃b🙃c", "a", -1, "UTF8_BINARY", "🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "a", -1, "UTF8_LCASE", "🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "a", -1, "UNICODE", "🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "a", -1, "UNICODE_CI", "🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UTF8_BINARY", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UTF8_LCASE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UNICODE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UNICODE_CI", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", -1, "UTF8_BINARY", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", -1, "UTF8_LCASE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", -1, "UNICODE", "a🙃b🙃c"); - assertSubstringIndex("a🙃b🙃c", "d", -1, "UNICODE_CI", "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", 1, UTF8_BINARY, ""); + 
assertSubstringIndex("a🙃b🙃c", "a", 1, UTF8_LCASE, ""); + assertSubstringIndex("a🙃b🙃c", "a", 1, UNICODE, ""); + assertSubstringIndex("a🙃b🙃c", "a", 1, UNICODE_CI, ""); + assertSubstringIndex("a🙃b🙃c", "🙃", 1, UTF8_BINARY, "a"); + assertSubstringIndex("a🙃b🙃c", "🙃", 1, UTF8_LCASE, "a"); + assertSubstringIndex("a🙃b🙃c", "🙃", 1, UNICODE, "a"); + assertSubstringIndex("a🙃b🙃c", "🙃", 1, UNICODE_CI, "a"); + assertSubstringIndex("a🙃b🙃c", "b", 1, UTF8_BINARY, "a🙃"); + assertSubstringIndex("a🙃b🙃c", "b", 1, UTF8_LCASE, "a🙃"); + assertSubstringIndex("a🙃b🙃c", "b", 1, UNICODE, "a🙃"); + assertSubstringIndex("a🙃b🙃c", "b", 1, UNICODE_CI, "a🙃"); + assertSubstringIndex("a🙃b🙃c", "🙃", 2, UTF8_BINARY, "a🙃b"); + assertSubstringIndex("a🙃b🙃c", "🙃", 2, UTF8_LCASE, "a🙃b"); + assertSubstringIndex("a🙃b🙃c", "🙃", 2, UNICODE, "a🙃b"); + assertSubstringIndex("a🙃b🙃c", "🙃", 2, UNICODE_CI, "a🙃b"); + assertSubstringIndex("a🙃b🙃c", "c", 1, UTF8_BINARY, "a🙃b🙃"); + assertSubstringIndex("a🙃b🙃c", "c", 1, UTF8_LCASE, "a🙃b🙃"); + assertSubstringIndex("a🙃b🙃c", "c", 1, UNICODE, "a🙃b🙃"); + assertSubstringIndex("a🙃b🙃c", "c", 1, UNICODE_CI, "a🙃b🙃"); + assertSubstringIndex("a🙃b🙃c", "🙃", 3, UTF8_BINARY, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", 3, UTF8_LCASE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", 3, UNICODE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", 3, UNICODE_CI, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", 1, UTF8_BINARY, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", 1, UTF8_LCASE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", 1, UNICODE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", 1, UNICODE_CI, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "c", -1, UTF8_BINARY, ""); + assertSubstringIndex("a🙃b🙃c", "c", -1, UTF8_LCASE, ""); + assertSubstringIndex("a🙃b🙃c", "c", -1, UNICODE, ""); + assertSubstringIndex("a🙃b🙃c", "c", -1, UNICODE_CI, ""); + assertSubstringIndex("a🙃b🙃c", "🙃", -1, UTF8_BINARY, "c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -1, UTF8_LCASE, "c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -1, 
UNICODE, "c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -1, UNICODE_CI, "c"); + assertSubstringIndex("a🙃b🙃c", "b", -1, UTF8_BINARY, "🙃c"); + assertSubstringIndex("a🙃b🙃c", "b", -1, UTF8_LCASE, "🙃c"); + assertSubstringIndex("a🙃b🙃c", "b", -1, UNICODE, "🙃c"); + assertSubstringIndex("a🙃b🙃c", "b", -1, UNICODE_CI, "🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -2, UTF8_BINARY, "b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -2, UTF8_LCASE, "b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -2, UNICODE, "b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -2, UNICODE_CI, "b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", -1, UTF8_BINARY, "🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", -1, UTF8_LCASE, "🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", -1, UNICODE, "🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "a", -1, UNICODE_CI, "🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -3, UTF8_BINARY, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -3, UTF8_LCASE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -3, UNICODE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "🙃", -3, UNICODE_CI, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", -1, UTF8_BINARY, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", -1, UTF8_LCASE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", -1, UNICODE, "a🙃b🙃c"); + assertSubstringIndex("a🙃b🙃c", "d", -1, UNICODE_CI, "a🙃b🙃c"); } /** @@ -2776,279 +2777,279 @@ private void assertStringTrim(String collationName, String sourceString, String @Test public void testStringTrim() throws SparkException { // Basic tests. 
- assertStringTrim("UTF8_BINARY", "", "", ""); - assertStringTrim("UTF8_BINARY", "", "xyz", ""); - assertStringTrim("UTF8_BINARY", "asd", "", "asd"); - assertStringTrim("UTF8_BINARY", "asd", null, "asd"); - assertStringTrim("UTF8_BINARY", " asd ", null, "asd"); - assertStringTrim("UTF8_BINARY", " a世a ", null, "a世a"); - assertStringTrim("UTF8_BINARY", "asd", "x", "asd"); - assertStringTrim("UTF8_BINARY", "xxasdxx", "x", "asd"); - assertStringTrim("UTF8_BINARY", "xa世ax", "x", "a世a"); - assertStringTrim("UTF8_LCASE", "", "", ""); - assertStringTrim("UTF8_LCASE", "", "xyz", ""); - assertStringTrim("UTF8_LCASE", "asd", "", "asd"); - assertStringTrim("UTF8_LCASE", "asd", null, "asd"); - assertStringTrim("UTF8_LCASE", " asd ", null, "asd"); - assertStringTrim("UTF8_LCASE", " a世a ", null, "a世a"); - assertStringTrim("UTF8_LCASE", "asd", "x", "asd"); - assertStringTrim("UTF8_LCASE", "xxasdxx", "x", "asd"); - assertStringTrim("UTF8_LCASE", "xa世ax", "x", "a世a"); - assertStringTrim("UNICODE", "", "", ""); - assertStringTrim("UNICODE", "", "xyz", ""); - assertStringTrim("UNICODE", "asd", "", "asd"); - assertStringTrim("UNICODE", "asd", null, "asd"); - assertStringTrim("UNICODE", " asd ", null, "asd"); - assertStringTrim("UNICODE", " a世a ", null, "a世a"); - assertStringTrim("UNICODE", "asd", "x", "asd"); - assertStringTrim("UNICODE", "xxasdxx", "x", "asd"); - assertStringTrim("UNICODE", "xa世ax", "x", "a世a"); - assertStringTrim("UNICODE_CI", "", "", ""); - assertStringTrim("UNICODE_CI", "", "xyz", ""); - assertStringTrim("UNICODE_CI", "asd", "", "asd"); - assertStringTrim("UNICODE_CI", "asd", null, "asd"); - assertStringTrim("UNICODE_CI", " asd ", null, "asd"); - assertStringTrim("UNICODE_CI", " a世a ", null, "a世a"); - assertStringTrim("UNICODE_CI", "asd", "x", "asd"); - assertStringTrim("UNICODE_CI", "xxasdxx", "x", "asd"); - assertStringTrim("UNICODE_CI", "xa世ax", "x", "a世a"); + assertStringTrim(UTF8_BINARY, "", "", ""); + assertStringTrim(UTF8_BINARY, "", "xyz", ""); + 
assertStringTrim(UTF8_BINARY, "asd", "", "asd"); + assertStringTrim(UTF8_BINARY, "asd", null, "asd"); + assertStringTrim(UTF8_BINARY, " asd ", null, "asd"); + assertStringTrim(UTF8_BINARY, " a世a ", null, "a世a"); + assertStringTrim(UTF8_BINARY, "asd", "x", "asd"); + assertStringTrim(UTF8_BINARY, "xxasdxx", "x", "asd"); + assertStringTrim(UTF8_BINARY, "xa世ax", "x", "a世a"); + assertStringTrim(UTF8_LCASE, "", "", ""); + assertStringTrim(UTF8_LCASE, "", "xyz", ""); + assertStringTrim(UTF8_LCASE, "asd", "", "asd"); + assertStringTrim(UTF8_LCASE, "asd", null, "asd"); + assertStringTrim(UTF8_LCASE, " asd ", null, "asd"); + assertStringTrim(UTF8_LCASE, " a世a ", null, "a世a"); + assertStringTrim(UTF8_LCASE, "asd", "x", "asd"); + assertStringTrim(UTF8_LCASE, "xxasdxx", "x", "asd"); + assertStringTrim(UTF8_LCASE, "xa世ax", "x", "a世a"); + assertStringTrim(UNICODE, "", "", ""); + assertStringTrim(UNICODE, "", "xyz", ""); + assertStringTrim(UNICODE, "asd", "", "asd"); + assertStringTrim(UNICODE, "asd", null, "asd"); + assertStringTrim(UNICODE, " asd ", null, "asd"); + assertStringTrim(UNICODE, " a世a ", null, "a世a"); + assertStringTrim(UNICODE, "asd", "x", "asd"); + assertStringTrim(UNICODE, "xxasdxx", "x", "asd"); + assertStringTrim(UNICODE, "xa世ax", "x", "a世a"); + assertStringTrim(UNICODE_CI, "", "", ""); + assertStringTrim(UNICODE_CI, "", "xyz", ""); + assertStringTrim(UNICODE_CI, "asd", "", "asd"); + assertStringTrim(UNICODE_CI, "asd", null, "asd"); + assertStringTrim(UNICODE_CI, " asd ", null, "asd"); + assertStringTrim(UNICODE_CI, " a世a ", null, "a世a"); + assertStringTrim(UNICODE_CI, "asd", "x", "asd"); + assertStringTrim(UNICODE_CI, "xxasdxx", "x", "asd"); + assertStringTrim(UNICODE_CI, "xa世ax", "x", "a世a"); // Case variation. 
- assertStringTrim("UTF8_BINARY", "asd", "A", "asd"); - assertStringTrim("UTF8_BINARY", "ddsXXXaa", "asd", "XXX"); - assertStringTrim("UTF8_BINARY", "ASD", "a", "ASD"); - assertStringTrim("UTF8_LCASE", "asd", "A", "sd"); - assertStringTrim("UTF8_LCASE", "ASD", "a", "SD"); - assertStringTrim("UTF8_LCASE", "ddsXXXaa", "ASD", "XXX"); - assertStringTrim("UNICODE", "asd", "A", "asd"); - assertStringTrim("UNICODE", "ASD", "a", "ASD"); - assertStringTrim("UNICODE", "ddsXXXaa", "asd", "XXX"); - assertStringTrim("UNICODE_CI", "asd", "A", "sd"); - assertStringTrim("UNICODE_CI", "ASD", "a", "SD"); - assertStringTrim("UNICODE_CI", "ddsXXXaa", "ASD", "XXX"); + assertStringTrim(UTF8_BINARY, "asd", "A", "asd"); + assertStringTrim(UTF8_BINARY, "ddsXXXaa", "asd", "XXX"); + assertStringTrim(UTF8_BINARY, "ASD", "a", "ASD"); + assertStringTrim(UTF8_LCASE, "asd", "A", "sd"); + assertStringTrim(UTF8_LCASE, "ASD", "a", "SD"); + assertStringTrim(UTF8_LCASE, "ddsXXXaa", "ASD", "XXX"); + assertStringTrim(UNICODE, "asd", "A", "asd"); + assertStringTrim(UNICODE, "ASD", "a", "ASD"); + assertStringTrim(UNICODE, "ddsXXXaa", "asd", "XXX"); + assertStringTrim(UNICODE_CI, "asd", "A", "sd"); + assertStringTrim(UNICODE_CI, "ASD", "a", "SD"); + assertStringTrim(UNICODE_CI, "ddsXXXaa", "ASD", "XXX"); assertStringTrim("SR_CI_AI", "cSCšćČXXXsčšČŠsć", "čš", "XXX"); // One-to-many case mapping (e.g. Turkish dotted I).. 
- assertStringTrim("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrim("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrim("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaa"); - assertStringTrim("UTF8_LCASE", "ẞaaaẞ", "ß", "aaa"); - assertStringTrim("UTF8_LCASE", "ßaaaß", "ẞ", "aaa"); - assertStringTrim("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaa"); - assertStringTrim("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrim("UNICODE", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrim("UNICODE", "Ëaaaẞ", "Ëẞ", "aaa"); - assertStringTrim("UNICODE_CI", "ẞaaaẞ", "ß", "aaa"); - assertStringTrim("UNICODE_CI", "ßaaaß", "ẞ", "aaa"); - assertStringTrim("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrim(UTF8_BINARY, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrim(UTF8_BINARY, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrim(UTF8_BINARY, "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrim(UTF8_LCASE, "ẞaaaẞ", "ß", "aaa"); + assertStringTrim(UTF8_LCASE, "ßaaaß", "ẞ", "aaa"); + assertStringTrim(UTF8_LCASE, "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrim(UNICODE, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrim(UNICODE, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrim(UNICODE, "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrim(UNICODE_CI, "ẞaaaẞ", "ß", "aaa"); + assertStringTrim(UNICODE_CI, "ßaaaß", "ẞ", "aaa"); + assertStringTrim(UNICODE_CI, "Ëaaaẞ", "Ëẞ", "aaa"); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertStringTrim("UTF8_BINARY", "i", "i", ""); - assertStringTrim("UTF8_BINARY", "iii", "I", "iii"); - assertStringTrim("UTF8_BINARY", "I", "iii", "I"); - assertStringTrim("UTF8_BINARY", "ixi", "i", "x"); - assertStringTrim("UTF8_BINARY", "i", "İ", "i"); - assertStringTrim("UTF8_BINARY", "i\u0307", "İ", "i\u0307"); - assertStringTrim("UTF8_BINARY", "ii\u0307", "İi", "\u0307"); - assertStringTrim("UTF8_BINARY", "iii\u0307", "İi", "\u0307"); - assertStringTrim("UTF8_BINARY", "iiii\u0307", "iİ", "\u0307"); - assertStringTrim("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307"); - assertStringTrim("UTF8_BINARY", "i\u0307", "i", "\u0307"); - assertStringTrim("UTF8_BINARY", "i\u0307", "\u0307", "i"); - assertStringTrim("UTF8_BINARY", "i\u0307", "i\u0307", ""); - assertStringTrim("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrim("UTF8_BINARY", "i\u0307\u0307", "i\u0307", ""); - assertStringTrim("UTF8_BINARY", "i\u0307i", "i\u0307", ""); - assertStringTrim("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i"); - assertStringTrim("UTF8_BINARY", "i\u0307İ", "i\u0307", "İ"); - assertStringTrim("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307"); - assertStringTrim("UTF8_BINARY", "İ", "İ", ""); - assertStringTrim("UTF8_BINARY", "IXi", "İ", "IXi"); - assertStringTrim("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrim("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrim("UTF8_BINARY", "i\u0307x", "ix\u0307İ", ""); - assertStringTrim("UTF8_BINARY", "İ", "i", "İ"); - assertStringTrim("UTF8_BINARY", "İ", "\u0307", "İ"); - assertStringTrim("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrim("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ"); - assertStringTrim("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi"); - assertStringTrim("UTF8_LCASE", "i", "i", ""); - assertStringTrim("UTF8_LCASE", "iii", "I", ""); - assertStringTrim("UTF8_LCASE", "I", "iii", ""); - assertStringTrim("UTF8_LCASE", "ixi", "i", "x"); - assertStringTrim("UTF8_LCASE", "i", "İ", "i"); - 
assertStringTrim("UTF8_LCASE", "i\u0307", "İ", ""); - assertStringTrim("UTF8_LCASE", "ii\u0307", "İi", ""); - assertStringTrim("UTF8_LCASE", "iii\u0307", "İi", ""); - assertStringTrim("UTF8_LCASE", "iiii\u0307", "iİ", ""); - assertStringTrim("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrim("UTF8_LCASE", "i\u0307", "i", "\u0307"); - assertStringTrim("UTF8_LCASE", "i\u0307", "\u0307", "i"); - assertStringTrim("UTF8_LCASE", "i\u0307", "i\u0307", ""); - assertStringTrim("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrim("UTF8_LCASE", "i\u0307\u0307", "i\u0307", ""); - assertStringTrim("UTF8_LCASE", "i\u0307i", "i\u0307", ""); - assertStringTrim("UTF8_LCASE", "i\u0307i", "İ", "i"); - assertStringTrim("UTF8_LCASE", "i\u0307İ", "i\u0307", "İ"); - assertStringTrim("UTF8_LCASE", "i\u0307İ", "İ", ""); - assertStringTrim("UTF8_LCASE", "İ", "İ", ""); - assertStringTrim("UTF8_LCASE", "IXi", "İ", "IXi"); - assertStringTrim("UTF8_LCASE", "ix\u0307", "Ixİ", "\u0307"); - assertStringTrim("UTF8_LCASE", "i\u0307x", "IXİ", ""); - assertStringTrim("UTF8_LCASE", "i\u0307x", "I\u0307xİ", ""); - assertStringTrim("UTF8_LCASE", "İ", "i", "İ"); - assertStringTrim("UTF8_LCASE", "İ", "\u0307", "İ"); - assertStringTrim("UTF8_LCASE", "Ixİ", "i\u0307", "xİ"); - assertStringTrim("UTF8_LCASE", "IXİ", "ix\u0307", "İ"); - assertStringTrim("UTF8_LCASE", "xi\u0307", "\u0307IX", ""); - assertStringTrim("UNICODE", "i", "i", ""); - assertStringTrim("UNICODE", "iii", "I", "iii"); - assertStringTrim("UNICODE", "I", "iii", "I"); - assertStringTrim("UNICODE", "ixi", "i", "x"); - assertStringTrim("UNICODE", "i", "İ", "i"); - assertStringTrim("UNICODE", "i\u0307", "İ", "i\u0307"); - assertStringTrim("UNICODE", "ii\u0307", "İi", "i\u0307"); - assertStringTrim("UNICODE", "iii\u0307", "İi", "i\u0307"); - assertStringTrim("UNICODE", "iiii\u0307", "iİ", "i\u0307"); - assertStringTrim("UNICODE", "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307"); - assertStringTrim("UNICODE", "i\u0307", "i", 
"i\u0307"); - assertStringTrim("UNICODE", "i\u0307", "\u0307", "i\u0307"); - assertStringTrim("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrim("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrim("UNICODE", "i\u0307i", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE", "i\u0307i", "İ", "i\u0307i"); - assertStringTrim("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrim("UNICODE", "i\u0307İ", "İ", "i\u0307"); - assertStringTrim("UNICODE", "İ", "İ", ""); - assertStringTrim("UNICODE", "IXi", "İ", "IXi"); - assertStringTrim("UNICODE", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrim("UNICODE", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrim("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307"); - assertStringTrim("UNICODE", "İ", "i", "İ"); - assertStringTrim("UNICODE", "İ", "\u0307", "İ"); - assertStringTrim("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrim("UNICODE", "IXİ", "ix\u0307", "IXİ"); - assertStringTrim("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307"); - assertStringTrim("UNICODE_CI", "i", "i", ""); - assertStringTrim("UNICODE_CI", "iii", "I", ""); - assertStringTrim("UNICODE_CI", "I", "iii", ""); - assertStringTrim("UNICODE_CI", "ixi", "i", "x"); - assertStringTrim("UNICODE_CI", "i", "İ", "i"); - assertStringTrim("UNICODE_CI", "i\u0307", "İ", ""); - assertStringTrim("UNICODE_CI", "ii\u0307", "İi", ""); - assertStringTrim("UNICODE_CI", "iii\u0307", "İi", ""); - assertStringTrim("UNICODE_CI", "iiii\u0307", "iİ", ""); - assertStringTrim("UNICODE_CI", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrim("UNICODE_CI", "i\u0307", "i", "i\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307", "\u0307", "i\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - 
assertStringTrim("UNICODE_CI", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307i", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307i", "İ", "i"); - assertStringTrim("UNICODE_CI", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrim("UNICODE_CI", "i\u0307İ", "İ", ""); - assertStringTrim("UNICODE_CI", "İ", "İ", ""); - assertStringTrim("UNICODE_CI", "IXi", "İ", "IXi"); - assertStringTrim("UNICODE_CI", "ix\u0307", "Ixİ", "x\u0307"); - assertStringTrim("UNICODE_CI", "i\u0307x", "IXİ", ""); - assertStringTrim("UNICODE_CI", "i\u0307x", "I\u0307xİ", ""); - assertStringTrim("UNICODE_CI", "İ", "i", "İ"); - assertStringTrim("UNICODE_CI", "İ", "\u0307", "İ"); - assertStringTrim("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrim("UNICODE_CI", "Ixİ", "i\u0307", "xİ"); - assertStringTrim("UNICODE_CI", "IXİ", "ix\u0307", "İ"); - assertStringTrim("UNICODE_CI", "xi\u0307", "\u0307IX", "i\u0307"); + assertStringTrim(UTF8_BINARY, "i", "i", ""); + assertStringTrim(UTF8_BINARY, "iii", "I", "iii"); + assertStringTrim(UTF8_BINARY, "I", "iii", "I"); + assertStringTrim(UTF8_BINARY, "ixi", "i", "x"); + assertStringTrim(UTF8_BINARY, "i", "İ", "i"); + assertStringTrim(UTF8_BINARY, "i\u0307", "İ", "i\u0307"); + assertStringTrim(UTF8_BINARY, "ii\u0307", "İi", "\u0307"); + assertStringTrim(UTF8_BINARY, "iii\u0307", "İi", "\u0307"); + assertStringTrim(UTF8_BINARY, "iiii\u0307", "iİ", "\u0307"); + assertStringTrim(UTF8_BINARY, "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307"); + assertStringTrim(UTF8_BINARY, "i\u0307", "i", "\u0307"); + assertStringTrim(UTF8_BINARY, "i\u0307", "\u0307", "i"); + assertStringTrim(UTF8_BINARY, "i\u0307", "i\u0307", ""); + assertStringTrim(UTF8_BINARY, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrim(UTF8_BINARY, "i\u0307\u0307", "i\u0307", ""); + assertStringTrim(UTF8_BINARY, "i\u0307i", "i\u0307", ""); + assertStringTrim(UTF8_BINARY, "i\u0307i", "İ", "i\u0307i"); + assertStringTrim(UTF8_BINARY, 
"i\u0307İ", "i\u0307", "İ"); + assertStringTrim(UTF8_BINARY, "i\u0307İ", "İ", "i\u0307"); + assertStringTrim(UTF8_BINARY, "İ", "İ", ""); + assertStringTrim(UTF8_BINARY, "IXi", "İ", "IXi"); + assertStringTrim(UTF8_BINARY, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrim(UTF8_BINARY, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrim(UTF8_BINARY, "i\u0307x", "ix\u0307İ", ""); + assertStringTrim(UTF8_BINARY, "İ", "i", "İ"); + assertStringTrim(UTF8_BINARY, "İ", "\u0307", "İ"); + assertStringTrim(UTF8_BINARY, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrim(UTF8_BINARY, "IXİ", "ix\u0307", "IXİ"); + assertStringTrim(UTF8_BINARY, "xi\u0307", "\u0307IX", "xi"); + assertStringTrim(UTF8_LCASE, "i", "i", ""); + assertStringTrim(UTF8_LCASE, "iii", "I", ""); + assertStringTrim(UTF8_LCASE, "I", "iii", ""); + assertStringTrim(UTF8_LCASE, "ixi", "i", "x"); + assertStringTrim(UTF8_LCASE, "i", "İ", "i"); + assertStringTrim(UTF8_LCASE, "i\u0307", "İ", ""); + assertStringTrim(UTF8_LCASE, "ii\u0307", "İi", ""); + assertStringTrim(UTF8_LCASE, "iii\u0307", "İi", ""); + assertStringTrim(UTF8_LCASE, "iiii\u0307", "iİ", ""); + assertStringTrim(UTF8_LCASE, "ii\u0307ii\u0307", "iİ", ""); + assertStringTrim(UTF8_LCASE, "i\u0307", "i", "\u0307"); + assertStringTrim(UTF8_LCASE, "i\u0307", "\u0307", "i"); + assertStringTrim(UTF8_LCASE, "i\u0307", "i\u0307", ""); + assertStringTrim(UTF8_LCASE, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrim(UTF8_LCASE, "i\u0307\u0307", "i\u0307", ""); + assertStringTrim(UTF8_LCASE, "i\u0307i", "i\u0307", ""); + assertStringTrim(UTF8_LCASE, "i\u0307i", "İ", "i"); + assertStringTrim(UTF8_LCASE, "i\u0307İ", "i\u0307", "İ"); + assertStringTrim(UTF8_LCASE, "i\u0307İ", "İ", ""); + assertStringTrim(UTF8_LCASE, "İ", "İ", ""); + assertStringTrim(UTF8_LCASE, "IXi", "İ", "IXi"); + assertStringTrim(UTF8_LCASE, "ix\u0307", "Ixİ", "\u0307"); + assertStringTrim(UTF8_LCASE, "i\u0307x", "IXİ", ""); + assertStringTrim(UTF8_LCASE, "i\u0307x", "I\u0307xİ", ""); + 
assertStringTrim(UTF8_LCASE, "İ", "i", "İ"); + assertStringTrim(UTF8_LCASE, "İ", "\u0307", "İ"); + assertStringTrim(UTF8_LCASE, "Ixİ", "i\u0307", "xİ"); + assertStringTrim(UTF8_LCASE, "IXİ", "ix\u0307", "İ"); + assertStringTrim(UTF8_LCASE, "xi\u0307", "\u0307IX", ""); + assertStringTrim(UNICODE, "i", "i", ""); + assertStringTrim(UNICODE, "iii", "I", "iii"); + assertStringTrim(UNICODE, "I", "iii", "I"); + assertStringTrim(UNICODE, "ixi", "i", "x"); + assertStringTrim(UNICODE, "i", "İ", "i"); + assertStringTrim(UNICODE, "i\u0307", "İ", "i\u0307"); + assertStringTrim(UNICODE, "ii\u0307", "İi", "i\u0307"); + assertStringTrim(UNICODE, "iii\u0307", "İi", "i\u0307"); + assertStringTrim(UNICODE, "iiii\u0307", "iİ", "i\u0307"); + assertStringTrim(UNICODE, "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307"); + assertStringTrim(UNICODE, "i\u0307", "i", "i\u0307"); + assertStringTrim(UNICODE, "i\u0307", "\u0307", "i\u0307"); + assertStringTrim(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrim(UNICODE, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrim(UNICODE, "i\u0307i", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE, "i\u0307i", "İ", "i\u0307i"); + assertStringTrim(UNICODE, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrim(UNICODE, "i\u0307İ", "İ", "i\u0307"); + assertStringTrim(UNICODE, "İ", "İ", ""); + assertStringTrim(UNICODE, "IXi", "İ", "IXi"); + assertStringTrim(UNICODE, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrim(UNICODE, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrim(UNICODE, "i\u0307x", "ix\u0307İ", "i\u0307"); + assertStringTrim(UNICODE, "İ", "i", "İ"); + assertStringTrim(UNICODE, "İ", "\u0307", "İ"); + assertStringTrim(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrim(UNICODE, "IXİ", "ix\u0307", "IXİ"); + assertStringTrim(UNICODE, "xi\u0307", "\u0307IX", "xi\u0307"); + 
assertStringTrim(UNICODE_CI, "i", "i", ""); + assertStringTrim(UNICODE_CI, "iii", "I", ""); + assertStringTrim(UNICODE_CI, "I", "iii", ""); + assertStringTrim(UNICODE_CI, "ixi", "i", "x"); + assertStringTrim(UNICODE_CI, "i", "İ", "i"); + assertStringTrim(UNICODE_CI, "i\u0307", "İ", ""); + assertStringTrim(UNICODE_CI, "ii\u0307", "İi", ""); + assertStringTrim(UNICODE_CI, "iii\u0307", "İi", ""); + assertStringTrim(UNICODE_CI, "iiii\u0307", "iİ", ""); + assertStringTrim(UNICODE_CI, "ii\u0307ii\u0307", "iİ", ""); + assertStringTrim(UNICODE_CI, "i\u0307", "i", "i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307", "\u0307", "i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307i", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307i", "İ", "i"); + assertStringTrim(UNICODE_CI, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrim(UNICODE_CI, "i\u0307İ", "İ", ""); + assertStringTrim(UNICODE_CI, "İ", "İ", ""); + assertStringTrim(UNICODE_CI, "IXi", "İ", "IXi"); + assertStringTrim(UNICODE_CI, "ix\u0307", "Ixİ", "x\u0307"); + assertStringTrim(UNICODE_CI, "i\u0307x", "IXİ", ""); + assertStringTrim(UNICODE_CI, "i\u0307x", "I\u0307xİ", ""); + assertStringTrim(UNICODE_CI, "İ", "i", "İ"); + assertStringTrim(UNICODE_CI, "İ", "\u0307", "İ"); + assertStringTrim(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrim(UNICODE_CI, "Ixİ", "i\u0307", "xİ"); + assertStringTrim(UNICODE_CI, "IXİ", "ix\u0307", "İ"); + assertStringTrim(UNICODE_CI, "xi\u0307", "\u0307IX", "i\u0307"); // Conditional case mapping (e.g. Greek sigmas). 
- assertStringTrim("UTF8_BINARY", "ςxς", "σ", "ςxς"); - assertStringTrim("UTF8_BINARY", "ςxς", "ς", "x"); - assertStringTrim("UTF8_BINARY", "ςxς", "Σ", "ςxς"); - assertStringTrim("UTF8_BINARY", "σxσ", "σ", "x"); - assertStringTrim("UTF8_BINARY", "σxσ", "ς", "σxσ"); - assertStringTrim("UTF8_BINARY", "σxσ", "Σ", "σxσ"); - assertStringTrim("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrim("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrim("UTF8_BINARY", "ΣxΣ", "Σ", "x"); - assertStringTrim("UTF8_LCASE", "ςxς", "σ", "x"); - assertStringTrim("UTF8_LCASE", "ςxς", "ς", "x"); - assertStringTrim("UTF8_LCASE", "ςxς", "Σ", "x"); - assertStringTrim("UTF8_LCASE", "σxσ", "σ", "x"); - assertStringTrim("UTF8_LCASE", "σxσ", "ς", "x"); - assertStringTrim("UTF8_LCASE", "σxσ", "Σ", "x"); - assertStringTrim("UTF8_LCASE", "ΣxΣ", "σ", "x"); - assertStringTrim("UTF8_LCASE", "ΣxΣ", "ς", "x"); - assertStringTrim("UTF8_LCASE", "ΣxΣ", "Σ", "x"); - assertStringTrim("UNICODE", "ςxς", "σ", "ςxς"); - assertStringTrim("UNICODE", "ςxς", "ς", "x"); - assertStringTrim("UNICODE", "ςxς", "Σ", "ςxς"); - assertStringTrim("UNICODE", "σxσ", "σ", "x"); - assertStringTrim("UNICODE", "σxσ", "ς", "σxσ"); - assertStringTrim("UNICODE", "σxσ", "Σ", "σxσ"); - assertStringTrim("UNICODE", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrim("UNICODE", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrim("UNICODE", "ΣxΣ", "Σ", "x"); - assertStringTrim("UNICODE_CI", "ςxς", "σ", "x"); - assertStringTrim("UNICODE_CI", "ςxς", "ς", "x"); - assertStringTrim("UNICODE_CI", "ςxς", "Σ", "x"); - assertStringTrim("UNICODE_CI", "σxσ", "σ", "x"); - assertStringTrim("UNICODE_CI", "σxσ", "ς", "x"); - assertStringTrim("UNICODE_CI", "σxσ", "Σ", "x"); - assertStringTrim("UNICODE_CI", "ΣxΣ", "σ", "x"); - assertStringTrim("UNICODE_CI", "ΣxΣ", "ς", "x"); - assertStringTrim("UNICODE_CI", "ΣxΣ", "Σ", "x"); + assertStringTrim(UTF8_BINARY, "ςxς", "σ", "ςxς"); + assertStringTrim(UTF8_BINARY, "ςxς", "ς", "x"); + assertStringTrim(UTF8_BINARY, "ςxς", "Σ", "ςxς"); + 
assertStringTrim(UTF8_BINARY, "σxσ", "σ", "x"); + assertStringTrim(UTF8_BINARY, "σxσ", "ς", "σxσ"); + assertStringTrim(UTF8_BINARY, "σxσ", "Σ", "σxσ"); + assertStringTrim(UTF8_BINARY, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrim(UTF8_BINARY, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrim(UTF8_BINARY, "ΣxΣ", "Σ", "x"); + assertStringTrim(UTF8_LCASE, "ςxς", "σ", "x"); + assertStringTrim(UTF8_LCASE, "ςxς", "ς", "x"); + assertStringTrim(UTF8_LCASE, "ςxς", "Σ", "x"); + assertStringTrim(UTF8_LCASE, "σxσ", "σ", "x"); + assertStringTrim(UTF8_LCASE, "σxσ", "ς", "x"); + assertStringTrim(UTF8_LCASE, "σxσ", "Σ", "x"); + assertStringTrim(UTF8_LCASE, "ΣxΣ", "σ", "x"); + assertStringTrim(UTF8_LCASE, "ΣxΣ", "ς", "x"); + assertStringTrim(UTF8_LCASE, "ΣxΣ", "Σ", "x"); + assertStringTrim(UNICODE, "ςxς", "σ", "ςxς"); + assertStringTrim(UNICODE, "ςxς", "ς", "x"); + assertStringTrim(UNICODE, "ςxς", "Σ", "ςxς"); + assertStringTrim(UNICODE, "σxσ", "σ", "x"); + assertStringTrim(UNICODE, "σxσ", "ς", "σxσ"); + assertStringTrim(UNICODE, "σxσ", "Σ", "σxσ"); + assertStringTrim(UNICODE, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrim(UNICODE, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrim(UNICODE, "ΣxΣ", "Σ", "x"); + assertStringTrim(UNICODE_CI, "ςxς", "σ", "x"); + assertStringTrim(UNICODE_CI, "ςxς", "ς", "x"); + assertStringTrim(UNICODE_CI, "ςxς", "Σ", "x"); + assertStringTrim(UNICODE_CI, "σxσ", "σ", "x"); + assertStringTrim(UNICODE_CI, "σxσ", "ς", "x"); + assertStringTrim(UNICODE_CI, "σxσ", "Σ", "x"); + assertStringTrim(UNICODE_CI, "ΣxΣ", "σ", "x"); + assertStringTrim(UNICODE_CI, "ΣxΣ", "ς", "x"); + assertStringTrim(UNICODE_CI, "ΣxΣ", "Σ", "x"); // Unicode normalization. 
- assertStringTrim("UTF8_BINARY", "åβγδa\u030A", "å", "βγδa\u030A"); - assertStringTrim("UTF8_LCASE", "åβγδa\u030A", "Å", "βγδa\u030A"); - assertStringTrim("UNICODE", "åβγδa\u030A", "å", "βγδ"); - assertStringTrim("UNICODE_CI", "åβγδa\u030A", "Å", "βγδ"); + assertStringTrim(UTF8_BINARY, "åβγδa\u030A", "å", "βγδa\u030A"); + assertStringTrim(UTF8_LCASE, "åβγδa\u030A", "Å", "βγδa\u030A"); + assertStringTrim(UNICODE, "åβγδa\u030A", "å", "βγδ"); + assertStringTrim(UNICODE_CI, "åβγδa\u030A", "Å", "βγδ"); // Surrogate pairs. - assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrim("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrim("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "ac", "🙃b🙃"); - assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "ac", "🙃b🙃"); - assertStringTrim("UNICODE", "a🙃b🙃c", "ac", "🙃b🙃"); - assertStringTrim("UNICODE_CI", "a🙃b🙃c", "ac", "🙃b🙃"); - assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "a🙃c", "b"); - assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "a🙃c", "b"); - assertStringTrim("UNICODE", "a🙃b🙃c", "a🙃c", "b"); - assertStringTrim("UNICODE_CI", "a🙃b🙃c", "a🙃c", "b"); - assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "abc🙃", ""); - assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrim("UNICODE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrim("UNICODE_CI", "a🙃b🙃c", "abc🙃", ""); - assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😀😄", "😆😃"); - assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😀😄", "😆😃"); - assertStringTrim("UNICODE", "😀😆😃😄", "😀😄", "😆😃"); - assertStringTrim("UNICODE_CI", "😀😆😃😄", "😀😄", "😆😃"); - assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrim("UNICODE", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrim("UNICODE_CI", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrim("UNICODE", "😀😆😃😄", "😀😆😃😄", ""); - 
assertStringTrim("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrim("UTF8_BINARY", "𐐅", "𐐅", ""); - assertStringTrim("UTF8_LCASE", "𐐅", "𐐅", ""); - assertStringTrim("UNICODE", "𐐅", "𐐅", ""); - assertStringTrim("UNICODE_CI", "𐐅", "𐐅", ""); - assertStringTrim("UTF8_BINARY", "𐐅", "𐐭", "𐐅"); - assertStringTrim("UTF8_LCASE", "𐐅", "𐐭", ""); - assertStringTrim("UNICODE", "𐐅", "𐐭", "𐐅"); - assertStringTrim("UNICODE_CI", "𐐅", "𐐭", ""); - assertStringTrim("UTF8_BINARY", "𝔸", "𝔸", ""); - assertStringTrim("UTF8_LCASE", "𝔸", "𝔸", ""); - assertStringTrim("UNICODE", "𝔸", "𝔸", ""); - assertStringTrim("UNICODE_CI", "𝔸", "𝔸", ""); - assertStringTrim("UTF8_BINARY", "𝔸", "A", "𝔸"); - assertStringTrim("UTF8_LCASE", "𝔸", "A", "𝔸"); - assertStringTrim("UNICODE", "𝔸", "A", "𝔸"); - assertStringTrim("UNICODE_CI", "𝔸", "A", ""); - assertStringTrim("UTF8_BINARY", "𝔸", "a", "𝔸"); - assertStringTrim("UTF8_LCASE", "𝔸", "a", "𝔸"); - assertStringTrim("UNICODE", "𝔸", "a", "𝔸"); - assertStringTrim("UNICODE_CI", "𝔸", "a", ""); + assertStringTrim(UTF8_BINARY, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrim(UTF8_LCASE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrim(UNICODE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrim(UNICODE_CI, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrim(UTF8_BINARY, "a🙃b🙃c", "ac", "🙃b🙃"); + assertStringTrim(UTF8_LCASE, "a🙃b🙃c", "ac", "🙃b🙃"); + assertStringTrim(UNICODE, "a🙃b🙃c", "ac", "🙃b🙃"); + assertStringTrim(UNICODE_CI, "a🙃b🙃c", "ac", "🙃b🙃"); + assertStringTrim(UTF8_BINARY, "a🙃b🙃c", "a🙃c", "b"); + assertStringTrim(UTF8_LCASE, "a🙃b🙃c", "a🙃c", "b"); + assertStringTrim(UNICODE, "a🙃b🙃c", "a🙃c", "b"); + assertStringTrim(UNICODE_CI, "a🙃b🙃c", "a🙃c", "b"); + assertStringTrim(UTF8_BINARY, "a🙃b🙃c", "abc🙃", ""); + assertStringTrim(UTF8_LCASE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrim(UNICODE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrim(UNICODE_CI, "a🙃b🙃c", "abc🙃", ""); + assertStringTrim(UTF8_BINARY, "😀😆😃😄", "😀😄", "😆😃"); + assertStringTrim(UTF8_LCASE, "😀😆😃😄", "😀😄", "😆😃"); + assertStringTrim(UNICODE, 
"😀😆😃😄", "😀😄", "😆😃"); + assertStringTrim(UNICODE_CI, "😀😆😃😄", "😀😄", "😆😃"); + assertStringTrim(UTF8_BINARY, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrim(UTF8_LCASE, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrim(UNICODE, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrim(UNICODE_CI, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrim(UTF8_BINARY, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrim(UTF8_LCASE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrim(UNICODE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrim(UNICODE_CI, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrim(UTF8_BINARY, "𐐅", "𐐅", ""); + assertStringTrim(UTF8_LCASE, "𐐅", "𐐅", ""); + assertStringTrim(UNICODE, "𐐅", "𐐅", ""); + assertStringTrim(UNICODE_CI, "𐐅", "𐐅", ""); + assertStringTrim(UTF8_BINARY, "𐐅", "𐐭", "𐐅"); + assertStringTrim(UTF8_LCASE, "𐐅", "𐐭", ""); + assertStringTrim(UNICODE, "𐐅", "𐐭", "𐐅"); + assertStringTrim(UNICODE_CI, "𐐅", "𐐭", ""); + assertStringTrim(UTF8_BINARY, "𝔸", "𝔸", ""); + assertStringTrim(UTF8_LCASE, "𝔸", "𝔸", ""); + assertStringTrim(UNICODE, "𝔸", "𝔸", ""); + assertStringTrim(UNICODE_CI, "𝔸", "𝔸", ""); + assertStringTrim(UTF8_BINARY, "𝔸", "A", "𝔸"); + assertStringTrim(UTF8_LCASE, "𝔸", "A", "𝔸"); + assertStringTrim(UNICODE, "𝔸", "A", "𝔸"); + assertStringTrim(UNICODE_CI, "𝔸", "A", ""); + assertStringTrim(UTF8_BINARY, "𝔸", "a", "𝔸"); + assertStringTrim(UTF8_LCASE, "𝔸", "a", "𝔸"); + assertStringTrim(UNICODE, "𝔸", "a", "𝔸"); + assertStringTrim(UNICODE_CI, "𝔸", "a", ""); } /** @@ -3078,277 +3079,277 @@ private void assertStringTrimLeft(String collationName, String sourceString, Str @Test public void testStringTrimLeft() throws SparkException { // Basic tests - UTF8_BINARY. 
- assertStringTrimLeft("UTF8_BINARY", "", "", ""); - assertStringTrimLeft("UTF8_BINARY", "", "xyz", ""); - assertStringTrimLeft("UTF8_BINARY", "asd", "", "asd"); - assertStringTrimLeft("UTF8_BINARY", "asd", null, "asd"); - assertStringTrimLeft("UTF8_BINARY", " asd ", null, "asd "); - assertStringTrimLeft("UTF8_BINARY", " a世a ", null, "a世a "); - assertStringTrimLeft("UTF8_BINARY", "asd", "x", "asd"); - assertStringTrimLeft("UTF8_BINARY", "xxasdxx", "x", "asdxx"); - assertStringTrimLeft("UTF8_BINARY", "xa世ax", "x", "a世ax"); + assertStringTrimLeft(UTF8_BINARY, "", "", ""); + assertStringTrimLeft(UTF8_BINARY, "", "xyz", ""); + assertStringTrimLeft(UTF8_BINARY, "asd", "", "asd"); + assertStringTrimLeft(UTF8_BINARY, "asd", null, "asd"); + assertStringTrimLeft(UTF8_BINARY, " asd ", null, "asd "); + assertStringTrimLeft(UTF8_BINARY, " a世a ", null, "a世a "); + assertStringTrimLeft(UTF8_BINARY, "asd", "x", "asd"); + assertStringTrimLeft(UTF8_BINARY, "xxasdxx", "x", "asdxx"); + assertStringTrimLeft(UTF8_BINARY, "xa世ax", "x", "a世ax"); // Basic tests - UTF8_LCASE. 
- assertStringTrimLeft("UTF8_LCASE", "", "", ""); - assertStringTrimLeft("UTF8_LCASE", "", "xyz", ""); - assertStringTrimLeft("UTF8_LCASE", "asd", "", "asd"); - assertStringTrimLeft("UTF8_LCASE", "asd", null, "asd"); - assertStringTrimLeft("UTF8_LCASE", " asd ", null, "asd "); - assertStringTrimLeft("UTF8_LCASE", " a世a ", null, "a世a "); - assertStringTrimLeft("UTF8_LCASE", "asd", "x", "asd"); - assertStringTrimLeft("UTF8_LCASE", "xxasdxx", "x", "asdxx"); - assertStringTrimLeft("UTF8_LCASE", "xa世ax", "x", "a世ax"); + assertStringTrimLeft(UTF8_LCASE, "", "", ""); + assertStringTrimLeft(UTF8_LCASE, "", "xyz", ""); + assertStringTrimLeft(UTF8_LCASE, "asd", "", "asd"); + assertStringTrimLeft(UTF8_LCASE, "asd", null, "asd"); + assertStringTrimLeft(UTF8_LCASE, " asd ", null, "asd "); + assertStringTrimLeft(UTF8_LCASE, " a世a ", null, "a世a "); + assertStringTrimLeft(UTF8_LCASE, "asd", "x", "asd"); + assertStringTrimLeft(UTF8_LCASE, "xxasdxx", "x", "asdxx"); + assertStringTrimLeft(UTF8_LCASE, "xa世ax", "x", "a世ax"); // Basic tests - UNICODE. 
- assertStringTrimLeft("UNICODE", "", "", ""); - assertStringTrimLeft("UNICODE", "", "xyz", ""); - assertStringTrimLeft("UNICODE", "asd", "", "asd"); - assertStringTrimLeft("UNICODE", "asd", null, "asd"); - assertStringTrimLeft("UNICODE", " asd ", null, "asd "); - assertStringTrimLeft("UNICODE", " a世a ", null, "a世a "); - assertStringTrimLeft("UNICODE", "asd", "x", "asd"); - assertStringTrimLeft("UNICODE", "xxasdxx", "x", "asdxx"); - assertStringTrimLeft("UNICODE", "xa世ax", "x", "a世ax"); + assertStringTrimLeft(UNICODE, "", "", ""); + assertStringTrimLeft(UNICODE, "", "xyz", ""); + assertStringTrimLeft(UNICODE, "asd", "", "asd"); + assertStringTrimLeft(UNICODE, "asd", null, "asd"); + assertStringTrimLeft(UNICODE, " asd ", null, "asd "); + assertStringTrimLeft(UNICODE, " a世a ", null, "a世a "); + assertStringTrimLeft(UNICODE, "asd", "x", "asd"); + assertStringTrimLeft(UNICODE, "xxasdxx", "x", "asdxx"); + assertStringTrimLeft(UNICODE, "xa世ax", "x", "a世ax"); // Basic tests - UNICODE_CI. - assertStringTrimLeft("UNICODE_CI", "", "", ""); - assertStringTrimLeft("UNICODE_CI", "", "xyz", ""); - assertStringTrimLeft("UNICODE_CI", "asd", "", "asd"); - assertStringTrimLeft("UNICODE_CI", "asd", null, "asd"); - assertStringTrimLeft("UNICODE_CI", " asd ", null, "asd "); - assertStringTrimLeft("UNICODE_CI", " a世a ", null, "a世a "); - assertStringTrimLeft("UNICODE_CI", "asd", "x", "asd"); - assertStringTrimLeft("UNICODE_CI", "xxasdxx", "x", "asdxx"); - assertStringTrimLeft("UNICODE_CI", "xa世ax", "x", "a世ax"); + assertStringTrimLeft(UNICODE_CI, "", "", ""); + assertStringTrimLeft(UNICODE_CI, "", "xyz", ""); + assertStringTrimLeft(UNICODE_CI, "asd", "", "asd"); + assertStringTrimLeft(UNICODE_CI, "asd", null, "asd"); + assertStringTrimLeft(UNICODE_CI, " asd ", null, "asd "); + assertStringTrimLeft(UNICODE_CI, " a世a ", null, "a世a "); + assertStringTrimLeft(UNICODE_CI, "asd", "x", "asd"); + assertStringTrimLeft(UNICODE_CI, "xxasdxx", "x", "asdxx"); + assertStringTrimLeft(UNICODE_CI, 
"xa世ax", "x", "a世ax"); // Case variation. - assertStringTrimLeft("UTF8_BINARY", "ddsXXXaa", "asd", "XXXaa"); - assertStringTrimLeft("UTF8_LCASE", "ddsXXXaa", "aSd", "XXXaa"); - assertStringTrimLeft("UNICODE", "ddsXXXaa", "asd", "XXXaa"); - assertStringTrimLeft("UNICODE_CI", "ddsXXXaa", "aSd", "XXXaa"); + assertStringTrimLeft(UTF8_BINARY, "ddsXXXaa", "asd", "XXXaa"); + assertStringTrimLeft(UTF8_LCASE, "ddsXXXaa", "aSd", "XXXaa"); + assertStringTrimLeft(UNICODE, "ddsXXXaa", "asd", "XXXaa"); + assertStringTrimLeft(UNICODE_CI, "ddsXXXaa", "aSd", "XXXaa"); // One-to-many case mapping (e.g. Turkish dotted I).. - assertStringTrimLeft("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrimLeft("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrimLeft("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaaẞ"); - assertStringTrimLeft("UTF8_LCASE", "ẞaaaẞ", "ß", "aaaẞ"); - assertStringTrimLeft("UTF8_LCASE", "ßaaaß", "ẞ", "aaaß"); - assertStringTrimLeft("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaaẞ"); - assertStringTrimLeft("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrimLeft("UNICODE", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrimLeft("UNICODE", "Ëaaaẞ", "Ëẞ", "aaaẞ"); - assertStringTrimLeft("UNICODE_CI", "ẞaaaẞ", "ß", "aaaẞ"); - assertStringTrimLeft("UNICODE_CI", "ßaaaß", "ẞ", "aaaß"); - assertStringTrimLeft("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimLeft(UTF8_BINARY, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimLeft(UTF8_BINARY, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrimLeft(UTF8_BINARY, "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimLeft(UTF8_LCASE, "ẞaaaẞ", "ß", "aaaẞ"); + assertStringTrimLeft(UTF8_LCASE, "ßaaaß", "ẞ", "aaaß"); + assertStringTrimLeft(UTF8_LCASE, "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimLeft(UNICODE, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimLeft(UNICODE, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrimLeft(UNICODE, "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimLeft(UNICODE_CI, "ẞaaaẞ", "ß", "aaaẞ"); + assertStringTrimLeft(UNICODE_CI, "ßaaaß", "ẞ", "aaaß"); + 
assertStringTrimLeft(UNICODE_CI, "Ëaaaẞ", "Ëẞ", "aaaẞ"); // One-to-many case mapping (e.g. Turkish dotted I). - assertStringTrimLeft("UTF8_BINARY", "i", "i", ""); - assertStringTrimLeft("UTF8_BINARY", "iii", "I", "iii"); - assertStringTrimLeft("UTF8_BINARY", "I", "iii", "I"); - assertStringTrimLeft("UTF8_BINARY", "ixi", "i", "xi"); - assertStringTrimLeft("UTF8_BINARY", "i", "İ", "i"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307", "İ", "i\u0307"); - assertStringTrimLeft("UTF8_BINARY", "ii\u0307", "İi", "\u0307"); - assertStringTrimLeft("UTF8_BINARY", "iii\u0307", "İi", "\u0307"); - assertStringTrimLeft("UTF8_BINARY", "iiii\u0307", "iİ", "\u0307"); - assertStringTrimLeft("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307", "i", "\u0307"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_BINARY", "i\u0307\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_BINARY", "i\u0307i", "i\u0307", ""); - assertStringTrimLeft("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307İ", "i\u0307", "İ"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307İ"); - assertStringTrimLeft("UTF8_BINARY", "İ", "İ", ""); - assertStringTrimLeft("UTF8_BINARY", "IXi", "İ", "IXi"); - assertStringTrimLeft("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrimLeft("UTF8_BINARY", "i\u0307x", "ix\u0307İ", ""); - assertStringTrimLeft("UTF8_BINARY", "İ", "i", "İ"); - assertStringTrimLeft("UTF8_BINARY", "İ", "\u0307", "İ"); - assertStringTrimLeft("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimLeft("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimLeft("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi\u0307"); - 
assertStringTrimLeft("UTF8_LCASE", "i", "i", ""); - assertStringTrimLeft("UTF8_LCASE", "iii", "I", ""); - assertStringTrimLeft("UTF8_LCASE", "I", "iii", ""); - assertStringTrimLeft("UTF8_LCASE", "ixi", "i", "xi"); - assertStringTrimLeft("UTF8_LCASE", "i", "İ", "i"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307", "İ", ""); - assertStringTrimLeft("UTF8_LCASE", "ii\u0307", "İi", ""); - assertStringTrimLeft("UTF8_LCASE", "iii\u0307", "İi", ""); - assertStringTrimLeft("UTF8_LCASE", "iiii\u0307", "iİ", ""); - assertStringTrimLeft("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307", "i", "\u0307"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307\u0307", "i\u0307", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307i", "i\u0307", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307i", "İ", "i"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307İ", "i\u0307", "İ"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307İ", "İ", ""); - assertStringTrimLeft("UTF8_LCASE", "İ", "İ", ""); - assertStringTrimLeft("UTF8_LCASE", "IXi", "İ", "IXi"); - assertStringTrimLeft("UTF8_LCASE", "ix\u0307", "Ixİ", "\u0307"); - assertStringTrimLeft("UTF8_LCASE", "i\u0307x", "IXİ", ""); - assertStringTrimLeft("UTF8_LCASE", "i\u0307x", "I\u0307xİ", ""); - assertStringTrimLeft("UTF8_LCASE", "İ", "i", "İ"); - assertStringTrimLeft("UTF8_LCASE", "İ", "\u0307", "İ"); - assertStringTrimLeft("UTF8_LCASE", "Ixİ", "i\u0307", "xİ"); - assertStringTrimLeft("UTF8_LCASE", "IXİ", "ix\u0307", "İ"); - assertStringTrimLeft("UTF8_LCASE", "xi\u0307", "\u0307IX", ""); - assertStringTrimLeft("UNICODE", "i", "i", ""); - assertStringTrimLeft("UNICODE", "iii", "I", "iii"); - assertStringTrimLeft("UNICODE", "I", "iii", "I"); - assertStringTrimLeft("UNICODE", "ixi", "i", "xi"); - 
assertStringTrimLeft("UNICODE", "i", "İ", "i"); - assertStringTrimLeft("UNICODE", "i\u0307", "İ", "i\u0307"); - assertStringTrimLeft("UNICODE", "ii\u0307", "İi", "i\u0307"); - assertStringTrimLeft("UNICODE", "iii\u0307", "İi", "i\u0307"); - assertStringTrimLeft("UNICODE", "iiii\u0307", "iİ", "i\u0307"); - assertStringTrimLeft("UNICODE", "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307", "i", "i\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307i", "i\u0307", "i\u0307i"); - assertStringTrimLeft("UNICODE", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimLeft("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimLeft("UNICODE", "i\u0307İ", "İ", "i\u0307İ"); - assertStringTrimLeft("UNICODE", "İ", "İ", ""); - assertStringTrimLeft("UNICODE", "IXi", "İ", "IXi"); - assertStringTrimLeft("UNICODE", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimLeft("UNICODE", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrimLeft("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307x"); - assertStringTrimLeft("UNICODE", "İ", "i", "İ"); - assertStringTrimLeft("UNICODE", "İ", "\u0307", "İ"); - assertStringTrimLeft("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimLeft("UNICODE", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimLeft("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307"); - assertStringTrimLeft("UNICODE_CI", "i", "i", ""); - assertStringTrimLeft("UNICODE_CI", "iii", "I", ""); - assertStringTrimLeft("UNICODE_CI", "I", "iii", ""); - assertStringTrimLeft("UNICODE_CI", "ixi", "i", "xi"); - assertStringTrimLeft("UNICODE_CI", "i", "İ", "i"); - assertStringTrimLeft("UNICODE_CI", 
"i\u0307", "İ", ""); - assertStringTrimLeft("UNICODE_CI", "ii\u0307", "İi", ""); - assertStringTrimLeft("UNICODE_CI", "iii\u0307", "İi", ""); - assertStringTrimLeft("UNICODE_CI", "iiii\u0307", "iİ", ""); - assertStringTrimLeft("UNICODE_CI", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrimLeft("UNICODE_CI", "i\u0307", "i", "i\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307i", "i\u0307", "i\u0307i"); - assertStringTrimLeft("UNICODE_CI", "i\u0307i", "İ", "i"); - assertStringTrimLeft("UNICODE_CI", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimLeft("UNICODE_CI", "i\u0307İ", "İ", ""); - assertStringTrimLeft("UNICODE_CI", "İ", "İ", ""); - assertStringTrimLeft("UNICODE_CI", "IXi", "İ", "IXi"); - assertStringTrimLeft("UNICODE_CI", "ix\u0307", "Ixİ", "x\u0307"); - assertStringTrimLeft("UNICODE_CI", "i\u0307x", "IXİ", ""); - assertStringTrimLeft("UNICODE_CI", "i\u0307x", "I\u0307xİ", ""); - assertStringTrimLeft("UNICODE_CI", "İ", "i", "İ"); - assertStringTrimLeft("UNICODE_CI", "İ", "\u0307", "İ"); - assertStringTrimLeft("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimLeft("UNICODE_CI", "Ixİ", "i\u0307", "xİ"); - assertStringTrimLeft("UNICODE_CI", "IXİ", "ix\u0307", "İ"); - assertStringTrimLeft("UNICODE_CI", "xi\u0307", "\u0307IX", "i\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i", "i", ""); + assertStringTrimLeft(UTF8_BINARY, "iii", "I", "iii"); + assertStringTrimLeft(UTF8_BINARY, "I", "iii", "I"); + assertStringTrimLeft(UTF8_BINARY, "ixi", "i", "xi"); + assertStringTrimLeft(UTF8_BINARY, "i", "İ", "i"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307", "İ", "i\u0307"); + assertStringTrimLeft(UTF8_BINARY, "ii\u0307", "İi", "\u0307"); + 
assertStringTrimLeft(UTF8_BINARY, "iii\u0307", "İi", "\u0307"); + assertStringTrimLeft(UTF8_BINARY, "iiii\u0307", "iİ", "\u0307"); + assertStringTrimLeft(UTF8_BINARY, "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307", "i", "\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_BINARY, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_BINARY, "i\u0307\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_BINARY, "i\u0307i", "i\u0307", ""); + assertStringTrimLeft(UTF8_BINARY, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307İ", "i\u0307", "İ"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307İ", "İ", "i\u0307İ"); + assertStringTrimLeft(UTF8_BINARY, "İ", "İ", ""); + assertStringTrimLeft(UTF8_BINARY, "IXi", "İ", "IXi"); + assertStringTrimLeft(UTF8_BINARY, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrimLeft(UTF8_BINARY, "i\u0307x", "ix\u0307İ", ""); + assertStringTrimLeft(UTF8_BINARY, "İ", "i", "İ"); + assertStringTrimLeft(UTF8_BINARY, "İ", "\u0307", "İ"); + assertStringTrimLeft(UTF8_BINARY, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimLeft(UTF8_BINARY, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimLeft(UTF8_BINARY, "xi\u0307", "\u0307IX", "xi\u0307"); + assertStringTrimLeft(UTF8_LCASE, "i", "i", ""); + assertStringTrimLeft(UTF8_LCASE, "iii", "I", ""); + assertStringTrimLeft(UTF8_LCASE, "I", "iii", ""); + assertStringTrimLeft(UTF8_LCASE, "ixi", "i", "xi"); + assertStringTrimLeft(UTF8_LCASE, "i", "İ", "i"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307", "İ", ""); + assertStringTrimLeft(UTF8_LCASE, "ii\u0307", "İi", ""); + assertStringTrimLeft(UTF8_LCASE, "iii\u0307", "İi", ""); + assertStringTrimLeft(UTF8_LCASE, "iiii\u0307", "iİ", ""); + assertStringTrimLeft(UTF8_LCASE, "ii\u0307ii\u0307", "iİ", ""); + 
assertStringTrimLeft(UTF8_LCASE, "i\u0307", "i", "\u0307"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307\u0307", "i\u0307", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307i", "i\u0307", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307i", "İ", "i"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307İ", "i\u0307", "İ"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307İ", "İ", ""); + assertStringTrimLeft(UTF8_LCASE, "İ", "İ", ""); + assertStringTrimLeft(UTF8_LCASE, "IXi", "İ", "IXi"); + assertStringTrimLeft(UTF8_LCASE, "ix\u0307", "Ixİ", "\u0307"); + assertStringTrimLeft(UTF8_LCASE, "i\u0307x", "IXİ", ""); + assertStringTrimLeft(UTF8_LCASE, "i\u0307x", "I\u0307xİ", ""); + assertStringTrimLeft(UTF8_LCASE, "İ", "i", "İ"); + assertStringTrimLeft(UTF8_LCASE, "İ", "\u0307", "İ"); + assertStringTrimLeft(UTF8_LCASE, "Ixİ", "i\u0307", "xİ"); + assertStringTrimLeft(UTF8_LCASE, "IXİ", "ix\u0307", "İ"); + assertStringTrimLeft(UTF8_LCASE, "xi\u0307", "\u0307IX", ""); + assertStringTrimLeft(UNICODE, "i", "i", ""); + assertStringTrimLeft(UNICODE, "iii", "I", "iii"); + assertStringTrimLeft(UNICODE, "I", "iii", "I"); + assertStringTrimLeft(UNICODE, "ixi", "i", "xi"); + assertStringTrimLeft(UNICODE, "i", "İ", "i"); + assertStringTrimLeft(UNICODE, "i\u0307", "İ", "i\u0307"); + assertStringTrimLeft(UNICODE, "ii\u0307", "İi", "i\u0307"); + assertStringTrimLeft(UNICODE, "iii\u0307", "İi", "i\u0307"); + assertStringTrimLeft(UNICODE, "iiii\u0307", "iİ", "i\u0307"); + assertStringTrimLeft(UNICODE, "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307", "i", "i\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); 
+ assertStringTrimLeft(UNICODE, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307i", "i\u0307", "i\u0307i"); + assertStringTrimLeft(UNICODE, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimLeft(UNICODE, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimLeft(UNICODE, "i\u0307İ", "İ", "i\u0307İ"); + assertStringTrimLeft(UNICODE, "İ", "İ", ""); + assertStringTrimLeft(UNICODE, "IXi", "İ", "IXi"); + assertStringTrimLeft(UNICODE, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimLeft(UNICODE, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrimLeft(UNICODE, "i\u0307x", "ix\u0307İ", "i\u0307x"); + assertStringTrimLeft(UNICODE, "İ", "i", "İ"); + assertStringTrimLeft(UNICODE, "İ", "\u0307", "İ"); + assertStringTrimLeft(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimLeft(UNICODE, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimLeft(UNICODE, "xi\u0307", "\u0307IX", "xi\u0307"); + assertStringTrimLeft(UNICODE_CI, "i", "i", ""); + assertStringTrimLeft(UNICODE_CI, "iii", "I", ""); + assertStringTrimLeft(UNICODE_CI, "I", "iii", ""); + assertStringTrimLeft(UNICODE_CI, "ixi", "i", "xi"); + assertStringTrimLeft(UNICODE_CI, "i", "İ", "i"); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "İ", ""); + assertStringTrimLeft(UNICODE_CI, "ii\u0307", "İi", ""); + assertStringTrimLeft(UNICODE_CI, "iii\u0307", "İi", ""); + assertStringTrimLeft(UNICODE_CI, "iiii\u0307", "iİ", ""); + assertStringTrimLeft(UNICODE_CI, "ii\u0307ii\u0307", "iİ", ""); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "i", "i\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307i", "i\u0307", "i\u0307i"); + 
assertStringTrimLeft(UNICODE_CI, "i\u0307i", "İ", "i"); + assertStringTrimLeft(UNICODE_CI, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimLeft(UNICODE_CI, "i\u0307İ", "İ", ""); + assertStringTrimLeft(UNICODE_CI, "İ", "İ", ""); + assertStringTrimLeft(UNICODE_CI, "IXi", "İ", "IXi"); + assertStringTrimLeft(UNICODE_CI, "ix\u0307", "Ixİ", "x\u0307"); + assertStringTrimLeft(UNICODE_CI, "i\u0307x", "IXİ", ""); + assertStringTrimLeft(UNICODE_CI, "i\u0307x", "I\u0307xİ", ""); + assertStringTrimLeft(UNICODE_CI, "İ", "i", "İ"); + assertStringTrimLeft(UNICODE_CI, "İ", "\u0307", "İ"); + assertStringTrimLeft(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimLeft(UNICODE_CI, "Ixİ", "i\u0307", "xİ"); + assertStringTrimLeft(UNICODE_CI, "IXİ", "ix\u0307", "İ"); + assertStringTrimLeft(UNICODE_CI, "xi\u0307", "\u0307IX", "i\u0307"); // Conditional case mapping (e.g. Greek sigmas). - assertStringTrimLeft("UTF8_BINARY", "ςxς", "σ", "ςxς"); - assertStringTrimLeft("UTF8_BINARY", "ςxς", "ς", "xς"); - assertStringTrimLeft("UTF8_BINARY", "ςxς", "Σ", "ςxς"); - assertStringTrimLeft("UTF8_BINARY", "σxσ", "σ", "xσ"); - assertStringTrimLeft("UTF8_BINARY", "σxσ", "ς", "σxσ"); - assertStringTrimLeft("UTF8_BINARY", "σxσ", "Σ", "σxσ"); - assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "Σ", "xΣ"); - assertStringTrimLeft("UTF8_LCASE", "ςxς", "σ", "xς"); - assertStringTrimLeft("UTF8_LCASE", "ςxς", "ς", "xς"); - assertStringTrimLeft("UTF8_LCASE", "ςxς", "Σ", "xς"); - assertStringTrimLeft("UTF8_LCASE", "σxσ", "σ", "xσ"); - assertStringTrimLeft("UTF8_LCASE", "σxσ", "ς", "xσ"); - assertStringTrimLeft("UTF8_LCASE", "σxσ", "Σ", "xσ"); - assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "σ", "xΣ"); - assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "ς", "xΣ"); - assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "Σ", "xΣ"); - assertStringTrimLeft("UNICODE", "ςxς", "σ", "ςxς"); - 
assertStringTrimLeft("UNICODE", "ςxς", "ς", "xς"); - assertStringTrimLeft("UNICODE", "ςxς", "Σ", "ςxς"); - assertStringTrimLeft("UNICODE", "σxσ", "σ", "xσ"); - assertStringTrimLeft("UNICODE", "σxσ", "ς", "σxσ"); - assertStringTrimLeft("UNICODE", "σxσ", "Σ", "σxσ"); - assertStringTrimLeft("UNICODE", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrimLeft("UNICODE", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrimLeft("UNICODE", "ΣxΣ", "Σ", "xΣ"); - assertStringTrimLeft("UNICODE_CI", "ςxς", "σ", "xς"); - assertStringTrimLeft("UNICODE_CI", "ςxς", "ς", "xς"); - assertStringTrimLeft("UNICODE_CI", "ςxς", "Σ", "xς"); - assertStringTrimLeft("UNICODE_CI", "σxσ", "σ", "xσ"); - assertStringTrimLeft("UNICODE_CI", "σxσ", "ς", "xσ"); - assertStringTrimLeft("UNICODE_CI", "σxσ", "Σ", "xσ"); - assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "σ", "xΣ"); - assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "ς", "xΣ"); - assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "Σ", "xΣ"); + assertStringTrimLeft(UTF8_BINARY, "ςxς", "σ", "ςxς"); + assertStringTrimLeft(UTF8_BINARY, "ςxς", "ς", "xς"); + assertStringTrimLeft(UTF8_BINARY, "ςxς", "Σ", "ςxς"); + assertStringTrimLeft(UTF8_BINARY, "σxσ", "σ", "xσ"); + assertStringTrimLeft(UTF8_BINARY, "σxσ", "ς", "σxσ"); + assertStringTrimLeft(UTF8_BINARY, "σxσ", "Σ", "σxσ"); + assertStringTrimLeft(UTF8_BINARY, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrimLeft(UTF8_BINARY, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrimLeft(UTF8_BINARY, "ΣxΣ", "Σ", "xΣ"); + assertStringTrimLeft(UTF8_LCASE, "ςxς", "σ", "xς"); + assertStringTrimLeft(UTF8_LCASE, "ςxς", "ς", "xς"); + assertStringTrimLeft(UTF8_LCASE, "ςxς", "Σ", "xς"); + assertStringTrimLeft(UTF8_LCASE, "σxσ", "σ", "xσ"); + assertStringTrimLeft(UTF8_LCASE, "σxσ", "ς", "xσ"); + assertStringTrimLeft(UTF8_LCASE, "σxσ", "Σ", "xσ"); + assertStringTrimLeft(UTF8_LCASE, "ΣxΣ", "σ", "xΣ"); + assertStringTrimLeft(UTF8_LCASE, "ΣxΣ", "ς", "xΣ"); + assertStringTrimLeft(UTF8_LCASE, "ΣxΣ", "Σ", "xΣ"); + assertStringTrimLeft(UNICODE, "ςxς", "σ", "ςxς"); + 
assertStringTrimLeft(UNICODE, "ςxς", "ς", "xς"); + assertStringTrimLeft(UNICODE, "ςxς", "Σ", "ςxς"); + assertStringTrimLeft(UNICODE, "σxσ", "σ", "xσ"); + assertStringTrimLeft(UNICODE, "σxσ", "ς", "σxσ"); + assertStringTrimLeft(UNICODE, "σxσ", "Σ", "σxσ"); + assertStringTrimLeft(UNICODE, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrimLeft(UNICODE, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrimLeft(UNICODE, "ΣxΣ", "Σ", "xΣ"); + assertStringTrimLeft(UNICODE_CI, "ςxς", "σ", "xς"); + assertStringTrimLeft(UNICODE_CI, "ςxς", "ς", "xς"); + assertStringTrimLeft(UNICODE_CI, "ςxς", "Σ", "xς"); + assertStringTrimLeft(UNICODE_CI, "σxσ", "σ", "xσ"); + assertStringTrimLeft(UNICODE_CI, "σxσ", "ς", "xσ"); + assertStringTrimLeft(UNICODE_CI, "σxσ", "Σ", "xσ"); + assertStringTrimLeft(UNICODE_CI, "ΣxΣ", "σ", "xΣ"); + assertStringTrimLeft(UNICODE_CI, "ΣxΣ", "ς", "xΣ"); + assertStringTrimLeft(UNICODE_CI, "ΣxΣ", "Σ", "xΣ"); // Unicode normalization. - assertStringTrimLeft("UTF8_BINARY", "åβγδa\u030A", "å", "βγδa\u030A"); - assertStringTrimLeft("UTF8_LCASE", "åβγδa\u030A", "Å", "βγδa\u030A"); - assertStringTrimLeft("UNICODE", "åβγδa\u030A", "å", "βγδa\u030A"); - assertStringTrimLeft("UNICODE_CI", "åβγδa\u030A", "Å", "βγδa\u030A"); + assertStringTrimLeft(UTF8_BINARY, "åβγδa\u030A", "å", "βγδa\u030A"); + assertStringTrimLeft(UTF8_LCASE, "åβγδa\u030A", "Å", "βγδa\u030A"); + assertStringTrimLeft(UNICODE, "åβγδa\u030A", "å", "βγδa\u030A"); + assertStringTrimLeft(UNICODE_CI, "åβγδa\u030A", "Å", "βγδa\u030A"); // Surrogate pairs. 
- assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a", "🙃b🙃c"); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a", "🙃b🙃c"); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a", "🙃b🙃c"); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a", "🙃b🙃c"); - assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a🙃", "b🙃c"); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a🙃", "b🙃c"); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a🙃", "b🙃c"); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a🙃", "b🙃c"); - assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a🙃b", "c"); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a🙃b", "c"); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a🙃b", "c"); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a🙃b", "c"); - assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimLeft("UNICODE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimLeft("UNICODE", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😀😆", "😃😄"); - assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😀😆", "😃😄"); - assertStringTrimLeft("UNICODE", "😀😆😃😄", "😀😆", "😃😄"); - assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😀😆", "😃😄"); - assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimLeft("UNICODE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimLeft("UTF8_BINARY", "𐐅", "𐐅", ""); - assertStringTrimLeft("UTF8_LCASE", "𐐅", "𐐅", ""); - 
assertStringTrimLeft("UNICODE", "𐐅", "𐐅", ""); - assertStringTrimLeft("UNICODE_CI", "𐐅", "𐐅", ""); - assertStringTrimLeft("UTF8_BINARY", "𐐅", "𐐭", "𐐅"); - assertStringTrimLeft("UTF8_LCASE", "𐐅", "𐐭", ""); - assertStringTrimLeft("UNICODE", "𐐅", "𐐭", "𐐅"); - assertStringTrimLeft("UNICODE_CI", "𐐅", "𐐭", ""); - assertStringTrimLeft("UTF8_BINARY", "𝔸", "𝔸", ""); - assertStringTrimLeft("UTF8_LCASE", "𝔸", "𝔸", ""); - assertStringTrimLeft("UNICODE", "𝔸", "𝔸", ""); - assertStringTrimLeft("UNICODE_CI", "𝔸", "𝔸", ""); - assertStringTrimLeft("UTF8_BINARY", "𝔸", "A", "𝔸"); - assertStringTrimLeft("UTF8_LCASE", "𝔸", "A", "𝔸"); - assertStringTrimLeft("UNICODE", "𝔸", "A", "𝔸"); - assertStringTrimLeft("UNICODE_CI", "𝔸", "A", ""); - assertStringTrimLeft("UTF8_BINARY", "𝔸", "a", "𝔸"); - assertStringTrimLeft("UTF8_LCASE", "𝔸", "a", "𝔸"); - assertStringTrimLeft("UNICODE", "𝔸", "a", "𝔸"); - assertStringTrimLeft("UNICODE_CI", "𝔸", "a", ""); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimLeft(UNICODE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "a", "🙃b🙃c"); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "a", "🙃b🙃c"); + assertStringTrimLeft(UNICODE, "a🙃b🙃c", "a", "🙃b🙃c"); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "a", "🙃b🙃c"); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "a🙃", "b🙃c"); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "a🙃", "b🙃c"); + assertStringTrimLeft(UNICODE, "a🙃b🙃c", "a🙃", "b🙃c"); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "a🙃", "b🙃c"); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "a🙃b", "c"); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "a🙃b", "c"); + assertStringTrimLeft(UNICODE, "a🙃b🙃c", "a🙃b", "c"); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "a🙃b", "c"); + assertStringTrimLeft(UTF8_BINARY, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimLeft(UTF8_LCASE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimLeft(UNICODE, 
"a🙃b🙃c", "abc🙃", ""); + assertStringTrimLeft(UNICODE_CI, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimLeft(UTF8_BINARY, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimLeft(UTF8_LCASE, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimLeft(UNICODE, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimLeft(UNICODE_CI, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimLeft(UTF8_BINARY, "😀😆😃😄", "😀😆", "😃😄"); + assertStringTrimLeft(UTF8_LCASE, "😀😆😃😄", "😀😆", "😃😄"); + assertStringTrimLeft(UNICODE, "😀😆😃😄", "😀😆", "😃😄"); + assertStringTrimLeft(UNICODE_CI, "😀😆😃😄", "😀😆", "😃😄"); + assertStringTrimLeft(UTF8_BINARY, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimLeft(UTF8_LCASE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimLeft(UNICODE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimLeft(UNICODE_CI, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimLeft(UTF8_BINARY, "𐐅", "𐐅", ""); + assertStringTrimLeft(UTF8_LCASE, "𐐅", "𐐅", ""); + assertStringTrimLeft(UNICODE, "𐐅", "𐐅", ""); + assertStringTrimLeft(UNICODE_CI, "𐐅", "𐐅", ""); + assertStringTrimLeft(UTF8_BINARY, "𐐅", "𐐭", "𐐅"); + assertStringTrimLeft(UTF8_LCASE, "𐐅", "𐐭", ""); + assertStringTrimLeft(UNICODE, "𐐅", "𐐭", "𐐅"); + assertStringTrimLeft(UNICODE_CI, "𐐅", "𐐭", ""); + assertStringTrimLeft(UTF8_BINARY, "𝔸", "𝔸", ""); + assertStringTrimLeft(UTF8_LCASE, "𝔸", "𝔸", ""); + assertStringTrimLeft(UNICODE, "𝔸", "𝔸", ""); + assertStringTrimLeft(UNICODE_CI, "𝔸", "𝔸", ""); + assertStringTrimLeft(UTF8_BINARY, "𝔸", "A", "𝔸"); + assertStringTrimLeft(UTF8_LCASE, "𝔸", "A", "𝔸"); + assertStringTrimLeft(UNICODE, "𝔸", "A", "𝔸"); + assertStringTrimLeft(UNICODE_CI, "𝔸", "A", ""); + assertStringTrimLeft(UTF8_BINARY, "𝔸", "a", "𝔸"); + assertStringTrimLeft(UTF8_LCASE, "𝔸", "a", "𝔸"); + assertStringTrimLeft(UNICODE, "𝔸", "a", "𝔸"); + assertStringTrimLeft(UNICODE_CI, "𝔸", "a", ""); } /** @@ -3378,274 +3379,274 @@ private void assertStringTrimRight(String collationName, String sourceString, St @Test public void testStringTrimRight() throws SparkException { // Basic tests. 
- assertStringTrimRight("UTF8_BINARY", "", "", ""); - assertStringTrimRight("UTF8_BINARY", "", "xyz", ""); - assertStringTrimRight("UTF8_BINARY", "asd", "", "asd"); - assertStringTrimRight("UTF8_BINARY", "asd", null, "asd"); - assertStringTrimRight("UTF8_BINARY", " asd ", null, " asd"); - assertStringTrimRight("UTF8_BINARY", " a世a ", null, " a世a"); - assertStringTrimRight("UTF8_BINARY", "asd", "x", "asd"); - assertStringTrimRight("UTF8_BINARY", "xxasdxx", "x", "xxasd"); - assertStringTrimRight("UTF8_BINARY", "xa世ax", "x", "xa世a"); - assertStringTrimRight("UTF8_LCASE", "", "", ""); - assertStringTrimRight("UTF8_LCASE", "", "xyz", ""); - assertStringTrimRight("UTF8_LCASE", "asd", "", "asd"); - assertStringTrimRight("UTF8_LCASE", "asd", null, "asd"); - assertStringTrimRight("UTF8_LCASE", " asd ", null, " asd"); - assertStringTrimRight("UTF8_LCASE", " a世a ", null, " a世a"); - assertStringTrimRight("UTF8_LCASE", "asd", "x", "asd"); - assertStringTrimRight("UTF8_LCASE", "xxasdxx", "x", "xxasd"); - assertStringTrimRight("UTF8_LCASE", "xa世ax", "x", "xa世a"); - assertStringTrimRight("UNICODE", "", "", ""); - assertStringTrimRight("UNICODE", "", "xyz", ""); - assertStringTrimRight("UNICODE", "asd", "", "asd"); - assertStringTrimRight("UNICODE", "asd", null, "asd"); - assertStringTrimRight("UNICODE", " asd ", null, " asd"); - assertStringTrimRight("UNICODE", " a世a ", null, " a世a"); - assertStringTrimRight("UNICODE", "asd", "x", "asd"); - assertStringTrimRight("UNICODE", "xxasdxx", "x", "xxasd"); - assertStringTrimRight("UNICODE", "xa世ax", "x", "xa世a"); - assertStringTrimRight("UNICODE_CI", "", "", ""); - assertStringTrimRight("UNICODE_CI", "", "xyz", ""); - assertStringTrimRight("UNICODE_CI", "asd", "", "asd"); - assertStringTrimRight("UNICODE_CI", "asd", null, "asd"); - assertStringTrimRight("UNICODE_CI", " asd ", null, " asd"); - assertStringTrimRight("UNICODE_CI", " a世a ", null, " a世a"); - assertStringTrimRight("UNICODE_CI", "asd", "x", "asd"); - 
assertStringTrimRight("UNICODE_CI", "xxasdxx", "x", "xxasd"); - assertStringTrimRight("UNICODE_CI", "xa世ax", "x", "xa世a"); + assertStringTrimRight(UTF8_BINARY, "", "", ""); + assertStringTrimRight(UTF8_BINARY, "", "xyz", ""); + assertStringTrimRight(UTF8_BINARY, "asd", "", "asd"); + assertStringTrimRight(UTF8_BINARY, "asd", null, "asd"); + assertStringTrimRight(UTF8_BINARY, " asd ", null, " asd"); + assertStringTrimRight(UTF8_BINARY, " a世a ", null, " a世a"); + assertStringTrimRight(UTF8_BINARY, "asd", "x", "asd"); + assertStringTrimRight(UTF8_BINARY, "xxasdxx", "x", "xxasd"); + assertStringTrimRight(UTF8_BINARY, "xa世ax", "x", "xa世a"); + assertStringTrimRight(UTF8_LCASE, "", "", ""); + assertStringTrimRight(UTF8_LCASE, "", "xyz", ""); + assertStringTrimRight(UTF8_LCASE, "asd", "", "asd"); + assertStringTrimRight(UTF8_LCASE, "asd", null, "asd"); + assertStringTrimRight(UTF8_LCASE, " asd ", null, " asd"); + assertStringTrimRight(UTF8_LCASE, " a世a ", null, " a世a"); + assertStringTrimRight(UTF8_LCASE, "asd", "x", "asd"); + assertStringTrimRight(UTF8_LCASE, "xxasdxx", "x", "xxasd"); + assertStringTrimRight(UTF8_LCASE, "xa世ax", "x", "xa世a"); + assertStringTrimRight(UNICODE, "", "", ""); + assertStringTrimRight(UNICODE, "", "xyz", ""); + assertStringTrimRight(UNICODE, "asd", "", "asd"); + assertStringTrimRight(UNICODE, "asd", null, "asd"); + assertStringTrimRight(UNICODE, " asd ", null, " asd"); + assertStringTrimRight(UNICODE, " a世a ", null, " a世a"); + assertStringTrimRight(UNICODE, "asd", "x", "asd"); + assertStringTrimRight(UNICODE, "xxasdxx", "x", "xxasd"); + assertStringTrimRight(UNICODE, "xa世ax", "x", "xa世a"); + assertStringTrimRight(UNICODE_CI, "", "", ""); + assertStringTrimRight(UNICODE_CI, "", "xyz", ""); + assertStringTrimRight(UNICODE_CI, "asd", "", "asd"); + assertStringTrimRight(UNICODE_CI, "asd", null, "asd"); + assertStringTrimRight(UNICODE_CI, " asd ", null, " asd"); + assertStringTrimRight(UNICODE_CI, " a世a ", null, " a世a"); + 
assertStringTrimRight(UNICODE_CI, "asd", "x", "asd"); + assertStringTrimRight(UNICODE_CI, "xxasdxx", "x", "xxasd"); + assertStringTrimRight(UNICODE_CI, "xa世ax", "x", "xa世a"); // Case variation. - assertStringTrimRight("UTF8_BINARY", "ddsXXXaa", "asd", "ddsXXX"); - assertStringTrimRight("UTF8_LCASE", "ddsXXXaa", "AsD", "ddsXXX"); - assertStringTrimRight("UNICODE", "ddsXXXaa", "asd", "ddsXXX"); - assertStringTrimRight("UNICODE_CI", "ddsXXXaa", "AsD", "ddsXXX"); + assertStringTrimRight(UTF8_BINARY, "ddsXXXaa", "asd", "ddsXXX"); + assertStringTrimRight(UTF8_LCASE, "ddsXXXaa", "AsD", "ddsXXX"); + assertStringTrimRight(UNICODE, "ddsXXXaa", "asd", "ddsXXX"); + assertStringTrimRight(UNICODE_CI, "ddsXXXaa", "AsD", "ddsXXX"); // One-to-many case mapping (e.g. Turkish dotted I).. - assertStringTrimRight("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrimRight("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrimRight("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "Ëaaa"); - assertStringTrimRight("UTF8_LCASE", "ẞaaaẞ", "ß", "ẞaaa"); - assertStringTrimRight("UTF8_LCASE", "ßaaaß", "ẞ", "ßaaa"); - assertStringTrimRight("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "Ëaaa"); - assertStringTrimRight("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ"); - assertStringTrimRight("UNICODE", "ßaaaß", "ẞ", "ßaaaß"); - assertStringTrimRight("UNICODE", "Ëaaaẞ", "Ëẞ", "Ëaaa"); - assertStringTrimRight("UNICODE_CI", "ẞaaaẞ", "ß", "ẞaaa"); - assertStringTrimRight("UNICODE_CI", "ßaaaß", "ẞ", "ßaaa"); - assertStringTrimRight("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "Ëaaa"); + assertStringTrimRight(UTF8_BINARY, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimRight(UTF8_BINARY, "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrimRight(UTF8_BINARY, "Ëaaaẞ", "Ëẞ", "Ëaaa"); + assertStringTrimRight(UTF8_LCASE, "ẞaaaẞ", "ß", "ẞaaa"); + assertStringTrimRight(UTF8_LCASE, "ßaaaß", "ẞ", "ßaaa"); + assertStringTrimRight(UTF8_LCASE, "Ëaaaẞ", "Ëẞ", "Ëaaa"); + assertStringTrimRight(UNICODE, "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimRight(UNICODE, "ßaaaß", "ẞ", "ßaaaß"); + 
assertStringTrimRight(UNICODE, "Ëaaaẞ", "Ëẞ", "Ëaaa"); + assertStringTrimRight(UNICODE_CI, "ẞaaaẞ", "ß", "ẞaaa"); + assertStringTrimRight(UNICODE_CI, "ßaaaß", "ẞ", "ßaaa"); + assertStringTrimRight(UNICODE_CI, "Ëaaaẞ", "Ëẞ", "Ëaaa"); // One-to-many case mapping (e.g. Turkish dotted I). - assertStringTrimRight("UTF8_BINARY", "i", "i", ""); - assertStringTrimRight("UTF8_BINARY", "iii", "I", "iii"); - assertStringTrimRight("UTF8_BINARY", "I", "iii", "I"); - assertStringTrimRight("UTF8_BINARY", "ixi", "i", "ix"); - assertStringTrimRight("UTF8_BINARY", "i", "İ", "i"); - assertStringTrimRight("UTF8_BINARY", "i\u0307", "İ", "i\u0307"); - assertStringTrimRight("UTF8_BINARY", "ii\u0307", "İi", "ii\u0307"); - assertStringTrimRight("UTF8_BINARY", "iii\u0307", "İi", "iii\u0307"); - assertStringTrimRight("UTF8_BINARY", "iiii\u0307", "iİ", "iiii\u0307"); - assertStringTrimRight("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307"); - assertStringTrimRight("UTF8_BINARY", "i\u0307", "i", "i\u0307"); - assertStringTrimRight("UTF8_BINARY", "i\u0307", "\u0307", "i"); - assertStringTrimRight("UTF8_BINARY", "i\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_BINARY", "i\u0307\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_BINARY", "i\u0307i", "i\u0307", ""); - assertStringTrimRight("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimRight("UTF8_BINARY", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimRight("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307"); - assertStringTrimRight("UTF8_BINARY", "İ", "İ", ""); - assertStringTrimRight("UTF8_BINARY", "IXi", "İ", "IXi"); - assertStringTrimRight("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimRight("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrimRight("UTF8_BINARY", "i\u0307x", "ix\u0307İ", ""); - assertStringTrimRight("UTF8_BINARY", "İ", "i", "İ"); - assertStringTrimRight("UTF8_BINARY", "İ", "\u0307", "İ"); 
- assertStringTrimRight("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimRight("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimRight("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi"); - assertStringTrimRight("UTF8_LCASE", "i", "i", ""); - assertStringTrimRight("UTF8_LCASE", "iii", "I", ""); - assertStringTrimRight("UTF8_LCASE", "I", "iii", ""); - assertStringTrimRight("UTF8_LCASE", "ixi", "i", "ix"); - assertStringTrimRight("UTF8_LCASE", "i", "İ", "i"); - assertStringTrimRight("UTF8_LCASE", "i\u0307", "İ", ""); - assertStringTrimRight("UTF8_LCASE", "ii\u0307", "İi", ""); - assertStringTrimRight("UTF8_LCASE", "iii\u0307", "İi", ""); - assertStringTrimRight("UTF8_LCASE", "iiii\u0307", "iİ", ""); - assertStringTrimRight("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307", "i", "i\u0307"); - assertStringTrimRight("UTF8_LCASE", "i\u0307", "\u0307", "i"); - assertStringTrimRight("UTF8_LCASE", "i\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307\u0307", "i\u0307", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307i", "i\u0307", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimRight("UTF8_LCASE", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimRight("UTF8_LCASE", "i\u0307İ", "İ", ""); - assertStringTrimRight("UTF8_LCASE", "İ", "İ", ""); - assertStringTrimRight("UTF8_LCASE", "IXi", "İ", "IXi"); - assertStringTrimRight("UTF8_LCASE", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimRight("UTF8_LCASE", "i\u0307x", "IXİ", ""); - assertStringTrimRight("UTF8_LCASE", "i\u0307x", "I\u0307xİ", ""); - assertStringTrimRight("UTF8_LCASE", "İ", "i", "İ"); - assertStringTrimRight("UTF8_LCASE", "İ", "\u0307", "İ"); - assertStringTrimRight("UTF8_LCASE", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimRight("UTF8_LCASE", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimRight("UTF8_LCASE", "xi\u0307", 
"\u0307IX", ""); - assertStringTrimRight("UNICODE", "i", "i", ""); - assertStringTrimRight("UNICODE", "iii", "I", "iii"); - assertStringTrimRight("UNICODE", "I", "iii", "I"); - assertStringTrimRight("UNICODE", "ixi", "i", "ix"); - assertStringTrimRight("UNICODE", "i", "İ", "i"); - assertStringTrimRight("UNICODE", "i\u0307", "İ", "i\u0307"); - assertStringTrimRight("UTF8_BINARY", "ii\u0307", "İi", "ii\u0307"); - assertStringTrimRight("UTF8_BINARY", "iii\u0307", "İi", "iii\u0307"); - assertStringTrimRight("UTF8_BINARY", "iiii\u0307", "iİ", "iiii\u0307"); - assertStringTrimRight("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307"); - assertStringTrimRight("UNICODE", "i\u0307", "i", "i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrimRight("UNICODE", "i\u0307i", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimRight("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimRight("UNICODE", "i\u0307İ", "İ", "i\u0307"); - assertStringTrimRight("UNICODE", "İ", "İ", ""); - assertStringTrimRight("UNICODE", "IXi", "İ", "IXi"); - assertStringTrimRight("UNICODE", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimRight("UNICODE", "i\u0307x", "IXİ", "i\u0307x"); - assertStringTrimRight("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307"); - assertStringTrimRight("UNICODE", "İ", "i", "İ"); - assertStringTrimRight("UNICODE", "İ", "\u0307", "İ"); - assertStringTrimRight("UNICODE", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimRight("UNICODE", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimRight("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307"); - assertStringTrimRight("UNICODE_CI", 
"i", "i", ""); - assertStringTrimRight("UNICODE_CI", "iii", "I", ""); - assertStringTrimRight("UNICODE_CI", "I", "iii", ""); - assertStringTrimRight("UNICODE_CI", "ixi", "i", "ix"); - assertStringTrimRight("UNICODE_CI", "i", "İ", "i"); - assertStringTrimRight("UNICODE_CI", "i\u0307", "İ", ""); - assertStringTrimRight("UNICODE_CI", "ii\u0307", "İi", ""); - assertStringTrimRight("UNICODE_CI", "iii\u0307", "İi", ""); - assertStringTrimRight("UNICODE_CI", "iiii\u0307", "iİ", ""); - assertStringTrimRight("UNICODE_CI", "ii\u0307ii\u0307", "iİ", ""); - assertStringTrimRight("UNICODE_CI", "i\u0307", "i", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307", "\u0307", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307i", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307i", "İ", "i\u0307i"); - assertStringTrimRight("UNICODE_CI", "i\u0307İ", "i\u0307", "i\u0307İ"); - assertStringTrimRight("UNICODE_CI", "i\u0307İ", "İ", ""); - assertStringTrimRight("UNICODE_CI", "İ", "İ", ""); - assertStringTrimRight("UNICODE_CI", "IXi", "İ", "IXi"); - assertStringTrimRight("UNICODE_CI", "ix\u0307", "Ixİ", "ix\u0307"); - assertStringTrimRight("UNICODE_CI", "i\u0307x", "IXİ", ""); - assertStringTrimRight("UNICODE_CI", "i\u0307x", "I\u0307xİ", ""); - assertStringTrimRight("UNICODE_CI", "İ", "i", "İ"); - assertStringTrimRight("UNICODE_CI", "İ", "\u0307", "İ"); - assertStringTrimRight("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307"); - assertStringTrimRight("UNICODE_CI", "Ixİ", "i\u0307", "Ixİ"); - assertStringTrimRight("UNICODE_CI", "IXİ", "ix\u0307", "IXİ"); - assertStringTrimRight("UNICODE_CI", "xi\u0307", "\u0307IX", "xi\u0307"); + assertStringTrimRight(UTF8_BINARY, "i", "i", ""); + assertStringTrimRight(UTF8_BINARY, "iii", 
"I", "iii"); + assertStringTrimRight(UTF8_BINARY, "I", "iii", "I"); + assertStringTrimRight(UTF8_BINARY, "ixi", "i", "ix"); + assertStringTrimRight(UTF8_BINARY, "i", "İ", "i"); + assertStringTrimRight(UTF8_BINARY, "i\u0307", "İ", "i\u0307"); + assertStringTrimRight(UTF8_BINARY, "ii\u0307", "İi", "ii\u0307"); + assertStringTrimRight(UTF8_BINARY, "iii\u0307", "İi", "iii\u0307"); + assertStringTrimRight(UTF8_BINARY, "iiii\u0307", "iİ", "iiii\u0307"); + assertStringTrimRight(UTF8_BINARY, "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307"); + assertStringTrimRight(UTF8_BINARY, "i\u0307", "i", "i\u0307"); + assertStringTrimRight(UTF8_BINARY, "i\u0307", "\u0307", "i"); + assertStringTrimRight(UTF8_BINARY, "i\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_BINARY, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_BINARY, "i\u0307\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_BINARY, "i\u0307i", "i\u0307", ""); + assertStringTrimRight(UTF8_BINARY, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimRight(UTF8_BINARY, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimRight(UTF8_BINARY, "i\u0307İ", "İ", "i\u0307"); + assertStringTrimRight(UTF8_BINARY, "İ", "İ", ""); + assertStringTrimRight(UTF8_BINARY, "IXi", "İ", "IXi"); + assertStringTrimRight(UTF8_BINARY, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimRight(UTF8_BINARY, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrimRight(UTF8_BINARY, "i\u0307x", "ix\u0307İ", ""); + assertStringTrimRight(UTF8_BINARY, "İ", "i", "İ"); + assertStringTrimRight(UTF8_BINARY, "İ", "\u0307", "İ"); + assertStringTrimRight(UTF8_BINARY, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimRight(UTF8_BINARY, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimRight(UTF8_BINARY, "xi\u0307", "\u0307IX", "xi"); + assertStringTrimRight(UTF8_LCASE, "i", "i", ""); + assertStringTrimRight(UTF8_LCASE, "iii", "I", ""); + assertStringTrimRight(UTF8_LCASE, "I", "iii", ""); + assertStringTrimRight(UTF8_LCASE, "ixi", "i", "ix"); + 
assertStringTrimRight(UTF8_LCASE, "i", "İ", "i"); + assertStringTrimRight(UTF8_LCASE, "i\u0307", "İ", ""); + assertStringTrimRight(UTF8_LCASE, "ii\u0307", "İi", ""); + assertStringTrimRight(UTF8_LCASE, "iii\u0307", "İi", ""); + assertStringTrimRight(UTF8_LCASE, "iiii\u0307", "iİ", ""); + assertStringTrimRight(UTF8_LCASE, "ii\u0307ii\u0307", "iİ", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307", "i", "i\u0307"); + assertStringTrimRight(UTF8_LCASE, "i\u0307", "\u0307", "i"); + assertStringTrimRight(UTF8_LCASE, "i\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307i\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307\u0307", "i\u0307", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307i", "i\u0307", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimRight(UTF8_LCASE, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimRight(UTF8_LCASE, "i\u0307İ", "İ", ""); + assertStringTrimRight(UTF8_LCASE, "İ", "İ", ""); + assertStringTrimRight(UTF8_LCASE, "IXi", "İ", "IXi"); + assertStringTrimRight(UTF8_LCASE, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimRight(UTF8_LCASE, "i\u0307x", "IXİ", ""); + assertStringTrimRight(UTF8_LCASE, "i\u0307x", "I\u0307xİ", ""); + assertStringTrimRight(UTF8_LCASE, "İ", "i", "İ"); + assertStringTrimRight(UTF8_LCASE, "İ", "\u0307", "İ"); + assertStringTrimRight(UTF8_LCASE, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimRight(UTF8_LCASE, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimRight(UTF8_LCASE, "xi\u0307", "\u0307IX", ""); + assertStringTrimRight(UNICODE, "i", "i", ""); + assertStringTrimRight(UNICODE, "iii", "I", "iii"); + assertStringTrimRight(UNICODE, "I", "iii", "I"); + assertStringTrimRight(UNICODE, "ixi", "i", "ix"); + assertStringTrimRight(UNICODE, "i", "İ", "i"); + assertStringTrimRight(UNICODE, "i\u0307", "İ", "i\u0307"); + assertStringTrimRight(UNICODE, "ii\u0307", "İi", "ii\u0307"); + assertStringTrimRight(UNICODE, "iii\u0307", "İi", "iii\u0307"); + 
assertStringTrimRight(UNICODE, "iiii\u0307", "iİ", "iiii\u0307"); + assertStringTrimRight(UNICODE, "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307"); + assertStringTrimRight(UNICODE, "i\u0307", "i", "i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrimRight(UNICODE, "i\u0307i", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimRight(UNICODE, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimRight(UNICODE, "i\u0307İ", "İ", "i\u0307"); + assertStringTrimRight(UNICODE, "İ", "İ", ""); + assertStringTrimRight(UNICODE, "IXi", "İ", "IXi"); + assertStringTrimRight(UNICODE, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimRight(UNICODE, "i\u0307x", "IXİ", "i\u0307x"); + assertStringTrimRight(UNICODE, "i\u0307x", "ix\u0307İ", "i\u0307"); + assertStringTrimRight(UNICODE, "İ", "i", "İ"); + assertStringTrimRight(UNICODE, "İ", "\u0307", "İ"); + assertStringTrimRight(UNICODE, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimRight(UNICODE, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimRight(UNICODE, "xi\u0307", "\u0307IX", "xi\u0307"); + assertStringTrimRight(UNICODE_CI, "i", "i", ""); + assertStringTrimRight(UNICODE_CI, "iii", "I", ""); + assertStringTrimRight(UNICODE_CI, "I", "iii", ""); + assertStringTrimRight(UNICODE_CI, "ixi", "i", "ix"); + assertStringTrimRight(UNICODE_CI, "i", "İ", "i"); + assertStringTrimRight(UNICODE_CI, "i\u0307", "İ", ""); + assertStringTrimRight(UNICODE_CI, "ii\u0307", "İi", ""); + assertStringTrimRight(UNICODE_CI, "iii\u0307", "İi", ""); + assertStringTrimRight(UNICODE_CI, "iiii\u0307", "iİ", ""); + assertStringTrimRight(UNICODE_CI, "ii\u0307ii\u0307", "iİ", ""); + 
assertStringTrimRight(UNICODE_CI, "i\u0307", "i", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307", "\u0307", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307\u0307", "i\u0307", "i\u0307\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307i", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307i", "İ", "i\u0307i"); + assertStringTrimRight(UNICODE_CI, "i\u0307İ", "i\u0307", "i\u0307İ"); + assertStringTrimRight(UNICODE_CI, "i\u0307İ", "İ", ""); + assertStringTrimRight(UNICODE_CI, "İ", "İ", ""); + assertStringTrimRight(UNICODE_CI, "IXi", "İ", "IXi"); + assertStringTrimRight(UNICODE_CI, "ix\u0307", "Ixİ", "ix\u0307"); + assertStringTrimRight(UNICODE_CI, "i\u0307x", "IXİ", ""); + assertStringTrimRight(UNICODE_CI, "i\u0307x", "I\u0307xİ", ""); + assertStringTrimRight(UNICODE_CI, "İ", "i", "İ"); + assertStringTrimRight(UNICODE_CI, "İ", "\u0307", "İ"); + assertStringTrimRight(UNICODE_CI, "i\u0307", "i\u0307", "i\u0307"); + assertStringTrimRight(UNICODE_CI, "Ixİ", "i\u0307", "Ixİ"); + assertStringTrimRight(UNICODE_CI, "IXİ", "ix\u0307", "IXİ"); + assertStringTrimRight(UNICODE_CI, "xi\u0307", "\u0307IX", "xi\u0307"); // Conditional case mapping (e.g. Greek sigmas). 
- assertStringTrimRight("UTF8_BINARY", "ςxς", "σ", "ςxς"); - assertStringTrimRight("UTF8_BINARY", "ςxς", "ς", "ςx"); - assertStringTrimRight("UTF8_BINARY", "ςxς", "Σ", "ςxς"); - assertStringTrimRight("UTF8_BINARY", "σxσ", "σ", "σx"); - assertStringTrimRight("UTF8_BINARY", "σxσ", "ς", "σxσ"); - assertStringTrimRight("UTF8_BINARY", "σxσ", "Σ", "σxσ"); - assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "Σ", "Σx"); - assertStringTrimRight("UTF8_LCASE", "ςxς", "σ", "ςx"); - assertStringTrimRight("UTF8_LCASE", "ςxς", "ς", "ςx"); - assertStringTrimRight("UTF8_LCASE", "ςxς", "Σ", "ςx"); - assertStringTrimRight("UTF8_LCASE", "σxσ", "σ", "σx"); - assertStringTrimRight("UTF8_LCASE", "σxσ", "ς", "σx"); - assertStringTrimRight("UTF8_LCASE", "σxσ", "Σ", "σx"); - assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "σ", "Σx"); - assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "ς", "Σx"); - assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "Σ", "Σx"); - assertStringTrimRight("UNICODE", "ςxς", "σ", "ςxς"); - assertStringTrimRight("UNICODE", "ςxς", "ς", "ςx"); - assertStringTrimRight("UNICODE", "ςxς", "Σ", "ςxς"); - assertStringTrimRight("UNICODE", "σxσ", "σ", "σx"); - assertStringTrimRight("UNICODE", "σxσ", "ς", "σxσ"); - assertStringTrimRight("UNICODE", "σxσ", "Σ", "σxσ"); - assertStringTrimRight("UNICODE", "ΣxΣ", "σ", "ΣxΣ"); - assertStringTrimRight("UNICODE", "ΣxΣ", "ς", "ΣxΣ"); - assertStringTrimRight("UNICODE", "ΣxΣ", "Σ", "Σx"); - assertStringTrimRight("UNICODE_CI", "ςxς", "σ", "ςx"); - assertStringTrimRight("UNICODE_CI", "ςxς", "ς", "ςx"); - assertStringTrimRight("UNICODE_CI", "ςxς", "Σ", "ςx"); - assertStringTrimRight("UNICODE_CI", "σxσ", "σ", "σx"); - assertStringTrimRight("UNICODE_CI", "σxσ", "ς", "σx"); - assertStringTrimRight("UNICODE_CI", "σxσ", "Σ", "σx"); - assertStringTrimRight("UNICODE_CI", "ΣxΣ", "σ", "Σx"); - assertStringTrimRight("UNICODE_CI", "ΣxΣ", "ς", "Σx"); - 
assertStringTrimRight("UNICODE_CI", "ΣxΣ", "Σ", "Σx"); + assertStringTrimRight(UTF8_BINARY, "ςxς", "σ", "ςxς"); + assertStringTrimRight(UTF8_BINARY, "ςxς", "ς", "ςx"); + assertStringTrimRight(UTF8_BINARY, "ςxς", "Σ", "ςxς"); + assertStringTrimRight(UTF8_BINARY, "σxσ", "σ", "σx"); + assertStringTrimRight(UTF8_BINARY, "σxσ", "ς", "σxσ"); + assertStringTrimRight(UTF8_BINARY, "σxσ", "Σ", "σxσ"); + assertStringTrimRight(UTF8_BINARY, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrimRight(UTF8_BINARY, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrimRight(UTF8_BINARY, "ΣxΣ", "Σ", "Σx"); + assertStringTrimRight(UTF8_LCASE, "ςxς", "σ", "ςx"); + assertStringTrimRight(UTF8_LCASE, "ςxς", "ς", "ςx"); + assertStringTrimRight(UTF8_LCASE, "ςxς", "Σ", "ςx"); + assertStringTrimRight(UTF8_LCASE, "σxσ", "σ", "σx"); + assertStringTrimRight(UTF8_LCASE, "σxσ", "ς", "σx"); + assertStringTrimRight(UTF8_LCASE, "σxσ", "Σ", "σx"); + assertStringTrimRight(UTF8_LCASE, "ΣxΣ", "σ", "Σx"); + assertStringTrimRight(UTF8_LCASE, "ΣxΣ", "ς", "Σx"); + assertStringTrimRight(UTF8_LCASE, "ΣxΣ", "Σ", "Σx"); + assertStringTrimRight(UNICODE, "ςxς", "σ", "ςxς"); + assertStringTrimRight(UNICODE, "ςxς", "ς", "ςx"); + assertStringTrimRight(UNICODE, "ςxς", "Σ", "ςxς"); + assertStringTrimRight(UNICODE, "σxσ", "σ", "σx"); + assertStringTrimRight(UNICODE, "σxσ", "ς", "σxσ"); + assertStringTrimRight(UNICODE, "σxσ", "Σ", "σxσ"); + assertStringTrimRight(UNICODE, "ΣxΣ", "σ", "ΣxΣ"); + assertStringTrimRight(UNICODE, "ΣxΣ", "ς", "ΣxΣ"); + assertStringTrimRight(UNICODE, "ΣxΣ", "Σ", "Σx"); + assertStringTrimRight(UNICODE_CI, "ςxς", "σ", "ςx"); + assertStringTrimRight(UNICODE_CI, "ςxς", "ς", "ςx"); + assertStringTrimRight(UNICODE_CI, "ςxς", "Σ", "ςx"); + assertStringTrimRight(UNICODE_CI, "σxσ", "σ", "σx"); + assertStringTrimRight(UNICODE_CI, "σxσ", "ς", "σx"); + assertStringTrimRight(UNICODE_CI, "σxσ", "Σ", "σx"); + assertStringTrimRight(UNICODE_CI, "ΣxΣ", "σ", "Σx"); + assertStringTrimRight(UNICODE_CI, "ΣxΣ", "ς", "Σx"); + 
assertStringTrimRight(UNICODE_CI, "ΣxΣ", "Σ", "Σx"); // Unicode normalization. - assertStringTrimRight("UTF8_BINARY", "åβγδa\u030A", "å", "åβγδa\u030A"); - assertStringTrimRight("UTF8_LCASE", "åβγδa\u030A", "Å", "åβγδa\u030A"); - assertStringTrimRight("UNICODE", "åβγδa\u030A", "å", "åβγδ"); - assertStringTrimRight("UNICODE_CI", "åβγδa\u030A", "Å", "åβγδ"); + assertStringTrimRight(UTF8_BINARY, "åβγδa\u030A", "å", "åβγδa\u030A"); + assertStringTrimRight(UTF8_LCASE, "åβγδa\u030A", "Å", "åβγδa\u030A"); + assertStringTrimRight(UNICODE, "åβγδa\u030A", "å", "åβγδ"); + assertStringTrimRight(UNICODE_CI, "åβγδa\u030A", "Å", "åβγδ"); // Surrogate pairs. - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c"); - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c", "a🙃b🙃"); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c", "a🙃b🙃"); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "c", "a🙃b🙃"); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c", "a🙃b🙃"); - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c🙃", "a🙃b"); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c🙃", "a🙃b"); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "c🙃", "a🙃b"); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c🙃", "a🙃b"); - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c🙃b", "a"); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c🙃b", "a"); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "c🙃b", "a"); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c🙃b", "a"); - assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimRight("UNICODE", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "abc🙃", ""); - assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😆😃", "😀😆😃😄"); - 
assertStringTrimRight("UNICODE", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😆😃", "😀😆😃😄"); - assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrimRight("UNICODE", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😃😄", "😀😆"); - assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimRight("UNICODE", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", ""); - assertStringTrimRight("UTF8_BINARY", "𐐅", "𐐅", ""); - assertStringTrimRight("UTF8_LCASE", "𐐅", "𐐅", ""); - assertStringTrimRight("UNICODE", "𐐅", "𐐅", ""); - assertStringTrimRight("UNICODE_CI", "𐐅", "𐐅", ""); - assertStringTrimRight("UTF8_BINARY", "𐐅", "𐐭", "𐐅"); - assertStringTrimRight("UTF8_LCASE", "𐐅", "𐐭", ""); - assertStringTrimRight("UNICODE", "𐐅", "𐐭", "𐐅"); - assertStringTrimRight("UNICODE_CI", "𐐅", "𐐭", ""); - assertStringTrimRight("UTF8_BINARY", "𝔸", "𝔸", ""); - assertStringTrimRight("UTF8_LCASE", "𝔸", "𝔸", ""); - assertStringTrimRight("UNICODE", "𝔸", "𝔸", ""); - assertStringTrimRight("UNICODE_CI", "𝔸", "𝔸", ""); - assertStringTrimRight("UTF8_BINARY", "𝔸", "A", "𝔸"); - assertStringTrimRight("UTF8_LCASE", "𝔸", "A", "𝔸"); - assertStringTrimRight("UNICODE", "𝔸", "A", "𝔸"); - assertStringTrimRight("UNICODE_CI", "𝔸", "A", ""); - assertStringTrimRight("UTF8_BINARY", "𝔸", "a", "𝔸"); - assertStringTrimRight("UTF8_LCASE", "𝔸", "a", "𝔸"); - assertStringTrimRight("UNICODE", "𝔸", "a", "𝔸"); - assertStringTrimRight("UNICODE_CI", "𝔸", "a", ""); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimRight(UNICODE, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "🙃", "a🙃b🙃c"); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "c", "a🙃b🙃"); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "c", "a🙃b🙃"); 
+ assertStringTrimRight(UNICODE, "a🙃b🙃c", "c", "a🙃b🙃"); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "c", "a🙃b🙃"); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "c🙃", "a🙃b"); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "c🙃", "a🙃b"); + assertStringTrimRight(UNICODE, "a🙃b🙃c", "c🙃", "a🙃b"); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "c🙃", "a🙃b"); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "c🙃b", "a"); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "c🙃b", "a"); + assertStringTrimRight(UNICODE, "a🙃b🙃c", "c🙃b", "a"); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "c🙃b", "a"); + assertStringTrimRight(UTF8_BINARY, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimRight(UTF8_LCASE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimRight(UNICODE, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimRight(UNICODE_CI, "a🙃b🙃c", "abc🙃", ""); + assertStringTrimRight(UTF8_BINARY, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimRight(UTF8_LCASE, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimRight(UNICODE, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimRight(UNICODE_CI, "😀😆😃😄", "😆😃", "😀😆😃😄"); + assertStringTrimRight(UTF8_BINARY, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrimRight(UTF8_LCASE, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrimRight(UNICODE, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrimRight(UNICODE_CI, "😀😆😃😄", "😃😄", "😀😆"); + assertStringTrimRight(UTF8_BINARY, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimRight(UTF8_LCASE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimRight(UNICODE, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimRight(UNICODE_CI, "😀😆😃😄", "😀😆😃😄", ""); + assertStringTrimRight(UTF8_BINARY, "𐐅", "𐐅", ""); + assertStringTrimRight(UTF8_LCASE, "𐐅", "𐐅", ""); + assertStringTrimRight(UNICODE, "𐐅", "𐐅", ""); + assertStringTrimRight(UNICODE_CI, "𐐅", "𐐅", ""); + assertStringTrimRight(UTF8_BINARY, "𐐅", "𐐭", "𐐅"); + assertStringTrimRight(UTF8_LCASE, "𐐅", "𐐭", ""); + assertStringTrimRight(UNICODE, "𐐅", "𐐭", "𐐅"); + assertStringTrimRight(UNICODE_CI, "𐐅", "𐐭", ""); + assertStringTrimRight(UTF8_BINARY, "𝔸", "𝔸", ""); + assertStringTrimRight(UTF8_LCASE, "𝔸", "𝔸", 
""); + assertStringTrimRight(UNICODE, "𝔸", "𝔸", ""); + assertStringTrimRight(UNICODE_CI, "𝔸", "𝔸", ""); + assertStringTrimRight(UTF8_BINARY, "𝔸", "A", "𝔸"); + assertStringTrimRight(UTF8_LCASE, "𝔸", "A", "𝔸"); + assertStringTrimRight(UNICODE, "𝔸", "A", "𝔸"); + assertStringTrimRight(UNICODE_CI, "𝔸", "A", ""); + assertStringTrimRight(UTF8_BINARY, "𝔸", "a", "𝔸"); + assertStringTrimRight(UTF8_LCASE, "𝔸", "a", "𝔸"); + assertStringTrimRight(UNICODE, "𝔸", "a", "𝔸"); + assertStringTrimRight(UNICODE_CI, "𝔸", "a", ""); } /** @@ -3664,211 +3665,211 @@ private void assertStringTranslate(String inputString, String matchingString, @Test public void testStringTranslate() throws SparkException { // Empty strings. - assertStringTranslate("", "", "", "UTF8_BINARY", ""); - assertStringTranslate("", "", "", "UTF8_LCASE", ""); - assertStringTranslate("", "", "", "UNICODE", ""); - assertStringTranslate("", "", "", "UNICODE_CI", ""); - assertStringTranslate("abc", "", "", "UTF8_BINARY", "abc"); - assertStringTranslate("abc", "", "", "UTF8_LCASE", "abc"); - assertStringTranslate("abc", "", "", "UNICODE", "abc"); - assertStringTranslate("abc", "", "", "UNICODE_CI", "abc"); - assertStringTranslate("", "b", "", "UTF8_BINARY", ""); - assertStringTranslate("", "b", "", "UTF8_LCASE", ""); - assertStringTranslate("", "b", "", "UNICODE", ""); - assertStringTranslate("", "b", "", "UNICODE_CI", ""); - assertStringTranslate("", "", "x", "UTF8_BINARY", ""); - assertStringTranslate("", "", "x", "UTF8_LCASE", ""); - assertStringTranslate("", "", "x", "UNICODE", ""); - assertStringTranslate("", "", "x", "UNICODE_CI", ""); - assertStringTranslate("abc", "b", "", "UTF8_BINARY", "ac"); - assertStringTranslate("abc", "b", "", "UTF8_LCASE", "ac"); - assertStringTranslate("abc", "b", "", "UNICODE", "ac"); - assertStringTranslate("abc", "b", "", "UNICODE_CI", "ac"); - assertStringTranslate("abc", "", "x", "UTF8_BINARY", "abc"); - assertStringTranslate("abc", "", "x", "UTF8_LCASE", "abc"); - 
assertStringTranslate("abc", "", "x", "UNICODE", "abc"); - assertStringTranslate("abc", "", "x", "UNICODE_CI", "abc"); - assertStringTranslate("", "b", "x", "UTF8_BINARY", ""); - assertStringTranslate("", "b", "x", "UTF8_LCASE", ""); - assertStringTranslate("", "b", "x", "UNICODE", ""); - assertStringTranslate("", "b", "x", "UNICODE_CI", ""); + assertStringTranslate("", "", "", UTF8_BINARY, ""); + assertStringTranslate("", "", "", UTF8_LCASE, ""); + assertStringTranslate("", "", "", UNICODE, ""); + assertStringTranslate("", "", "", UNICODE_CI, ""); + assertStringTranslate("abc", "", "", UTF8_BINARY, "abc"); + assertStringTranslate("abc", "", "", UTF8_LCASE, "abc"); + assertStringTranslate("abc", "", "", UNICODE, "abc"); + assertStringTranslate("abc", "", "", UNICODE_CI, "abc"); + assertStringTranslate("", "b", "", UTF8_BINARY, ""); + assertStringTranslate("", "b", "", UTF8_LCASE, ""); + assertStringTranslate("", "b", "", UNICODE, ""); + assertStringTranslate("", "b", "", UNICODE_CI, ""); + assertStringTranslate("", "", "x", UTF8_BINARY, ""); + assertStringTranslate("", "", "x", UTF8_LCASE, ""); + assertStringTranslate("", "", "x", UNICODE, ""); + assertStringTranslate("", "", "x", UNICODE_CI, ""); + assertStringTranslate("abc", "b", "", UTF8_BINARY, "ac"); + assertStringTranslate("abc", "b", "", UTF8_LCASE, "ac"); + assertStringTranslate("abc", "b", "", UNICODE, "ac"); + assertStringTranslate("abc", "b", "", UNICODE_CI, "ac"); + assertStringTranslate("abc", "", "x", UTF8_BINARY, "abc"); + assertStringTranslate("abc", "", "x", UTF8_LCASE, "abc"); + assertStringTranslate("abc", "", "x", UNICODE, "abc"); + assertStringTranslate("abc", "", "x", UNICODE_CI, "abc"); + assertStringTranslate("", "b", "x", UTF8_BINARY, ""); + assertStringTranslate("", "b", "x", UTF8_LCASE, ""); + assertStringTranslate("", "b", "x", UNICODE, ""); + assertStringTranslate("", "b", "x", UNICODE_CI, ""); // Basic tests. 
- assertStringTranslate("abc", "b", "x", "UTF8_BINARY", "axc"); - assertStringTranslate("abc", "b", "x", "UTF8_LCASE", "axc"); - assertStringTranslate("abc", "b", "x", "UNICODE", "axc"); - assertStringTranslate("abc", "b", "x", "UNICODE_CI", "axc"); - assertStringTranslate("Translate", "Rnlt", "12", "UTF8_BINARY", "Tra2sae"); - assertStringTranslate("Translate", "Rnlt", "12", "UTF8_LCASE", "1a2sae"); - assertStringTranslate("Translate", "Rnlt", "12", "UNICODE", "Tra2sae"); - assertStringTranslate("Translate", "Rnlt", "12", "UNICODE_CI", "1a2sae"); - assertStringTranslate("Translate", "Rn", "1234", "UTF8_BINARY", "Tra2slate"); - assertStringTranslate("Translate", "Rn", "1234", "UTF8_LCASE", "T1a2slate"); - assertStringTranslate("Translate", "Rn", "1234", "UNICODE", "Tra2slate"); - assertStringTranslate("Translate", "Rn", "1234", "UNICODE_CI", "T1a2slate"); - assertStringTranslate("Translate", "Rnlt", "1234", "UTF8_BINARY", "Tra2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "1234", "UTF8_LCASE", "41a2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "1234", "UNICODE", "Tra2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "1234", "UNICODE_CI", "41a2s3a4e"); - assertStringTranslate("TRanslate", "rnlt", "XxXx", "UTF8_BINARY", "TRaxsXaxe"); - assertStringTranslate("TRanslate", "rnlt", "XxXx", "UTF8_LCASE", "xXaxsXaxe"); - assertStringTranslate("TRanslate", "rnlt", "XxXx", "UNICODE", "TRaxsXaxe"); - assertStringTranslate("TRanslate", "rnlt", "XxXx", "UNICODE_CI", "xXaxsXaxe"); - assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UTF8_BINARY", "TxaxsXaxeX"); - assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UTF8_LCASE", "xxaxsXaxex"); - assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UNICODE", "TxaxsXaxeX"); - assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UNICODE_CI", "xxaxsXaxex"); - assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UTF8_BINARY", "TXaxsXaxex"); - assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", 
"UTF8_LCASE", "xXaxsXaxeX"); - assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UNICODE", "TXaxsXaxex"); - assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UNICODE_CI", "xXaxsXaxeX"); - assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UTF8_BINARY", "test大千世AX大千世A"); - assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UTF8_LCASE", "test大千世AB大千世A"); - assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UNICODE", "test大千世AX大千世A"); - assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UNICODE_CI", "test大千世AB大千世A"); - assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UTF8_BINARY", "大千世界test大千世界"); - assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UTF8_LCASE", "大千世界abca大千世界"); - assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UNICODE", "大千世界test大千世界"); - assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UNICODE_CI", "大千世界abca大千世界"); - assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UTF8_BINARY", "Oeso大千世界大千世界"); - assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UTF8_LCASE", "oeso大千世界大千世界"); - assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UNICODE", "Oeso大千世界大千世界"); - assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UNICODE_CI", "oeso大千世界大千世界"); - assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UTF8_BINARY", "大千世界大千世界oesO"); - assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UTF8_LCASE", "大千世界大千世界OesO"); - assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UNICODE", "大千世界大千世界oesO"); - assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UNICODE_CI", "大千世界大千世界OesO"); - assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UTF8_BINARY", "世世世界世世世界tesT"); - assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UTF8_LCASE", "世世世界世世世界tesT"); - assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UNICODE", "世世世界世世世界tesT"); - assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UNICODE_CI", "世世世界世世世界tesT"); - assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UTF8_BINARY", "Tr4234e"); - assertStringTranslate("Translate", 
"Rnlasdfjhgadt", "1234", "UTF8_LCASE", "14234e"); - assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UNICODE", "Tr4234e"); - assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UNICODE_CI", "14234e"); - assertStringTranslate("Translate", "Rnlt", "123495834634", "UTF8_BINARY", "Tra2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "123495834634", "UTF8_LCASE", "41a2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "123495834634", "UNICODE", "Tra2s3a4e"); - assertStringTranslate("Translate", "Rnlt", "123495834634", "UNICODE_CI", "41a2s3a4e"); - assertStringTranslate("abcdef", "abcde", "123", "UTF8_BINARY", "123f"); - assertStringTranslate("abcdef", "abcde", "123", "UTF8_LCASE", "123f"); - assertStringTranslate("abcdef", "abcde", "123", "UNICODE", "123f"); - assertStringTranslate("abcdef", "abcde", "123", "UNICODE_CI", "123f"); + assertStringTranslate("abc", "b", "x", UTF8_BINARY, "axc"); + assertStringTranslate("abc", "b", "x", UTF8_LCASE, "axc"); + assertStringTranslate("abc", "b", "x", UNICODE, "axc"); + assertStringTranslate("abc", "b", "x", UNICODE_CI, "axc"); + assertStringTranslate("Translate", "Rnlt", "12", UTF8_BINARY, "Tra2sae"); + assertStringTranslate("Translate", "Rnlt", "12", UTF8_LCASE, "1a2sae"); + assertStringTranslate("Translate", "Rnlt", "12", UNICODE, "Tra2sae"); + assertStringTranslate("Translate", "Rnlt", "12", UNICODE_CI, "1a2sae"); + assertStringTranslate("Translate", "Rn", "1234", UTF8_BINARY, "Tra2slate"); + assertStringTranslate("Translate", "Rn", "1234", UTF8_LCASE, "T1a2slate"); + assertStringTranslate("Translate", "Rn", "1234", UNICODE, "Tra2slate"); + assertStringTranslate("Translate", "Rn", "1234", UNICODE_CI, "T1a2slate"); + assertStringTranslate("Translate", "Rnlt", "1234", UTF8_BINARY, "Tra2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "1234", UTF8_LCASE, "41a2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "1234", UNICODE, "Tra2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "1234", 
UNICODE_CI, "41a2s3a4e"); + assertStringTranslate("TRanslate", "rnlt", "XxXx", UTF8_BINARY, "TRaxsXaxe"); + assertStringTranslate("TRanslate", "rnlt", "XxXx", UTF8_LCASE, "xXaxsXaxe"); + assertStringTranslate("TRanslate", "rnlt", "XxXx", UNICODE, "TRaxsXaxe"); + assertStringTranslate("TRanslate", "rnlt", "XxXx", UNICODE_CI, "xXaxsXaxe"); + assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", UTF8_BINARY, "TxaxsXaxeX"); + assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", UTF8_LCASE, "xxaxsXaxex"); + assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", UNICODE, "TxaxsXaxeX"); + assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", UNICODE_CI, "xxaxsXaxex"); + assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", UTF8_BINARY, "TXaxsXaxex"); + assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", UTF8_LCASE, "xXaxsXaxeX"); + assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", UNICODE, "TXaxsXaxex"); + assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", UNICODE_CI, "xXaxsXaxeX"); + assertStringTranslate("test大千世界X大千世界", "界x", "AB", UTF8_BINARY, "test大千世AX大千世A"); + assertStringTranslate("test大千世界X大千世界", "界x", "AB", UTF8_LCASE, "test大千世AB大千世A"); + assertStringTranslate("test大千世界X大千世界", "界x", "AB", UNICODE, "test大千世AX大千世A"); + assertStringTranslate("test大千世界X大千世界", "界x", "AB", UNICODE_CI, "test大千世AB大千世A"); + assertStringTranslate("大千世界test大千世界", "TEST", "abcd", UTF8_BINARY, "大千世界test大千世界"); + assertStringTranslate("大千世界test大千世界", "TEST", "abcd", UTF8_LCASE, "大千世界abca大千世界"); + assertStringTranslate("大千世界test大千世界", "TEST", "abcd", UNICODE, "大千世界test大千世界"); + assertStringTranslate("大千世界test大千世界", "TEST", "abcd", UNICODE_CI, "大千世界abca大千世界"); + assertStringTranslate("Test大千世界大千世界", "tT", "oO", UTF8_BINARY, "Oeso大千世界大千世界"); + assertStringTranslate("Test大千世界大千世界", "tT", "oO", UTF8_LCASE, "oeso大千世界大千世界"); + assertStringTranslate("Test大千世界大千世界", "tT", "oO", UNICODE, "Oeso大千世界大千世界"); + assertStringTranslate("Test大千世界大千世界", "tT", "oO", UNICODE_CI, "oeso大千世界大千世界"); + 
assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", UTF8_BINARY, "大千世界大千世界oesO"); + assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", UTF8_LCASE, "大千世界大千世界OesO"); + assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", UNICODE, "大千世界大千世界oesO"); + assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", UNICODE_CI, "大千世界大千世界OesO"); + assertStringTranslate("大千世界大千世界tesT", "大千", "世世", UTF8_BINARY, "世世世界世世世界tesT"); + assertStringTranslate("大千世界大千世界tesT", "大千", "世世", UTF8_LCASE, "世世世界世世世界tesT"); + assertStringTranslate("大千世界大千世界tesT", "大千", "世世", UNICODE, "世世世界世世世界tesT"); + assertStringTranslate("大千世界大千世界tesT", "大千", "世世", UNICODE_CI, "世世世界世世世界tesT"); + assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", UTF8_BINARY, "Tr4234e"); + assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", UTF8_LCASE, "14234e"); + assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", UNICODE, "Tr4234e"); + assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", UNICODE_CI, "14234e"); + assertStringTranslate("Translate", "Rnlt", "123495834634", UTF8_BINARY, "Tra2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "123495834634", UTF8_LCASE, "41a2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "123495834634", UNICODE, "Tra2s3a4e"); + assertStringTranslate("Translate", "Rnlt", "123495834634", UNICODE_CI, "41a2s3a4e"); + assertStringTranslate("abcdef", "abcde", "123", UTF8_BINARY, "123f"); + assertStringTranslate("abcdef", "abcde", "123", UTF8_LCASE, "123f"); + assertStringTranslate("abcdef", "abcde", "123", UNICODE, "123f"); + assertStringTranslate("abcdef", "abcde", "123", UNICODE_CI, "123f"); assertStringTranslate("abcdëÈêf", "ÊèË", "123", "AF_CI", "abcd321f"); // One-to-many case mapping (e.g. Turkish dotted I). 
- assertStringTranslate("İ", "i\u0307", "xy", "UTF8_BINARY", "İ"); - assertStringTranslate("İ", "i\u0307", "xy", "UTF8_LCASE", "İ"); - assertStringTranslate("İ", "i\u0307", "xy", "UNICODE", "İ"); - assertStringTranslate("İ", "i\u0307", "xy", "UNICODE_CI", "İ"); - assertStringTranslate("i\u0307", "İ", "xy", "UTF8_BINARY", "i\u0307"); - assertStringTranslate("i\u0307", "İ", "xy", "UTF8_LCASE", "x"); - assertStringTranslate("i\u0307", "İ", "xy", "UNICODE", "i\u0307"); - assertStringTranslate("i\u0307", "İ", "xy", "UNICODE_CI", "x"); - assertStringTranslate("i\u030A", "İ", "x", "UTF8_BINARY", "i\u030A"); - assertStringTranslate("i\u030A", "İ", "x", "UTF8_LCASE", "i\u030A"); - assertStringTranslate("i\u030A", "İ", "x", "UNICODE", "i\u030A"); - assertStringTranslate("i\u030A", "İ", "x", "UNICODE_CI", "i\u030A"); - assertStringTranslate("i\u030A", "İi", "xy", "UTF8_BINARY", "y\u030A"); - assertStringTranslate("i\u030A", "İi", "xy", "UTF8_LCASE", "y\u030A"); - assertStringTranslate("i\u030A", "İi", "xy", "UNICODE", "i\u030A"); - assertStringTranslate("i\u030A", "İi", "xy", "UNICODE_CI", "i\u030A"); - assertStringTranslate("İi\u0307", "İi\u0307", "123", "UTF8_BINARY", "123"); - assertStringTranslate("İi\u0307", "İi\u0307", "123", "UTF8_LCASE", "11"); - assertStringTranslate("İi\u0307", "İi\u0307", "123", "UNICODE", "1i\u0307"); - assertStringTranslate("İi\u0307", "İi\u0307", "123", "UNICODE_CI", "11"); - assertStringTranslate("İi\u0307", "İyz", "123", "UTF8_BINARY", "1i\u0307"); - assertStringTranslate("İi\u0307", "İyz", "123", "UTF8_LCASE", "11"); - assertStringTranslate("İi\u0307", "İyz", "123", "UNICODE", "1i\u0307"); - assertStringTranslate("İi\u0307", "İyz", "123", "UNICODE_CI", "11"); - assertStringTranslate("İi\u0307", "xi\u0307", "123", "UTF8_BINARY", "İ23"); - assertStringTranslate("İi\u0307", "xi\u0307", "123", "UTF8_LCASE", "İ23"); - assertStringTranslate("İi\u0307", "xi\u0307", "123", "UNICODE", "İi\u0307"); - assertStringTranslate("İi\u0307", "xi\u0307", "123", 
"UNICODE_CI", "İi\u0307"); - assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UTF8_BINARY", "12bc3"); - assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UTF8_LCASE", "12bc3"); - assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UNICODE", "3bc3"); - assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UNICODE_CI", "3bc3"); - assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UTF8_BINARY", "a2bcå"); - assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UTF8_LCASE", "12bc3"); - assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UNICODE", "a\u030Abcå"); - assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UNICODE_CI", "3bc3"); - assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", "UTF8_BINARY", "3\u030Aβφδ1\u0307"); - assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", "UTF8_LCASE", "3\u030Aβφδ2"); - assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", "UNICODE", "4βφδ2"); - assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", "UNICODE_CI", "4βφδ2"); + assertStringTranslate("İ", "i\u0307", "xy", UTF8_BINARY, "İ"); + assertStringTranslate("İ", "i\u0307", "xy", UTF8_LCASE, "İ"); + assertStringTranslate("İ", "i\u0307", "xy", UNICODE, "İ"); + assertStringTranslate("İ", "i\u0307", "xy", UNICODE_CI, "İ"); + assertStringTranslate("i\u0307", "İ", "xy", UTF8_BINARY, "i\u0307"); + assertStringTranslate("i\u0307", "İ", "xy", UTF8_LCASE, "x"); + assertStringTranslate("i\u0307", "İ", "xy", UNICODE, "i\u0307"); + assertStringTranslate("i\u0307", "İ", "xy", UNICODE_CI, "x"); + assertStringTranslate("i\u030A", "İ", "x", UTF8_BINARY, "i\u030A"); + assertStringTranslate("i\u030A", "İ", "x", UTF8_LCASE, "i\u030A"); + assertStringTranslate("i\u030A", "İ", "x", UNICODE, "i\u030A"); + assertStringTranslate("i\u030A", "İ", "x", UNICODE_CI, "i\u030A"); + assertStringTranslate("i\u030A", "İi", "xy", UTF8_BINARY, "y\u030A"); + assertStringTranslate("i\u030A", "İi", "xy", UTF8_LCASE, "y\u030A"); + 
assertStringTranslate("i\u030A", "İi", "xy", UNICODE, "i\u030A"); + assertStringTranslate("i\u030A", "İi", "xy", UNICODE_CI, "i\u030A"); + assertStringTranslate("İi\u0307", "İi\u0307", "123", UTF8_BINARY, "123"); + assertStringTranslate("İi\u0307", "İi\u0307", "123", UTF8_LCASE, "11"); + assertStringTranslate("İi\u0307", "İi\u0307", "123", UNICODE, "1i\u0307"); + assertStringTranslate("İi\u0307", "İi\u0307", "123", UNICODE_CI, "11"); + assertStringTranslate("İi\u0307", "İyz", "123", UTF8_BINARY, "1i\u0307"); + assertStringTranslate("İi\u0307", "İyz", "123", UTF8_LCASE, "11"); + assertStringTranslate("İi\u0307", "İyz", "123", UNICODE, "1i\u0307"); + assertStringTranslate("İi\u0307", "İyz", "123", UNICODE_CI, "11"); + assertStringTranslate("İi\u0307", "xi\u0307", "123", UTF8_BINARY, "İ23"); + assertStringTranslate("İi\u0307", "xi\u0307", "123", UTF8_LCASE, "İ23"); + assertStringTranslate("İi\u0307", "xi\u0307", "123", UNICODE, "İi\u0307"); + assertStringTranslate("İi\u0307", "xi\u0307", "123", UNICODE_CI, "İi\u0307"); + assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", UTF8_BINARY, "12bc3"); + assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", UTF8_LCASE, "12bc3"); + assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", UNICODE, "3bc3"); + assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", UNICODE_CI, "3bc3"); + assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", UTF8_BINARY, "a2bcå"); + assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", UTF8_LCASE, "12bc3"); + assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", UNICODE, "a\u030Abcå"); + assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", UNICODE_CI, "3bc3"); + assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", UTF8_BINARY, "3\u030Aβφδ1\u0307"); + assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", UTF8_LCASE, "3\u030Aβφδ2"); + assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", UNICODE, "4βφδ2"); + assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", UNICODE_CI, 
"4βφδ2"); // Conditional case mapping (e.g. Greek sigmas). - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UTF8_BINARY", "σΥσΤΗΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UNICODE", "σΥσΤΗΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UTF8_BINARY", "ςΥςΤΗΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UNICODE", "ςΥςΤΗΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UNICODE", 
"ΣΥΣΤΗΜΑΤΙΚΟΣ"); - assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς"); - assertStringTranslate("συστηματικος", "Συη", "σιι", "UTF8_BINARY", "σιστιματικος"); - assertStringTranslate("συστηματικος", "Συη", "σιι", "UTF8_LCASE", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "Συη", "σιι", "UNICODE", "σιστιματικος"); - assertStringTranslate("συστηματικος", "Συη", "σιι", "UNICODE_CI", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "συη", "σιι", "UTF8_BINARY", "σιστιματικος"); - assertStringTranslate("συστηματικος", "συη", "σιι", "UTF8_LCASE", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "συη", "σιι", "UNICODE", "σιστιματικος"); - assertStringTranslate("συστηματικος", "συη", "σιι", "UNICODE_CI", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "ςυη", "σιι", "UTF8_BINARY", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "ςυη", "σιι", "UTF8_LCASE", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "ςυη", "σιι", "UNICODE", "σιστιματικοσ"); - assertStringTranslate("συστηματικος", "ςυη", "σιι", "UNICODE_CI", "σιστιματικοσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", UTF8_BINARY, "σΥσΤΗΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", UTF8_LCASE, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", UNICODE, "σΥσΤΗΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", UNICODE_CI, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", UTF8_BINARY, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", UTF8_LCASE, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", UNICODE, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", UNICODE_CI, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", UTF8_BINARY, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", UTF8_LCASE, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", 
UNICODE, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", UNICODE_CI, "σισΤιΜΑΤΙΚΟσ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", UTF8_BINARY, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", UTF8_LCASE, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", UNICODE, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", UNICODE_CI, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", UTF8_BINARY, "ςΥςΤΗΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", UTF8_LCASE, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", UNICODE, "ςΥςΤΗΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", UNICODE_CI, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", UTF8_BINARY, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", UTF8_LCASE, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", UNICODE, "ΣΥΣΤΗΜΑΤΙΚΟΣ"); + assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", UNICODE_CI, "ςιςΤιΜΑΤΙΚΟς"); + assertStringTranslate("συστηματικος", "Συη", "σιι", UTF8_BINARY, "σιστιματικος"); + assertStringTranslate("συστηματικος", "Συη", "σιι", UTF8_LCASE, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "Συη", "σιι", UNICODE, "σιστιματικος"); + assertStringTranslate("συστηματικος", "Συη", "σιι", UNICODE_CI, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "συη", "σιι", UTF8_BINARY, "σιστιματικος"); + assertStringTranslate("συστηματικος", "συη", "σιι", UTF8_LCASE, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "συη", "σιι", UNICODE, "σιστιματικος"); + assertStringTranslate("συστηματικος", "συη", "σιι", UNICODE_CI, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "ςυη", "σιι", UTF8_BINARY, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "ςυη", "σιι", UTF8_LCASE, "σιστιματικοσ"); + assertStringTranslate("συστηματικος", "ςυη", "σιι", UNICODE, 
"σιστιματικοσ"); + assertStringTranslate("συστηματικος", "ςυη", "σιι", UNICODE_CI, "σιστιματικοσ"); // Surrogate pairs. - assertStringTranslate("a🙃b🙃c", "a", "x", "UTF8_BINARY", "x🙃b🙃c"); - assertStringTranslate("a🙃b🙃c", "a🙃", "xy", "UTF8_BINARY", "xybyc"); - assertStringTranslate("a🙃b🙃c", "a🙃b", "xyz", "UTF8_BINARY", "xyzyc"); - assertStringTranslate("a🙃b🙃c", "a🙃bc", "xyzw", "UTF8_BINARY", "xyzyw"); - assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UTF8_BINARY", "😀😂😃😅"); - assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UTF8_LCASE", "😀😂😃😅"); - assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UNICODE", "😀😂😃😅"); - assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UNICODE_CI", "😀😂😃😅"); - assertStringTranslate("𐐅", "𐐅", "x", "UTF8_BINARY", "x"); - assertStringTranslate("𐐅", "𐐅", "x", "UTF8_LCASE", "x"); - assertStringTranslate("𐐅", "𐐅", "x", "UNICODE", "x"); - assertStringTranslate("𐐅", "𐐅", "x", "UNICODE_CI", "x"); - assertStringTranslate("𐐅", "𐐭", "x", "UTF8_BINARY", "𐐅"); - assertStringTranslate("𐐅", "𐐭", "x", "UTF8_LCASE", "x"); - assertStringTranslate("𐐅", "𐐭", "x", "UNICODE", "𐐅"); - assertStringTranslate("𐐅", "𐐭", "x", "UNICODE_CI", "x"); - assertStringTranslate("A", "A", "𐐅", "UTF8_BINARY", "𐐅"); - assertStringTranslate("A", "A", "𐐅", "UTF8_LCASE", "𐐅"); - assertStringTranslate("A", "A", "𐐅", "UNICODE", "𐐅"); - assertStringTranslate("A", "A", "𐐅", "UNICODE_CI", "𐐅"); - assertStringTranslate("A", "a", "𐐅", "UTF8_BINARY", "A"); - assertStringTranslate("A", "a", "𐐅", "UTF8_LCASE", "𐐅"); - assertStringTranslate("A", "a", "𐐅", "UNICODE", "A"); - assertStringTranslate("A", "a", "𐐅", "UNICODE_CI", "𐐅"); - assertStringTranslate("a", "A", "𐐅", "UTF8_BINARY", "a"); - assertStringTranslate("a", "A", "𐐅", "UTF8_LCASE", "𐐅"); - assertStringTranslate("a", "A", "𐐅", "UNICODE", "a"); - assertStringTranslate("a", "A", "𐐅", "UNICODE_CI", "𐐅"); - assertStringTranslate("𝔸", "𝔸", "x", "UTF8_BINARY", "x"); - assertStringTranslate("𝔸", "𝔸", "x", "UTF8_LCASE", "x"); - assertStringTranslate("𝔸", "𝔸", "x", 
"UNICODE", "x"); - assertStringTranslate("𝔸", "𝔸", "x", "UNICODE_CI", "x"); - assertStringTranslate("𝔸", "𝕒", "x", "UTF8_BINARY", "𝔸"); - assertStringTranslate("𝔸", "𝕒", "x", "UTF8_LCASE", "𝔸"); - assertStringTranslate("𝔸", "𝕒", "x", "UNICODE", "𝔸"); - assertStringTranslate("𝔸", "𝕒", "x", "UNICODE_CI", "x"); + assertStringTranslate("a🙃b🙃c", "a", "x", UTF8_BINARY, "x🙃b🙃c"); + assertStringTranslate("a🙃b🙃c", "a🙃", "xy", UTF8_BINARY, "xybyc"); + assertStringTranslate("a🙃b🙃c", "a🙃b", "xyz", UTF8_BINARY, "xyzyc"); + assertStringTranslate("a🙃b🙃c", "a🙃bc", "xyzw", UTF8_BINARY, "xyzyw"); + assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", UTF8_BINARY, "😀😂😃😅"); + assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", UTF8_LCASE, "😀😂😃😅"); + assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", UNICODE, "😀😂😃😅"); + assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", UNICODE_CI, "😀😂😃😅"); + assertStringTranslate("𐐅", "𐐅", "x", UTF8_BINARY, "x"); + assertStringTranslate("𐐅", "𐐅", "x", UTF8_LCASE, "x"); + assertStringTranslate("𐐅", "𐐅", "x", UNICODE, "x"); + assertStringTranslate("𐐅", "𐐅", "x", UNICODE_CI, "x"); + assertStringTranslate("𐐅", "𐐭", "x", UTF8_BINARY, "𐐅"); + assertStringTranslate("𐐅", "𐐭", "x", UTF8_LCASE, "x"); + assertStringTranslate("𐐅", "𐐭", "x", UNICODE, "𐐅"); + assertStringTranslate("𐐅", "𐐭", "x", UNICODE_CI, "x"); + assertStringTranslate("A", "A", "𐐅", UTF8_BINARY, "𐐅"); + assertStringTranslate("A", "A", "𐐅", UTF8_LCASE, "𐐅"); + assertStringTranslate("A", "A", "𐐅", UNICODE, "𐐅"); + assertStringTranslate("A", "A", "𐐅", UNICODE_CI, "𐐅"); + assertStringTranslate("A", "a", "𐐅", UTF8_BINARY, "A"); + assertStringTranslate("A", "a", "𐐅", UTF8_LCASE, "𐐅"); + assertStringTranslate("A", "a", "𐐅", UNICODE, "A"); + assertStringTranslate("A", "a", "𐐅", UNICODE_CI, "𐐅"); + assertStringTranslate("a", "A", "𐐅", UTF8_BINARY, "a"); + assertStringTranslate("a", "A", "𐐅", UTF8_LCASE, "𐐅"); + assertStringTranslate("a", "A", "𐐅", UNICODE, "a"); + assertStringTranslate("a", "A", "𐐅", UNICODE_CI, "𐐅"); + 
assertStringTranslate("𝔸", "𝔸", "x", UTF8_BINARY, "x"); + assertStringTranslate("𝔸", "𝔸", "x", UTF8_LCASE, "x"); + assertStringTranslate("𝔸", "𝔸", "x", UNICODE, "x"); + assertStringTranslate("𝔸", "𝔸", "x", UNICODE_CI, "x"); + assertStringTranslate("𝔸", "𝕒", "x", UTF8_BINARY, "𝔸"); + assertStringTranslate("𝔸", "𝕒", "x", UTF8_LCASE, "𝔸"); + assertStringTranslate("𝔸", "𝕒", "x", UNICODE, "𝔸"); + assertStringTranslate("𝔸", "𝕒", "x", UNICODE_CI, "x"); } private Map buildDict(String matching, String replace) { diff --git a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java index 90dddc2cb08c1..3482c6addfee3 100644 --- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java +++ b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java @@ -22,6 +22,7 @@ import java.nio.channels.ReadableByteChannel; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.nio.file.LinkOption; import java.nio.file.attribute.BasicFileAttributes; import java.util.*; import java.util.concurrent.TimeUnit; @@ -125,10 +126,11 @@ public static void deleteRecursively(File file, FilenameFilter filter) throws IO private static void deleteRecursivelyUsingJavaIO( File file, FilenameFilter filter) throws IOException { - if (!file.exists()) return; - BasicFileAttributes fileAttributes = - Files.readAttributes(file.toPath(), BasicFileAttributes.class); - if (fileAttributes.isDirectory() && !isSymlink(file)) { + BasicFileAttributes fileAttributes = readFileAttributes(file); + // SPARK-50716: If the file attributes are null, that is, the file attributes cannot be read, + // or if the file does not exist and is not a broken symbolic link, then return directly. 
+ if (fileAttributes == null || (!file.exists() && !fileAttributes.isSymbolicLink())) return; + if (fileAttributes.isDirectory()) { IOException savedIOException = null; for (File child : listFilesSafely(file, filter)) { try { @@ -143,8 +145,8 @@ private static void deleteRecursivelyUsingJavaIO( } } - // Delete file only when it's a normal file or an empty directory. - if (fileAttributes.isRegularFile() || + // Delete file only when it's a normal file, a symbolic link, or an empty directory. + if (fileAttributes.isRegularFile() || fileAttributes.isSymbolicLink() || (fileAttributes.isDirectory() && listFilesSafely(file, null).length == 0)) { boolean deleted = file.delete(); // Delete can also fail if the file simply did not exist. @@ -154,6 +156,18 @@ private static void deleteRecursivelyUsingJavaIO( } } + /** + * Reads basic attributes of a given file, of return null if an I/O error occurs. + */ + private static BasicFileAttributes readFileAttributes(File file) { + try { + return Files.readAttributes( + file.toPath(), BasicFileAttributes.class, LinkOption.NOFOLLOW_LINKS); + } catch (IOException e) { + return null; + } + } + private static void deleteRecursivelyUsingUnixNative(File file) throws IOException { ProcessBuilder builder = new ProcessBuilder("rm", "-rf", file.getAbsolutePath()); Process process = null; @@ -192,17 +206,6 @@ private static File[] listFilesSafely(File file, FilenameFilter filter) throws I } } - private static boolean isSymlink(File file) throws IOException { - Objects.requireNonNull(file); - File fileInCanonicalDir = null; - if (file.getParent() == null) { - fileInCanonicalDir = file; - } else { - fileInCanonicalDir = new File(file.getParentFile().getCanonicalFile(), file.getName()); - } - return !fileInCanonicalDir.getCanonicalFile().equals(fileInCanonicalDir.getAbsoluteFile()); - } - private static final Map timeSuffixes; private static final Map byteSuffixes; diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json index 94513cca1023f..44d69b6675937 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -70,6 +70,12 @@ ], "sqlState" : "42000" }, + "AMBIGUOUS_RESOLVER_EXTENSION" : { + "message" : [ + "The single-pass analyzer cannot process this query or command because the extension choice for is ambiguous: ." + ], + "sqlState" : "XX000" + }, "ARITHMETIC_OVERFLOW" : { "message" : [ ". If necessary set to \"false\" to bypass this error." @@ -233,6 +239,11 @@ "An error occurred during loading state." ], "subClass" : { + "CANNOT_FIND_BASE_SNAPSHOT_CHECKPOINT" : { + "message" : [ + "Cannot find a base snapshot checkpoint with lineage: ." + ] + }, "CANNOT_READ_CHECKPOINT" : { "message" : [ "Cannot read RocksDB checkpoint metadata. Expected , but found ." @@ -275,7 +286,7 @@ }, "INVALID_CHANGE_LOG_READER_VERSION" : { "message" : [ - "The change log reader version cannot be ." + "The change log reader version cannot be . The checkpoint probably is from a future Spark version, please upgrade your Spark." ] }, "INVALID_CHANGE_LOG_WRITER_VERSION" : { @@ -366,7 +377,7 @@ }, "CANNOT_PARSE_TIMESTAMP" : { "message" : [ - ". If necessary set to \"false\" to bypass this error." + ". Use to tolerate invalid input string and return NULL instead." ], "sqlState" : "22007" }, @@ -741,12 +752,36 @@ }, "sqlState" : "56K00" }, + "CONNECT_ML" : { + "message" : [ + "Generic Spark Connect ML error." + ], + "subClass" : { + "ATTRIBUTE_NOT_ALLOWED" : { + "message" : [ + " is not allowed to be accessed." + ] + }, + "UNSUPPORTED_EXCEPTION" : { + "message" : [ + "" + ] + } + }, + "sqlState" : "XX000" + }, "CONVERSION_INVALID_INPUT" : { "message" : [ "The value () cannot be converted to because it is malformed. Correct the value as per the syntax, or change its format. Use to tolerate malformed input and return NULL instead." 
], "sqlState" : "22018" }, + "CORRUPTED_CATALOG_FUNCTION" : { + "message" : [ + "Cannot convert the catalog function '' into a SQL function due to corrupted function information in catalog. If the function is not a SQL function, please make sure the class name '' is loadable." + ], + "sqlState" : "0A000" + }, "CREATE_PERMANENT_VIEW_WITHOUT_ALIAS" : { "message" : [ "Not allowed to create the permanent view without explicitly assigning an alias for the expression ." @@ -1144,6 +1179,13 @@ ], "sqlState" : "42623" }, + "DESCRIBE_JSON_NOT_EXTENDED" : { + "message" : [ + "DESCRIBE TABLE ... AS JSON only supported when [EXTENDED|FORMATTED] is specified.", + "For example: DESCRIBE EXTENDED AS JSON is supported but DESCRIBE AS JSON is not." + ], + "sqlState" : "0A000" + }, "DISTINCT_WINDOW_FUNCTION_UNSUPPORTED" : { "message" : [ "Distinct window functions are not supported: ." @@ -1218,6 +1260,18 @@ }, "sqlState" : "4274K" }, + "DUPLICATE_ROUTINE_PARAMETER_NAMES" : { + "message" : [ + "Found duplicate name(s) in the parameter list of the user-defined routine : ." + ], + "sqlState" : "42734" + }, + "DUPLICATE_ROUTINE_RETURNS_COLUMNS" : { + "message" : [ + "Found duplicate column(s) in the RETURNS clause column list of the user-defined routine : ." + ], + "sqlState" : "42711" + }, "EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED" : { "message" : [ "Previous node emitted a row with eventTime= which is older than current_watermark_value=", @@ -1476,6 +1530,11 @@ "message" : [ "Data type mismatches when reading Parquet column . Expected Spark type , actual Parquet type ." ] + }, + "UNSUPPORTED_FILE_SYSTEM" : { + "message" : [ + "The file system hasn't implemented ." + ] } }, "sqlState" : "KD001" @@ -1649,6 +1708,39 @@ ], "sqlState" : "22000" }, + "HYBRID_ANALYZER_EXCEPTION" : { + "message" : [ + "An failure occurred when attempting to resolve a query or command with both the legacy fixed-point analyzer as well as the single-pass resolver." 
+ ], + "subClass" : { + "FIXED_POINT_FAILED_SINGLE_PASS_SUCCEEDED" : { + "message" : [ + "Fixed-point resolution failed, but single-pass resolution succeeded.", + "Single-pass analyzer output:", + "" + ] + }, + "LOGICAL_PLAN_COMPARISON_MISMATCH" : { + "message" : [ + "Outputs of fixed-point and single-pass analyzers do not match.", + "Fixed-point analyzer output:", + "", + "Single-pass analyzer output:", + "" + ] + }, + "OUTPUT_SCHEMA_COMPARISON_MISMATCH" : { + "message" : [ + "Output schemas of fixed-point and single-pass analyzers do not match.", + "Fixed-point analyzer output schema:", + "", + "Single-pass analyzer output schema:", + "" + ] + } + }, + "sqlState" : "XX000" + }, "IDENTIFIER_TOO_MANY_NAME_PARTS" : { "message" : [ " is not a valid identifier as it has more than 2 name parts." @@ -2082,13 +2174,13 @@ }, "INVALID_ARRAY_INDEX" : { "message" : [ - "The index is out of bounds. The array has elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead. If necessary set to \"false\" to bypass this error." + "The index is out of bounds. The array has elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead." ], "sqlState" : "22003" }, "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { "message" : [ - "The index is out of bounds. The array has elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set to \"false\" to bypass this error." + "The index is out of bounds. The array has elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead." ], "sqlState" : "22003" }, @@ -2627,29 +2719,6 @@ ], "sqlState" : "22006" }, - "INVALID_INVERSE_DISTRIBUTION_FUNCTION" : { - "message" : [ - "Invalid inverse distribution function ." - ], - "subClass" : { - "DISTINCT_UNSUPPORTED" : { - "message" : [ - "Cannot use DISTINCT with WITHIN GROUP." 
- ] - }, - "WITHIN_GROUP_MISSING" : { - "message" : [ - "WITHIN GROUP is required for inverse distribution function." - ] - }, - "WRONG_NUM_ORDERINGS" : { - "message" : [ - "Requires orderings in WITHIN GROUP but got ." - ] - } - }, - "sqlState" : "42K0K" - }, "INVALID_JAVA_IDENTIFIER_AS_FIELD_NAME" : { "message" : [ " is not a valid identifier of Java and cannot be used as field name", @@ -2713,6 +2782,11 @@ "message" : [ "ITERATE statement cannot be used with a label that belongs to a compound (BEGIN...END) body." ] + }, + "QUALIFIED_LABEL_NAME" : { + "message" : [ + "Label cannot be qualified." + ] } }, "sqlState" : "42K0L" @@ -2908,6 +2982,11 @@ "Unsupported dtype: . Valid values: float64, float32." ] }, + "EXTENSION" : { + "message" : [ + "Invalid extension: . Extension is limited to exactly 3 letters (e.g. csv, tsv, etc...)" + ] + }, "INTEGER" : { "message" : [ "expects an integer literal, but got ." @@ -3070,6 +3149,13 @@ ], "sqlState" : "42K08" }, + "INVALID_SQL_FUNCTION_PLAN_STRUCTURE" : { + "message" : [ + "Invalid SQL function plan structure", + "" + ], + "sqlState" : "XXKD0" + }, "INVALID_SQL_SYNTAX" : { "message" : [ "Invalid SQL syntax:" @@ -3364,6 +3450,34 @@ ], "sqlState" : "42601" }, + "INVALID_WITHIN_GROUP_EXPRESSION" : { + "message" : [ + "Invalid function with WITHIN GROUP." + ], + "subClass" : { + "DISTINCT_UNSUPPORTED" : { + "message" : [ + "The function does not support DISTINCT with WITHIN GROUP." + ] + }, + "MISMATCH_WITH_DISTINCT_INPUT" : { + "message" : [ + "The function is invoked with DISTINCT and WITHIN GROUP but expressions and do not match. The WITHIN GROUP ordering expression must be picked from the function inputs." + ] + }, + "WITHIN_GROUP_MISSING" : { + "message" : [ + "WITHIN GROUP is required for the function." + ] + }, + "WRONG_NUM_ORDERINGS" : { + "message" : [ + "The function requires orderings in WITHIN GROUP but got ." 
+ ] + } + }, + "sqlState" : "42K0K" + }, "INVALID_WRITER_COMMIT_MESSAGE" : { "message" : [ "The data source writer has generated an invalid number of commit messages. Expected exactly one writer commit message from each task, but received ." @@ -3564,6 +3678,12 @@ ], "sqlState" : "42710" }, + "MULTI_ALIAS_WITHOUT_GENERATOR" : { + "message" : [ + "Multi part aliasing () is not supported with as it is not a generator function." + ], + "sqlState" : "42K0E" + }, "MULTI_SOURCES_UNSUPPORTED_FOR_EXPRESSION" : { "message" : [ "The expression does not support more than one source." @@ -4086,6 +4206,18 @@ ], "sqlState" : "38000" }, + "RECURSIVE_CTE_IN_LEGACY_MODE" : { + "message" : [ + "Recursive definitions cannot be used in legacy CTE precedence mode (spark.sql.legacy.ctePrecedencePolicy=LEGACY)." + ], + "sqlState" : "42836" + }, + "RECURSIVE_CTE_WHEN_INLINING_IS_FORCED" : { + "message" : [ + "Recursive definitions cannot be used when CTE inlining is forced." + ], + "sqlState" : "42836" + }, "RECURSIVE_PROTOBUF_SCHEMA" : { "message" : [ "Found recursive reference in Protobuf schema, which can not be processed by Spark by default: . try setting the option `recursive.fields.max.depth` 1 to 10. Going beyond 10 levels of recursion is not allowed." @@ -4623,6 +4755,12 @@ ], "sqlState" : "42P01" }, + "TABLE_VALUED_ARGUMENTS_NOT_YET_IMPLEMENTED_FOR_SQL_FUNCTIONS" : { + "message" : [ + "Cannot SQL user-defined function with TABLE arguments because this functionality is not yet implemented." + ], + "sqlState" : "0A000" + }, "TABLE_VALUED_FUNCTION_FAILED_TO_ANALYZE_IN_PYTHON" : { "message" : [ "Failed to analyze the Python user defined table function: " @@ -4748,12 +4886,6 @@ ], "sqlState" : "42KD9" }, - "UNANALYZABLE_EXPRESSION" : { - "message" : [ - "The plan contains an unanalyzable expression that holds the analysis." - ], - "sqlState" : "03000" - }, "UNBOUND_SQL_PARAMETER" : { "message" : [ "Found the unbound parameter: . 
Please, fix `args` and provide a mapping of the parameter to either a SQL literal or collection constructor functions such as `map()`, `array()`, `struct()`." @@ -5036,11 +5168,6 @@ "message" : [ "Access to the SparkContext." ] - }, - "SESSION_SQL_CONTEXT" : { - "message" : [ - "Access to the SparkSession SQL Context." - ] } }, "sqlState" : "0A000" @@ -5189,6 +5316,11 @@ "The SQL pipe operator syntax using |> does not support ." ] }, + "COLLATIONS_IN_MAP_KEYS" : { + "message" : [ + "Collated strings for keys of maps" + ] + }, "COMBINATION_QUERY_RESULT_CLAUSES" : { "message" : [ "Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY." @@ -5199,6 +5331,11 @@ "Attach a comment to the namespace ." ] }, + "DESC_TABLE_COLUMN_JSON" : { + "message" : [ + "DESC TABLE COLUMN AS JSON not supported for individual columns." + ] + }, "DESC_TABLE_COLUMN_PARTITION" : { "message" : [ "DESC TABLE COLUMN for a specific partition." @@ -5244,6 +5381,11 @@ "Referencing lateral column alias in the aggregate query both with window expressions and with having clause. Please rewrite the aggregate query by removing the having clause or removing lateral alias reference in the SELECT list." ] }, + "LATERAL_COLUMN_ALIAS_IN_GENERATOR" : { + "message" : [ + "Referencing a lateral column alias in generator expression ." + ] + }, "LATERAL_COLUMN_ALIAS_IN_GROUP_BY" : { "message" : [ "Referencing a lateral column alias via GROUP BY alias/ALL is not supported yet." @@ -5274,6 +5416,11 @@ "The target JDBC server hosting table does not support ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual actions to avoid this error." ] }, + "OBJECT_LEVEL_COLLATIONS" : { + "message" : [ + "Default collation for the specified object." + ] + }, "ORC_TYPE_CAST" : { "message" : [ "Unable to convert of Orc to data type ." @@ -5294,6 +5441,11 @@ "Parameter markers are not allowed in ." 
] }, + "PARTITION_BY_VARIANT" : { + "message" : [ + "Cannot use VARIANT producing expressions to partition a DataFrame, but the type of expression is ." + ] + }, "PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED" : { "message" : [ "Invalid partitioning: is missing or is in a map or array." @@ -5333,12 +5485,12 @@ "message" : [ "Queries from raw JSON/CSV/XML files are disallowed when the", "referenced columns only include the internal corrupt record column", - "(named _corrupt_record by default). For example:", - "spark.read.schema(schema).json(file).filter($\"_corrupt_record\".isNotNull).count()", - "and spark.read.schema(schema).json(file).select(\"_corrupt_record\").show().", + "(named `_corrupt_record` by default). For example:", + "`spark.read.schema(schema).json(file).filter($\"_corrupt_record\".isNotNull).count()`", + "and `spark.read.schema(schema).json(file).select(\"_corrupt_record\").show()`.", "Instead, you can cache or save the parsed results and then send the same query.", - "For example, val df = spark.read.schema(schema).json(file).cache() and then", - "df.filter($\"_corrupt_record\".isNotNull).count()." + "For example, `val df = spark.read.schema(schema).json(file).cache()` and then", + "`df.filter($\"_corrupt_record\".isNotNull).count()`." ] }, "REMOVE_NAMESPACE_COMMENT" : { @@ -5361,6 +5513,11 @@ "Cannot have MAP type columns in DataFrame which calls set operations (INTERSECT, EXCEPT, etc.), but the type of column is ." ] }, + "SET_OPERATION_ON_VARIANT_TYPE" : { + "message" : [ + "Cannot have VARIANT type columns in DataFrame which calls set operations (INTERSECT, EXCEPT, etc.), but the type of column is ." + ] + }, "SET_PROPERTIES_AND_DBPROPERTIES" : { "message" : [ "set PROPERTIES and DBPROPERTIES at the same time." @@ -5381,6 +5538,11 @@ "SQL Scripting is under development and not all features are supported. SQL Scripting enables users to write procedural SQL including control flow and error handling. To enable existing features set to `true`." 
] }, + "SQL_SCRIPTING_WITH_POSITIONAL_PARAMETERS" : { + "message" : [ + "Positional parameters are not supported with SQL Scripting." + ] + }, "STATE_STORE_MULTIPLE_COLUMN_FAMILIES" : { "message" : [ "Creating multiple column families with is not supported." @@ -5619,6 +5781,18 @@ }, "sqlState" : "0A000" }, + "UNSUPPORTED_SINGLE_PASS_ANALYZER_FEATURE" : { + "message" : [ + "The single-pass analyzer cannot process this query or command because it does not yet support ." + ], + "sqlState" : "0A000" + }, + "UNSUPPORTED_SQL_UDF_USAGE" : { + "message" : [ + "Using SQL function in is not supported." + ], + "sqlState" : "0A000" + }, "UNSUPPORTED_STREAMING_OPERATOR_WITHOUT_WATERMARK" : { "message" : [ " output mode not supported for on streaming DataFrames/DataSets without watermark." @@ -5735,6 +5909,59 @@ ], "sqlState" : "42K0E" }, + "USER_DEFINED_FUNCTIONS" : { + "message" : [ + "User defined function is invalid:" + ], + "subClass" : { + "CANNOT_CONTAIN_COMPLEX_FUNCTIONS" : { + "message" : [ + "SQL scalar function cannot contain aggregate/window/generate functions: " + ] + }, + "CANNOT_REPLACE_NON_SQL_UDF_WITH_SQL_UDF" : { + "message" : [ + "Cannot replace the non-SQL function with a SQL function." + ] + }, + "NOT_A_VALID_DEFAULT_EXPRESSION" : { + "message" : [ + "The DEFAULT expression of ``.`` is not supported because it contains a subquery." + ] + }, + "NOT_A_VALID_DEFAULT_PARAMETER_POSITION" : { + "message" : [ + "In routine `` parameter `` with DEFAULT must not be followed by parameter `` without DEFAULT." + ] + }, + "NOT_NULL_ON_FUNCTION_PARAMETERS" : { + "message" : [ + "Cannot specify NOT NULL on function parameters: " + ] + }, + "RETURN_COLUMN_COUNT_MISMATCH" : { + "message" : [ + "The number of columns produced by the RETURN clause (num: ``) does not match the number of column names specified by the RETURNS clause (num: ``) of ." 
+ ] + }, + "ROUTINE_PROPERTY_TOO_LARGE" : { + "message" : [ + "Cannot convert user defined routine to catalog function: routine properties are too large." + ] + }, + "SQL_TABLE_UDF_BODY_MUST_BE_A_QUERY" : { + "message" : [ + "SQL table function body must be a query." + ] + }, + "SQL_TABLE_UDF_MISSING_COLUMN_NAMES" : { + "message" : [ + "The relation returned by the query in the CREATE FUNCTION statement for with RETURNS TABLE clause lacks explicit names for one or more output columns; please rewrite the function body to provide explicit column names or add column names to the RETURNS TABLE clause, and re-run the command." + ] + } + }, + "sqlState" : "42601" + }, "USER_RAISED_EXCEPTION" : { "message" : [ "" @@ -5855,7 +6082,7 @@ }, "XML_ROW_TAG_MISSING" : { "message" : [ - " option is required for reading files in XML format." + " option is required for reading/writing files in XML format." ], "sqlState" : "42KDF" }, @@ -6655,11 +6882,6 @@ "Decimal scale () cannot be greater than precision ()." ] }, - "_LEGACY_ERROR_TEMP_1231" : { - "message" : [ - " is not a valid partition column in table ." - ] - }, "_LEGACY_ERROR_TEMP_1232" : { "message" : [ "Partition spec is invalid. The spec () must match the partition spec () defined in table ''." 
diff --git a/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties b/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties index 9be86b650d091..777c5f2b25915 100644 --- a/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties +++ b/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties @@ -22,8 +22,8 @@ rootLogger.appenderRef.stdout.ref = console appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = JsonTemplateLayout -appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex # Settings to quiet third party logs that are too verbose logger.jetty.name = org.sparkproject.jetty diff --git a/common/utils/src/main/resources/org/apache/spark/log4j2-pattern-layout-defaults.properties b/common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties similarity index 94% rename from common/utils/src/main/resources/org/apache/spark/log4j2-pattern-layout-defaults.properties rename to common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties index 777c5f2b25915..9be86b650d091 100644 --- a/common/utils/src/main/resources/org/apache/spark/log4j2-pattern-layout-defaults.properties +++ b/common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties @@ -22,8 +22,8 @@ rootLogger.appenderRef.stdout.ref = console appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex +appender.console.layout.type = JsonTemplateLayout +appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json # Settings to quiet third party logs that are too verbose 
logger.jetty.name = org.sparkproject.jetty diff --git a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala index c365797cec690..c3a1af68d1c82 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala @@ -94,6 +94,7 @@ private[spark] object LogKeys { case object BATCH_TIMESTAMP extends LogKey case object BATCH_WRITE extends LogKey case object BIND_ADDRESS extends LogKey + case object BLOCK_GENERATOR_STATUS extends LogKey case object BLOCK_ID extends LogKey case object BLOCK_IDS extends LogKey case object BLOCK_MANAGER_ID extends LogKey @@ -241,6 +242,8 @@ private[spark] object LogKeys { case object EXECUTOR_ID extends LogKey case object EXECUTOR_IDS extends LogKey case object EXECUTOR_LAUNCH_COMMANDS extends LogKey + case object EXECUTOR_MEMORY_OFFHEAP extends LogKey + case object EXECUTOR_MEMORY_OVERHEAD_SIZE extends LogKey case object EXECUTOR_MEMORY_SIZE extends LogKey case object EXECUTOR_RESOURCES extends LogKey case object EXECUTOR_SHUFFLE_INFO extends LogKey @@ -348,9 +351,12 @@ private[spark] object LogKeys { case object KEYTAB extends LogKey case object KEYTAB_FILE extends LogKey case object KILL_EXECUTORS extends LogKey + case object KINESIS_REASON extends LogKey case object LABEL_COLUMN extends LogKey case object LARGEST_CLUSTER_INDEX extends LogKey case object LAST_ACCESS_TIME extends LogKey + case object LAST_COMMITTED_CHECKPOINT_ID extends LogKey + case object LAST_COMMIT_BASED_CHECKPOINT_ID extends LogKey case object LAST_VALID_TIME extends LogKey case object LATEST_BATCH_ID extends LogKey case object LATEST_COMMITTED_BATCH_ID extends LogKey @@ -359,8 +365,10 @@ private[spark] object LogKeys { case object LEFT_EXPR extends LogKey case object LEFT_LOGICAL_PLAN_STATS_SIZE_IN_BYTES extends LogKey case object LINE extends LogKey + case object LINEAGE extends LogKey case object 
LINE_NUM extends LogKey case object LISTENER extends LogKey + case object LOADED_CHECKPOINT_ID extends LogKey case object LOADED_VERSION extends LogKey case object LOAD_FACTOR extends LogKey case object LOAD_TIME extends LogKey @@ -542,7 +550,7 @@ private[spark] object LogKeys { case object NUM_RULE_OF_RUNS extends LogKey case object NUM_SEQUENCES extends LogKey case object NUM_SLOTS extends LogKey - case object NUM_SPILL_INFOS extends LogKey + case object NUM_SPILLS extends LogKey case object NUM_SPILL_WRITERS extends LogKey case object NUM_SUB_DIRS extends LogKey case object NUM_SUCCESSFUL_TASKS extends LogKey @@ -695,6 +703,7 @@ private[spark] object LogKeys { case object RULE_EXECUTOR_NAME extends LogKey case object RULE_NAME extends LogKey case object RUN_ID extends LogKey + case object RUN_ID_STRING extends LogKey case object SCALA_VERSION extends LogKey case object SCALING_DOWN_RATIO extends LogKey case object SCALING_UP_RATIO extends LogKey @@ -717,6 +726,7 @@ private[spark] object LogKeys { case object SHUFFLE_DB_BACKEND_KEY extends LogKey case object SHUFFLE_DB_BACKEND_NAME extends LogKey case object SHUFFLE_ID extends LogKey + case object SHUFFLE_IDS extends LogKey case object SHUFFLE_MERGE_ID extends LogKey case object SHUFFLE_MERGE_RECOVERY_FILE extends LogKey case object SHUFFLE_SERVICE_CONF_OVERLAY_URL extends LogKey @@ -747,14 +757,18 @@ private[spark] object LogKeys { case object STAGE extends LogKey case object STAGES extends LogKey case object STAGE_ATTEMPT extends LogKey + case object STAGE_ATTEMPT_ID extends LogKey case object STAGE_ID extends LogKey case object STAGE_NAME extends LogKey case object START_INDEX extends LogKey case object START_TIME extends LogKey case object STATEMENT_ID extends LogKey case object STATE_NAME extends LogKey + case object STATE_STORE_COORDINATOR extends LogKey case object STATE_STORE_ID extends LogKey case object STATE_STORE_PROVIDER extends LogKey + case object STATE_STORE_PROVIDER_ID extends LogKey + case 
object STATE_STORE_PROVIDER_IDS extends LogKey case object STATE_STORE_VERSION extends LogKey case object STATS extends LogKey case object STATUS extends LogKey @@ -878,6 +892,7 @@ private[spark] object LogKeys { case object WRITE_JOB_UUID extends LogKey case object XML_SCHEDULING_MODE extends LogKey case object XSD_PATH extends LogKey + case object YARN_RESOURCE extends LogKey case object YOUNG_GENERATION_GC extends LogKey case object ZERO_TIME extends LogKey } diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala index 7471b764bd2b3..4b60cb20f0732 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala @@ -337,9 +337,9 @@ trait Logging { if (Logging.defaultSparkLog4jConfig || Logging.islog4j2DefaultConfigured()) { Logging.defaultSparkLog4jConfig = true val defaultLogProps = if (Logging.isStructuredLoggingEnabled) { - "org/apache/spark/log4j2-defaults.properties" + "org/apache/spark/log4j2-json-layout.properties" } else { - "org/apache/spark/log4j2-pattern-layout-defaults.properties" + "org/apache/spark/log4j2-defaults.properties" } Option(SparkClassUtils.getSparkClassLoader.getResource(defaultLogProps)) match { case Some(url) => @@ -398,7 +398,7 @@ private[spark] object Logging { @volatile private var initialized = false @volatile private var defaultRootLevel: Level = null @volatile private var defaultSparkLog4jConfig = false - @volatile private var structuredLoggingEnabled = true + @volatile private var structuredLoggingEnabled = false @volatile private[spark] var sparkShellThresholdLevel: Level = null @volatile private[spark] var setLogLevelPrinted: Boolean = false diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala b/common/utils/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala similarity index 99% rename from 
core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala rename to common/utils/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala index f50cc0f88842a..d3e975d1782f0 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala @@ -24,7 +24,7 @@ import scala.util.matching.Regex import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.network.util.{ByteUnit, JavaUtils} -import org.apache.spark.util.Utils +import org.apache.spark.util.SparkStringUtils private object ConfigHelpers { @@ -47,7 +47,7 @@ private object ConfigHelpers { } def stringToSeq[T](str: String, converter: String => T): Seq[T] = { - Utils.stringToSeq(str).map(converter) + SparkStringUtils.stringToSeq(str).map(converter) } def seqToString[T](v: Seq[T], stringConverter: T => String): String = { diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala b/common/utils/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala similarity index 100% rename from core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala rename to common/utils/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigProvider.scala b/common/utils/src/main/scala/org/apache/spark/internal/config/ConfigProvider.scala similarity index 78% rename from core/src/main/scala/org/apache/spark/internal/config/ConfigProvider.scala rename to common/utils/src/main/scala/org/apache/spark/internal/config/ConfigProvider.scala index 392f9d56e7f51..fef019ef1f560 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigProvider.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/config/ConfigProvider.scala @@ -19,8 +19,6 @@ package org.apache.spark.internal.config import java.util.{Map => JMap} -import org.apache.spark.SparkConf - 
/** * A source of configuration values. */ @@ -47,18 +45,3 @@ private[spark] class MapProvider(conf: JMap[String, String]) extends ConfigProvi override def get(key: String): Option[String] = Option(conf.get(key)) } - -/** - * A config provider that only reads Spark config keys. - */ -private[spark] class SparkConfigProvider(conf: JMap[String, String]) extends ConfigProvider { - - override def get(key: String): Option[String] = { - if (key.startsWith("spark.")) { - Option(conf.get(key)).orElse(SparkConf.getDeprecatedConfig(key, conf)) - } else { - None - } - } - -} diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala b/common/utils/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala similarity index 100% rename from core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala rename to common/utils/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkStringUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkStringUtils.scala new file mode 100644 index 0000000000000..6915f373b84e5 --- /dev/null +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkStringUtils.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util + +trait SparkStringUtils { + def stringToSeq(str: String): Seq[String] = { + import org.apache.spark.util.ArrayImplicits._ + str.split(",").map(_.trim()).filter(_.nonEmpty).toImmutableArraySeq + } +} + +object SparkStringUtils extends SparkStringUtils diff --git a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java index 6959fe11820ff..1fab167adfeb0 100644 --- a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java +++ b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java @@ -21,11 +21,27 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.logging.log4j.Level; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; + +import org.apache.spark.internal.Logging$; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; public class StructuredSparkLoggerSuite extends SparkLoggerSuiteBase { + // Enable Structured Logging before running the tests + @BeforeAll + public static void setup() { + Logging$.MODULE$.enableStructuredLogging(); + } + + // Disable Structured Logging after running the tests + @AfterAll + public static void teardown() { + Logging$.MODULE$.disableStructuredLogging(); + } + private static final SparkLogger LOGGER = SparkLoggerFactory.getLogger(StructuredSparkLoggerSuite.class); diff --git a/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala index 7631c25662219..9615eb2263636 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.util import 
scala.jdk.CollectionConverters._ +import org.scalatest.BeforeAndAfterAll import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite import org.apache.spark.internal.{Logging, MDC} @@ -26,7 +27,16 @@ import org.apache.spark.internal.LogKeys.{EXIT_CODE, OFFSET, RANGE} class MDCSuite extends AnyFunSuite // scalastyle:ignore funsuite - with Logging { + with Logging + with BeforeAndAfterAll { + + override def beforeAll(): Unit = { + Logging.enableStructuredLogging() + } + + override def afterAll(): Unit = { + Logging.disableStructuredLogging() + } test("check MDC message") { val log = log"This is a log, exitcode ${MDC(EXIT_CODE, 10086)}" diff --git a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala index 2ba2b15c49f33..248136798b362 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala @@ -17,19 +17,16 @@ package org.apache.spark.util import org.apache.logging.log4j.Level -import org.scalatest.BeforeAndAfterAll import org.apache.spark.internal.Logging -class PatternLoggingSuite extends LoggingSuiteBase with BeforeAndAfterAll { +class PatternLoggingSuite extends LoggingSuiteBase { override def className: String = classOf[PatternLoggingSuite].getSimpleName override def logFilePath: String = "target/pattern.log" override def beforeAll(): Unit = Logging.disableStructuredLogging() - override def afterAll(): Unit = Logging.enableStructuredLogging() - override def expectedPatternForBasicMsg(level: Level): String = { s""".*$level $className: This is a log message\n""" } diff --git a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala index 48951c2084f17..0026b696f0695 100644 --- 
a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala @@ -23,14 +23,21 @@ import java.nio.file.Files import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.logging.log4j.Level +import org.scalatest.BeforeAndAfterAll import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite import org.apache.spark.internal.{LogEntry, Logging, LogKey, LogKeys, MDC, MessageWithContext} trait LoggingSuiteBase extends AnyFunSuite // scalastyle:ignore funsuite + with BeforeAndAfterAll with Logging { + override def afterAll(): Unit = { + super.afterAll() + Logging.disableStructuredLogging() + } + def className: String def logFilePath: String @@ -202,7 +209,7 @@ trait LoggingSuiteBase } } - private val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}" + private lazy val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.")}" test("Logging with custom LogKey") { Seq( (Level.ERROR, () => logError(customLog)), @@ -265,6 +272,13 @@ class StructuredLoggingSuite extends LoggingSuiteBase { override def className: String = classOf[StructuredLoggingSuite].getSimpleName override def logFilePath: String = "target/structured.log" + override def beforeAll(): Unit = { + super.beforeAll() + Logging.enableStructuredLogging() + } + + override def afterAll(): Unit = super.afterAll() + private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule) private def compactAndToRegexPattern(json: String): String = { jsonMapper.readTree(json).toString. 
diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/ShreddingUtils.java b/common/variant/src/main/java/org/apache/spark/types/variant/ShreddingUtils.java new file mode 100644 index 0000000000000..6a04bf9a2b259 --- /dev/null +++ b/common/variant/src/main/java/org/apache/spark/types/variant/ShreddingUtils.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.types.variant; + +import java.math.BigDecimal; +import java.util.ArrayList; + +import static org.apache.spark.types.variant.VariantUtil.*; + +public class ShreddingUtils { + // Interface to read from a shredded result. It essentially has the same interface and semantics + // as Spark's `SpecializedGetters`, but we need a new interface to avoid the dependency. 
+ public interface ShreddedRow { + boolean isNullAt(int ordinal); + boolean getBoolean(int ordinal); + byte getByte(int ordinal); + short getShort(int ordinal); + int getInt(int ordinal); + long getLong(int ordinal); + float getFloat(int ordinal); + double getDouble(int ordinal); + BigDecimal getDecimal(int ordinal, int precision, int scale); + String getString(int ordinal); + byte[] getBinary(int ordinal); + ShreddedRow getStruct(int ordinal, int numFields); + ShreddedRow getArray(int ordinal); + int numElements(); + } + + // This `rebuild` function should only be called on the top-level schema, and that other private + // implementation will be called on any recursively shredded sub-schema. + public static Variant rebuild(ShreddedRow row, VariantSchema schema) { + if (schema.topLevelMetadataIdx < 0 || row.isNullAt(schema.topLevelMetadataIdx)) { + throw malformedVariant(); + } + byte[] metadata = row.getBinary(schema.topLevelMetadataIdx); + if (schema.isUnshredded()) { + // `rebuild` is unnecessary for unshredded variant. + if (row.isNullAt(schema.variantIdx)) { + throw malformedVariant(); + } + return new Variant(row.getBinary(schema.variantIdx), metadata); + } + VariantBuilder builder = new VariantBuilder(false); + rebuild(row, metadata, schema, builder); + return builder.result(); + } + + // Rebuild a variant value from the shredded data according to the reconstruction algorithm in + // https://github.com/apache/parquet-format/blob/master/VariantShredding.md. + // Append the result to `builder`. 
+ public static void rebuild(ShreddedRow row, byte[] metadata, VariantSchema schema, + VariantBuilder builder) { + int typedIdx = schema.typedIdx; + int variantIdx = schema.variantIdx; + if (typedIdx >= 0 && !row.isNullAt(typedIdx)) { + if (schema.scalarSchema != null) { + VariantSchema.ScalarType scalar = schema.scalarSchema; + if (scalar instanceof VariantSchema.StringType) { + builder.appendString(row.getString(typedIdx)); + } else if (scalar instanceof VariantSchema.IntegralType) { + VariantSchema.IntegralType it = (VariantSchema.IntegralType) scalar; + long value = 0; + switch (it.size) { + case BYTE: + value = row.getByte(typedIdx); + break; + case SHORT: + value = row.getShort(typedIdx); + break; + case INT: + value = row.getInt(typedIdx); + break; + case LONG: + value = row.getLong(typedIdx); + break; + } + builder.appendLong(value); + } else if (scalar instanceof VariantSchema.FloatType) { + builder.appendFloat(row.getFloat(typedIdx)); + } else if (scalar instanceof VariantSchema.DoubleType) { + builder.appendDouble(row.getDouble(typedIdx)); + } else if (scalar instanceof VariantSchema.BooleanType) { + builder.appendBoolean(row.getBoolean(typedIdx)); + } else if (scalar instanceof VariantSchema.BinaryType) { + builder.appendBinary(row.getBinary(typedIdx)); + } else if (scalar instanceof VariantSchema.DecimalType) { + VariantSchema.DecimalType dt = (VariantSchema.DecimalType) scalar; + builder.appendDecimal(row.getDecimal(typedIdx, dt.precision, dt.scale)); + } else if (scalar instanceof VariantSchema.DateType) { + builder.appendDate(row.getInt(typedIdx)); + } else if (scalar instanceof VariantSchema.TimestampType) { + builder.appendTimestamp(row.getLong(typedIdx)); + } else { + assert scalar instanceof VariantSchema.TimestampNTZType; + builder.appendTimestampNtz(row.getLong(typedIdx)); + } + } else if (schema.arraySchema != null) { + VariantSchema elementSchema = schema.arraySchema; + ShreddedRow array = row.getArray(typedIdx); + int start = 
builder.getWritePos(); + ArrayList offsets = new ArrayList<>(array.numElements()); + for (int i = 0; i < array.numElements(); i++) { + offsets.add(builder.getWritePos() - start); + rebuild(array.getStruct(i, elementSchema.numFields), metadata, elementSchema, builder); + } + builder.finishWritingArray(start, offsets); + } else { + ShreddedRow object = row.getStruct(typedIdx, schema.objectSchema.length); + ArrayList fields = new ArrayList<>(); + int start = builder.getWritePos(); + for (int fieldIdx = 0; fieldIdx < schema.objectSchema.length; ++fieldIdx) { + // Shredded field must not be null. + if (object.isNullAt(fieldIdx)) { + throw malformedVariant(); + } + String fieldName = schema.objectSchema[fieldIdx].fieldName; + VariantSchema fieldSchema = schema.objectSchema[fieldIdx].schema; + ShreddedRow fieldValue = object.getStruct(fieldIdx, fieldSchema.numFields); + // If the field doesn't have non-null `typed_value` or `value`, it is missing. + if ((fieldSchema.typedIdx >= 0 && !fieldValue.isNullAt(fieldSchema.typedIdx)) || + (fieldSchema.variantIdx >= 0 && !fieldValue.isNullAt(fieldSchema.variantIdx))) { + int id = builder.addKey(fieldName); + fields.add(new VariantBuilder.FieldEntry(fieldName, id, builder.getWritePos() - start)); + rebuild(fieldValue, metadata, fieldSchema, builder); + } + } + if (variantIdx >= 0 && !row.isNullAt(variantIdx)) { + // Add the leftover fields in the variant binary. + Variant v = new Variant(row.getBinary(variantIdx), metadata); + if (v.getType() != VariantUtil.Type.OBJECT) throw malformedVariant(); + for (int i = 0; i < v.objectSize(); ++i) { + Variant.ObjectField field = v.getFieldAtIndex(i); + // `value` must not contain any shredded field. 
+ if (schema.objectSchemaMap.containsKey(field.key)) { + throw malformedVariant(); + } + int id = builder.addKey(field.key); + fields.add(new VariantBuilder.FieldEntry(field.key, id, builder.getWritePos() - start)); + builder.appendVariant(field.value); + } + } + builder.finishWritingObject(start, fields); + } + } else if (variantIdx >= 0 && !row.isNullAt(variantIdx)) { + // `typed_value` doesn't exist or is null. Read from `value`. + builder.appendVariant(new Variant(row.getBinary(variantIdx), metadata)); + } else { + // This means the variant is missing in a context where it must be present, so the input data + // is invalid. + throw malformedVariant(); + } + } +} diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantSchema.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantSchema.java index 551e46214859a..d1e6cc3a727fa 100644 --- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantSchema.java +++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantSchema.java @@ -138,6 +138,12 @@ public VariantSchema(int typedIdx, int variantIdx, int topLevelMetadataIdx, int this.arraySchema = arraySchema; } + // Return whether the variant column is unshredded. The user is not required to do anything + // special, but can have certain optimizations for unshredded variant. 
+ public boolean isUnshredded() { + return topLevelMetadataIdx >= 0 && variantIdx >= 0 && typedIdx < 0; + } + @Override public String toString() { return "VariantSchema{" + diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantShreddingWriter.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantShreddingWriter.java index b5f8ea0a1484b..bbee7ee0dca38 100644 --- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantShreddingWriter.java +++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantShreddingWriter.java @@ -101,7 +101,9 @@ public static ShreddedResult castShredded( int id = v.getDictionaryIdAtIndex(i); fieldEntries.add(new VariantBuilder.FieldEntry( field.key, id, variantBuilder.getWritePos() - start)); - variantBuilder.appendVariant(field.value); + // shallowAppendVariant is needed for correctness, since we're relying on the metadata IDs + // being unchanged. + variantBuilder.shallowAppendVariant(field.value); } } if (numFieldsMatched < objectSchema.length) { @@ -133,8 +135,6 @@ public static ShreddedResult castShredded( // Store the typed value. result.addScalar(typedValue); } else { - VariantBuilder variantBuilder = new VariantBuilder(false); - variantBuilder.appendVariant(v); result.addVariantValue(v.getValue()); } } else { diff --git a/conf/log4j2.properties.pattern-layout-template b/conf/log4j2-json-layout.properties.template similarity index 72% rename from conf/log4j2.properties.pattern-layout-template rename to conf/log4j2-json-layout.properties.template index ab96e03baed20..76499bb6691e7 100644 --- a/conf/log4j2.properties.pattern-layout-template +++ b/conf/log4j2-json-layout.properties.template @@ -19,17 +19,11 @@ rootLogger.level = info rootLogger.appenderRef.stdout.ref = console -# In the pattern layout configuration below, we specify an explicit `%ex` conversion -# pattern for logging Throwables. 
If this was omitted, then (by default) Log4J would -# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional -# class packaging information. That extra information can sometimes add a substantial -# performance overhead, so we disable it in our default logging config. -# For more information, see SPARK-39361. appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex +appender.console.layout.type = JsonTemplateLayout +appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json # Set the default spark-shell/spark-sql log level to WARN. When running the # spark-shell/spark-sql, the log level for these classes is used to overwrite @@ -60,10 +54,3 @@ logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHan logger.RetryingHMSHandler.level = fatal logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry logger.FunctionRegistry.level = error - -# For deploying Spark ThriftServer -# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805 -appender.console.filter.1.type = RegexFilter -appender.console.filter.1.regex = .*Thrift error occurred during processing of message.* -appender.console.filter.1.onMatch = deny -appender.console.filter.1.onMismatch = neutral diff --git a/conf/log4j2.properties.template b/conf/log4j2.properties.template index 8767245314449..011fca58c9b2a 100644 --- a/conf/log4j2.properties.template +++ b/conf/log4j2.properties.template @@ -19,11 +19,17 @@ rootLogger.level = info rootLogger.appenderRef.stdout.ref = console +# In the pattern layout configuration below, we specify an explicit `%ex` conversion +# pattern for logging Throwables. 
If this was omitted, then (by default) Log4J would +# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional +# class packaging information. That extra information can sometimes add a substantial +# performance overhead, so we disable it in our default logging config. +# For more information, see SPARK-39361. appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = JsonTemplateLayout -appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex # Set the default spark-shell/spark-sql log level to WARN. When running the # spark-shell/spark-sql, the log level for these classes is used to overwrite @@ -54,10 +60,3 @@ logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHan logger.RetryingHMSHandler.level = fatal logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry logger.FunctionRegistry.level = error - -# For deploying Spark ThriftServer -# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805 -appender.console.filter.1.type = RegexFilter -appender.console.filter.1.regex = .*Thrift error occurred during processing of message.* -appender.console.filter.1.onMatch = deny -appender.console.filter.1.onMismatch = neutral diff --git a/connector/avro/benchmarks/AvroReadBenchmark-jdk21-results.txt b/connector/avro/benchmarks/AvroReadBenchmark-jdk21-results.txt index 0f4579f5da24f..c41782457cd9e 100644 --- a/connector/avro/benchmarks/AvroReadBenchmark-jdk21-results.txt +++ b/connector/avro/benchmarks/AvroReadBenchmark-jdk21-results.txt @@ -2,140 +2,140 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 2061 2066 7 7.6 131.0 1.0X +Sum 1971 1989 26 8.0 125.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 2032 2033 2 7.7 129.2 1.0X +Sum 1958 2014 80 8.0 124.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1992 2005 19 7.9 126.6 1.0X +Sum 1956 1987 44 8.0 124.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1992 2017 35 7.9 126.6 1.0X +Sum 1953 1962 12 8.1 124.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Sum 1981 1981 0 7.9 125.9 1.0X +Sum 1948 1950 3 8.1 123.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1981 1984 4 7.9 126.0 1.0X +Sum 1933 1938 6 8.1 122.9 1.0X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of columns 3863 3867 5 2.7 368.4 1.0X +Sum of columns 3570 3574 6 2.9 340.4 1.0X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column 2129 2143 20 7.4 135.4 1.0X -Partition column 1984 1986 2 7.9 126.1 1.1X -Both columns 2209 
2231 31 7.1 140.4 1.0X +Data column 2062 2083 30 7.6 131.1 1.0X +Partition column 1869 1873 5 8.4 118.9 1.1X +Both columns 2057 2093 51 7.6 130.8 1.0X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 2511 2564 75 4.2 239.5 1.0X +Sum of string length 2010 2012 3 5.2 191.7 1.0X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 3303 3317 20 3.2 315.0 1.0X +Sum of string length 3082 3094 16 3.4 293.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 2221 2252 44 4.7 211.8 1.0X +Sum of string length 2220 2245 
36 4.7 211.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 1170 1174 6 9.0 111.6 1.0X +Sum of string length 1152 1159 10 9.1 109.9 1.0X ================================================================================================ Select All From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Wide Column Scan from 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select of all columns 19095 19150 78 0.0 38190.4 1.0X +Select of all columns 20941 20946 6 0.0 41882.8 1.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 3188 3192 4 0.3 3040.7 1.0X +Sum of single column 3425 3440 21 0.3 3266.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 6229 6254 35 0.2 5940.6 1.0X +Sum of single column 6740 6770 43 0.2 6427.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 9279 9318 56 0.1 8848.8 1.0X +Sum of single column 9988 10056 96 0.1 9525.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 5538 5544 7 0.2 5537.5 1.0X -pushdown disabled 5546 5571 24 0.2 5546.5 1.0X -w/ filters 2312 2324 18 0.4 2312.4 2.4X +w/o filters 5802 5844 44 0.2 5801.7 1.0X +pushdown disabled 5544 5616 97 0.2 5543.9 1.0X +w/ filters 2605 2609 5 0.4 2605.4 2.2X diff --git a/connector/avro/benchmarks/AvroReadBenchmark-results.txt b/connector/avro/benchmarks/AvroReadBenchmark-results.txt index db6193e67ac39..117cb0b05e8a3 100644 --- a/connector/avro/benchmarks/AvroReadBenchmark-results.txt +++ b/connector/avro/benchmarks/AvroReadBenchmark-results.txt @@ -2,140 +2,140 @@ SQL Single Numeric Column Scan 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1959 1993 47 8.0 124.6 1.0X +Sum 1945 1948 4 8.1 123.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1929 1949 28 8.2 122.6 1.0X +Sum 1941 1965 34 8.1 123.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1965 1974 13 8.0 124.9 1.0X +Sum 1910 1921 15 8.2 121.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1904 1918 20 8.3 121.0 1.0X +Sum 1923 1927 6 8.2 122.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1916 1934 26 8.2 121.8 1.0X +Sum 1893 1898 7 8.3 120.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1911 1917 8 8.2 121.5 1.0X +Sum 1890 1894 6 8.3 120.2 1.0X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of columns 3426 3450 34 3.1 326.8 1.0X +Sum of columns 3614 3616 2 2.9 344.7 1.0X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Data column 1871 1888 24 8.4 118.9 1.0X -Partition column 1713 1720 9 9.2 108.9 1.1X -Both columns 1962 1970 12 8.0 124.7 1.0X +Data column 2106 2108 2 7.5 133.9 1.0X +Partition column 1862 1864 3 8.4 118.4 1.1X +Both columns 2359 2382 32 6.7 150.0 0.9X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 2042 2055 18 5.1 194.7 1.0X +Sum of string length 2147 2151 6 4.9 204.7 1.0X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 3089 3109 28 3.4 294.6 1.0X +Sum of string length 3410 3421 16 3.1 325.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 2103 2104 2 5.0 200.5 1.0X +Sum of string length 2133 2157 34 4.9 203.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 1073 1079 10 9.8 102.3 1.0X +Sum of string length 1107 1110 4 9.5 105.6 1.0X ================================================================================================ Select All From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Wide Column Scan from 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select of all columns 18300 18346 64 0.0 36600.5 1.0X +Select of all columns 19318 19384 94 0.0 38635.4 1.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Sum of single column 3144 3148 5 0.3 2998.8 1.0X +Sum of single column 3159 3183 34 0.3 3012.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 6216 6229 19 0.2 5927.8 1.0X +Sum of single column 6352 6387 49 0.2 6058.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 9286 9585 423 0.1 8855.6 1.0X +Sum of single column 9512 9539 39 0.1 9070.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 5308 5326 23 0.2 5307.5 1.0X -pushdown disabled 5253 5288 33 0.2 5252.7 1.0X -w/ filters 2036 2061 24 0.5 2036.3 2.6X +w/o filters 5474 5481 12 0.2 5474.4 1.0X +pushdown disabled 5453 5490 44 0.2 5452.7 1.0X +w/ filters 2210 2223 18 0.5 2209.7 2.5X diff --git a/connector/avro/benchmarks/AvroWriteBenchmark-jdk21-results.txt b/connector/avro/benchmarks/AvroWriteBenchmark-jdk21-results.txt index 
a071bc767cfaa..b8c0d3b95e360 100644 --- a/connector/avro/benchmarks/AvroWriteBenchmark-jdk21-results.txt +++ b/connector/avro/benchmarks/AvroWriteBenchmark-jdk21-results.txt @@ -1,56 +1,56 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1566 1588 30 10.0 99.6 1.0X -Output Single Double Column 1655 1668 18 9.5 105.3 0.9X -Output Int and String Column 3493 3496 5 4.5 222.1 0.4X -Output Partitions 3062 3112 71 5.1 194.7 0.5X -Output Buckets 3937 3952 20 4.0 250.3 0.4X +Output Single Int Column 1562 1586 33 10.1 99.3 1.0X +Output Single Double Column 1658 1695 52 9.5 105.4 0.9X +Output Int and String Column 3516 3524 11 4.5 223.6 0.4X +Output Partitions 2936 3033 138 5.4 186.6 0.5X +Output Buckets 3856 3882 36 4.1 245.1 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro compression with different codec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BZIP2: 115765 115975 297 0.0 1157649.1 1.0X -DEFLATE: 6345 6370 35 0.0 63448.5 18.2X -UNCOMPRESSED: 5183 5184 1 0.0 51827.4 22.3X -SNAPPY: 4611 4614 3 0.0 46112.5 25.1X -XZ: 54096 57854 5315 0.0 540956.3 2.1X -ZSTANDARD: 4877 4888 15 0.0 48770.9 23.7X +BZIP2: 117457 117471 19 0.0 1174572.2 1.0X +DEFLATE: 6340 6364 34 0.0 63404.0 18.5X +UNCOMPRESSED: 4990 4998 12 0.0 49898.9 23.5X +SNAPPY: 4561 4564 4 0.0 45610.2 25.8X +XZ: 43883 49072 7337 0.0 438832.8 2.7X +ZSTANDARD: 4774 4777 4 0.0 47741.1 24.6X -OpenJDK 64-Bit 
Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro deflate with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -DEFLATE: deflate.level=1 4807 4847 57 0.0 48065.8 1.0X -DEFLATE: deflate.level=3 4803 4809 7 0.0 48033.5 1.0X -DEFLATE: deflate.level=5 6373 6389 22 0.0 63728.7 0.8X -DEFLATE: deflate.level=7 6427 6460 47 0.0 64266.6 0.7X -DEFLATE: deflate.level=9 6628 6634 10 0.0 66277.2 0.7X +DEFLATE: deflate.level=1 4752 4764 17 0.0 47519.5 1.0X +DEFLATE: deflate.level=3 4682 4687 8 0.0 46819.6 1.0X +DEFLATE: deflate.level=5 6382 6392 13 0.0 63820.4 0.7X +DEFLATE: deflate.level=7 6477 6532 77 0.0 64774.8 0.7X +DEFLATE: deflate.level=9 6773 6783 15 0.0 67729.6 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro xz with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -XZ: xz.level=1 11974 12000 37 0.0 119736.9 1.0X -XZ: xz.level=3 21671 21813 201 0.0 216709.0 0.6X -XZ: xz.level=5 47055 47335 397 0.0 470545.5 0.3X -XZ: xz.level=7 74766 75069 428 0.0 747658.3 0.2X -XZ: xz.level=9 146478 146490 16 0.0 1464783.7 0.1X +XZ: xz.level=1 11571 11577 9 0.0 115710.1 1.0X +XZ: xz.level=3 21469 21642 245 0.0 214687.1 0.5X +XZ: xz.level=5 40907 40912 7 0.0 409072.6 0.3X +XZ: xz.level=7 60545 61371 1167 0.0 605453.1 0.2X +XZ: xz.level=9 136882 137479 845 0.0 1368819.9 0.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro zstandard with 
different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------- -ZSTANDARD: zstandard.level=1 4760 4797 53 0.0 47598.3 1.0X -ZSTANDARD: zstandard.level=1, zstandard.bufferPool.enabled=true 4666 4696 43 0.0 46656.0 1.0X -ZSTANDARD: zstandard.level=3 4845 4869 33 0.0 48452.8 1.0X -ZSTANDARD: zstandard.level=3, zstandard.bufferPool.enabled=true 4790 4801 16 0.0 47896.5 1.0X -ZSTANDARD: zstandard.level=5 5125 5164 55 0.0 51248.6 0.9X -ZSTANDARD: zstandard.level=5, zstandard.bufferPool.enabled=true 4912 4928 22 0.0 49122.5 1.0X -ZSTANDARD: zstandard.level=7 5319 5333 19 0.0 53192.1 0.9X -ZSTANDARD: zstandard.level=7, zstandard.bufferPool.enabled=true 5250 5284 48 0.0 52501.2 0.9X -ZSTANDARD: zstandard.level=9 6087 6087 0 0.0 60869.7 0.8X -ZSTANDARD: zstandard.level=9, zstandard.bufferPool.enabled=true 6219 6234 21 0.0 62191.3 0.8X +ZSTANDARD: zstandard.level=1 4722 4763 58 0.0 47221.7 1.0X +ZSTANDARD: zstandard.level=1, zstandard.bufferPool.enabled=true 4734 5114 536 0.0 47341.8 1.0X +ZSTANDARD: zstandard.level=3 4816 4837 29 0.0 48162.0 1.0X +ZSTANDARD: zstandard.level=3, zstandard.bufferPool.enabled=true 4741 4766 35 0.0 47414.8 1.0X +ZSTANDARD: zstandard.level=5 5054 5155 143 0.0 50536.2 0.9X +ZSTANDARD: zstandard.level=5, zstandard.bufferPool.enabled=true 4869 4874 7 0.0 48690.8 1.0X +ZSTANDARD: zstandard.level=7 5325 5350 35 0.0 53251.2 0.9X +ZSTANDARD: zstandard.level=7, zstandard.bufferPool.enabled=true 5283 5308 35 0.0 52828.8 0.9X +ZSTANDARD: zstandard.level=9 6092 6116 35 0.0 60917.5 0.8X +ZSTANDARD: zstandard.level=9, zstandard.bufferPool.enabled=true 5925 5935 15 0.0 59246.3 0.8X diff --git a/connector/avro/benchmarks/AvroWriteBenchmark-results.txt b/connector/avro/benchmarks/AvroWriteBenchmark-results.txt index 1a605c0ea0e90..03fea3f0379f4 100644 --- 
a/connector/avro/benchmarks/AvroWriteBenchmark-results.txt +++ b/connector/avro/benchmarks/AvroWriteBenchmark-results.txt @@ -1,56 +1,56 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1544 1567 34 10.2 98.1 1.0X -Output Single Double Column 1635 1647 17 9.6 104.0 0.9X -Output Int and String Column 3324 3334 15 4.7 211.3 0.5X -Output Partitions 2961 3047 122 5.3 188.2 0.5X -Output Buckets 3776 3778 3 4.2 240.1 0.4X +Output Single Int Column 1562 1564 3 10.1 99.3 1.0X +Output Single Double Column 1658 1677 27 9.5 105.4 0.9X +Output Int and String Column 3417 3456 55 4.6 217.2 0.5X +Output Partitions 2923 3064 199 5.4 185.8 0.5X +Output Buckets 3769 3772 4 4.2 239.6 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro compression with different codec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BZIP2: 130388 131379 1402 0.0 1303881.3 1.0X -DEFLATE: 6523 6538 21 0.0 65227.6 20.0X -UNCOMPRESSED: 5394 5425 43 0.0 53944.9 24.2X -SNAPPY: 4813 4816 3 0.0 48134.6 27.1X -XZ: 54364 54382 26 0.0 543640.7 2.4X -ZSTANDARD: 4864 4873 13 0.0 48635.9 26.8X +BZIP2: 132067 132334 377 0.0 1320668.2 1.0X +DEFLATE: 6456 6466 14 0.0 64562.5 20.5X +UNCOMPRESSED: 5188 5189 2 0.0 51879.6 25.5X +SNAPPY: 4678 4679 2 0.0 46777.8 28.2X +XZ: 42468 42597 183 0.0 424677.8 3.1X +ZSTANDARD: 4796 4801 7 0.0 47963.8 27.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro deflate with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -DEFLATE: deflate.level=1 4909 4916 9 0.0 49091.1 1.0X -DEFLATE: deflate.level=3 4874 4903 42 0.0 48735.8 1.0X -DEFLATE: deflate.level=5 6460 6473 19 0.0 64601.7 0.8X -DEFLATE: deflate.level=7 6450 6482 46 0.0 64497.5 0.8X -DEFLATE: deflate.level=9 6875 6878 5 0.0 68745.4 0.7X +DEFLATE: deflate.level=1 4736 4751 21 0.0 47356.5 1.0X +DEFLATE: deflate.level=3 4795 4797 4 0.0 47945.1 1.0X +DEFLATE: deflate.level=5 6489 6492 4 0.0 64885.9 0.7X +DEFLATE: deflate.level=7 6464 6484 29 0.0 64640.7 0.7X +DEFLATE: deflate.level=9 6740 6761 30 0.0 67404.6 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro xz with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -XZ: xz.level=1 12210 12226 22 0.0 122101.7 1.0X -XZ: xz.level=3 22235 22235 0 0.0 222346.3 0.5X -XZ: xz.level=5 47597 47659 88 0.0 475969.7 0.3X -XZ: xz.level=7 69231 69482 356 0.0 692308.3 0.2X -XZ: xz.level=9 147042 148998 2766 0.0 1470415.9 0.1X +XZ: xz.level=1 12053 12062 13 0.0 120526.1 1.0X +XZ: xz.level=3 22766 22771 7 0.0 227656.6 0.5X +XZ: xz.level=5 40993 42080 1538 0.0 409927.7 0.3X +XZ: xz.level=7 64226 64623 562 0.0 642261.7 0.2X +XZ: xz.level=9 143378 145508 3013 0.0 1433775.6 0.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Avro zstandard with different levels: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------- -ZSTANDARD: zstandard.level=1 4750 4817 94 0.0 47504.2 1.0X -ZSTANDARD: zstandard.level=1, zstandard.bufferPool.enabled=true 4753 4802 69 0.0 47532.8 1.0X -ZSTANDARD: zstandard.level=3 4920 4924 6 0.0 49198.5 1.0X -ZSTANDARD: zstandard.level=3, zstandard.bufferPool.enabled=true 4792 4799 9 0.0 47921.8 1.0X -ZSTANDARD: zstandard.level=5 5240 5276 51 0.0 52404.0 0.9X -ZSTANDARD: zstandard.level=5, zstandard.bufferPool.enabled=true 5072 5101 41 0.0 50722.5 0.9X -ZSTANDARD: zstandard.level=7 5542 5591 69 0.0 55416.5 0.9X -ZSTANDARD: zstandard.level=7, zstandard.bufferPool.enabled=true 5605 5617 17 0.0 56050.4 0.8X -ZSTANDARD: zstandard.level=9 6311 6403 130 0.0 63109.5 0.8X -ZSTANDARD: zstandard.level=9, zstandard.bufferPool.enabled=true 6324 6331 10 0.0 63236.4 0.8X +ZSTANDARD: zstandard.level=1 4816 4828 16 0.0 48164.5 1.0X +ZSTANDARD: zstandard.level=1, zstandard.bufferPool.enabled=true 4669 4875 292 0.0 46692.3 1.0X +ZSTANDARD: zstandard.level=3 4849 4883 48 0.0 48492.9 1.0X +ZSTANDARD: zstandard.level=3, zstandard.bufferPool.enabled=true 4793 4796 4 0.0 47929.4 1.0X +ZSTANDARD: zstandard.level=5 5098 5145 66 0.0 50982.7 0.9X +ZSTANDARD: zstandard.level=5, zstandard.bufferPool.enabled=true 5011 5024 19 0.0 50106.6 1.0X +ZSTANDARD: zstandard.level=7 5480 5502 31 0.0 54796.9 0.9X +ZSTANDARD: zstandard.level=7, zstandard.bufferPool.enabled=true 5459 5469 14 0.0 54591.1 0.9X +ZSTANDARD: zstandard.level=9 6319 6343 34 0.0 63188.9 0.8X +ZSTANDARD: zstandard.level=9, zstandard.bufferPool.enabled=true 6146 6171 35 0.0 61460.8 0.8X diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroExpressionEvalUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroExpressionEvalUtils.scala new file mode 100644 index 0000000000000..1a9a3609c8a5e --- /dev/null +++ 
b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroExpressionEvalUtils.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema + +import org.apache.spark.sql.catalyst.util.{ParseMode, PermissiveMode} +import org.apache.spark.unsafe.types.UTF8String + +object AvroExpressionEvalUtils { + + def schemaOfAvro( + avroOptions: AvroOptions, + parseMode: ParseMode, + expectedSchema: Schema): UTF8String = { + val dt = SchemaConverters.toSqlType( + expectedSchema, + avroOptions.useStableIdForUnionType, + avroOptions.stableIdPrefixForUnionType, + avroOptions.recursiveFieldMaxDepth).dataType + val schema = parseMode match { + // With PermissiveMode, the output Catalyst row might contain columns of null values for + // corrupt records, even if some of the columns are not nullable in the user-provided schema. + // Therefore we force the schema to be all nullable here. 
+ case PermissiveMode => dt.asNullable + case _ => dt + } + UTF8String.fromString(schema.sql) + } +} diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaOfAvro.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaOfAvro.scala new file mode 100644 index 0000000000000..094fd4254e16a --- /dev/null +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaOfAvro.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema + +import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression, Literal, RuntimeReplaceable} +import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke +import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, PermissiveMode} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{DataType, ObjectType} + +private[sql] case class SchemaOfAvro( + jsonFormatSchema: String, + options: Map[String, String]) + extends LeafExpression with RuntimeReplaceable { + + override def dataType: DataType = SQLConf.get.defaultStringType + + override def nullable: Boolean = false + + @transient private lazy val avroOptions = AvroOptions(options) + + @transient private lazy val actualSchema = + new Schema.Parser().setValidateDefaults(false).parse(jsonFormatSchema) + + @transient private lazy val expectedSchema = avroOptions.schema.getOrElse(actualSchema) + + @transient private lazy val parseMode: ParseMode = { + val mode = avroOptions.parseMode + if (mode != PermissiveMode && mode != FailFastMode) { + throw QueryCompilationErrors.parseModeUnsupportedError( + prettyName, mode + ) + } + mode + } + + override def prettyName: String = "schema_of_avro" + + @transient private lazy val avroOptionsObjectType = ObjectType(classOf[AvroOptions]) + @transient private lazy val parseModeObjectType = ObjectType(classOf[ParseMode]) + @transient private lazy val schemaObjectType = ObjectType(classOf[Schema]) + + override def replacement: Expression = StaticInvoke( + AvroExpressionEvalUtils.getClass, + dataType, + "schemaOfAvro", + Seq( + Literal(avroOptions, avroOptionsObjectType), + Literal(parseMode, parseModeObjectType), + Literal(expectedSchema, schemaObjectType)), + Seq(avroOptionsObjectType, parseModeObjectType, schemaObjectType) + ) +} diff --git 
a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala index 8ec711b2757f5..e898253be1168 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala @@ -42,10 +42,12 @@ case class AvroTable( override def inferSchema(files: Seq[FileStatus]): Option[StructType] = AvroUtils.inferSchema(sparkSession, options.asScala.toMap, files) - override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder { - override def build(): Write = AvroWrite(paths, formatName, supportsDataType, info) + override def build(): Write = + AvroWrite(paths, formatName, supportsDataType, mergedWriteInfo(info)) } + } override def supportsDataType(dataType: DataType): Boolean = AvroUtils.supportsDataType(dataType) diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala index 096cdfe0b9ee4..8c128d4c7ea65 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala @@ -629,4 +629,40 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession { assert(readbackPerson2.get(2).toString === person2.get(2)) } } + + test("schema_of_avro") { + val df = spark.range(1) + val avroIntType = s""" + |{ + | "type": "int", + | "name": "id" + |}""".stripMargin + checkAnswer(df.select(functions.schema_of_avro(avroIntType)), Row("INT")) + + val avroStructType = + """ + |{ + | "type": "record", + | "name": "person", + | "fields": [ + | {"name": "name", "type": "string"}, + | {"name": "age", "type": "int"}, + | {"name": "country", "type": "string"} + | ] + |}""".stripMargin + 
checkAnswer(df.select(functions.schema_of_avro(avroStructType)), + Row("STRUCT")) + + val avroMultiType = + """ + |{ + | "type": "record", + | "name": "person", + | "fields": [ + | {"name": "u", "type": ["int", "string"]} + | ] + |}""".stripMargin + checkAnswer(df.select(functions.schema_of_avro(avroMultiType)), + Row("STRUCT NOT NULL>")) + } } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala index 631e9057f8d15..75df538678a3d 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.errors.DataTypeErrors.toSQLId import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.expressions.SparkUserDefinedFunction import org.apache.spark.sql.functions.{struct, to_json} -import org.apache.spark.sql.internal.{ColumnNodeToProtoConverter, DataFrameWriterImpl, DataFrameWriterV2Impl, MergeIntoWriterImpl, ToScalaUDF, UDFAdaptors, UnresolvedAttribute, UnresolvedRegex} +import org.apache.spark.sql.internal.{ColumnNodeToProtoConverter, DataFrameWriterImpl, DataFrameWriterV2Impl, MergeIntoWriterImpl, SubqueryExpressionNode, SubqueryType, ToScalaUDF, UDFAdaptors, UnresolvedAttribute, UnresolvedRegex} import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.types.{Metadata, StructType} import org.apache.spark.storage.StorageLevel @@ -288,9 +288,10 @@ class Dataset[T] private[sql] ( /** @inheritdoc */ def stat: DataFrameStatFunctions = new DataFrameStatFunctions(toDF()) - private def buildJoin(right: Dataset[_])(f: proto.Join.Builder => Unit): DataFrame = { + private def buildJoin(right: Dataset[_], cols: Seq[Column] = Seq.empty)( + f: proto.Join.Builder => Unit): DataFrame = { checkSameSparkSession(right) - sparkSession.newDataFrame { builder => + 
sparkSession.newDataFrame(cols) { builder => val joinBuilder = builder.getJoinBuilder joinBuilder.setLeft(plan.getRoot).setRight(right.plan.getRoot) f(joinBuilder) @@ -334,7 +335,7 @@ class Dataset[T] private[sql] ( /** @inheritdoc */ def join(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame = { - buildJoin(right) { builder => + buildJoin(right, Seq(joinExprs)) { builder => builder .setJoinType(toJoinType(joinType)) .setJoinCondition(joinExprs.expr) @@ -383,11 +384,50 @@ class Dataset[T] private[sql] ( } } + private def lateralJoin( + right: DS[_], + joinExprs: Option[Column], + joinType: String): DataFrame = { + val joinTypeValue = toJoinType(joinType) + joinTypeValue match { + case proto.Join.JoinType.JOIN_TYPE_INNER | proto.Join.JoinType.JOIN_TYPE_LEFT_OUTER | + proto.Join.JoinType.JOIN_TYPE_CROSS => + case _ => + throw new IllegalArgumentException(s"Unsupported lateral join type $joinType") + } + sparkSession.newDataFrame(joinExprs.toSeq) { builder => + val lateralJoinBuilder = builder.getLateralJoinBuilder + lateralJoinBuilder.setLeft(plan.getRoot).setRight(right.plan.getRoot) + joinExprs.foreach(c => lateralJoinBuilder.setJoinCondition(c.expr)) + lateralJoinBuilder.setJoinType(joinTypeValue) + } + } + + /** @inheritdoc */ + def lateralJoin(right: DS[_]): DataFrame = { + lateralJoin(right, None, "inner") + } + + /** @inheritdoc */ + def lateralJoin(right: DS[_], joinExprs: Column): DataFrame = { + lateralJoin(right, Some(joinExprs), "inner") + } + + /** @inheritdoc */ + def lateralJoin(right: DS[_], joinType: String): DataFrame = { + lateralJoin(right, None, joinType) + } + + /** @inheritdoc */ + def lateralJoin(right: DS[_], joinExprs: Column, joinType: String): DataFrame = { + lateralJoin(right, Some(joinExprs), joinType) + } + override protected def sortInternal(global: Boolean, sortCols: Seq[Column]): Dataset[T] = { val sortExprs = sortCols.map { c => ColumnNodeToProtoConverter(c.sortOrder).getSortOrder } - 
sparkSession.newDataset(agnosticEncoder) { builder => + sparkSession.newDataset(agnosticEncoder, sortCols) { builder => builder.getSortBuilder .setInput(plan.getRoot) .setIsGlobal(global) @@ -463,7 +503,7 @@ class Dataset[T] private[sql] ( * methods and typed select methods is the encoder used to build the return dataset. */ private def selectUntyped(encoder: AgnosticEncoder[_], cols: Seq[Column]): Dataset[_] = { - sparkSession.newDataset(encoder) { builder => + sparkSession.newDataset(encoder, cols) { builder => builder.getProjectBuilder .setInput(plan.getRoot) .addAllExpressions(cols.map(_.typedExpr(this.encoder)).asJava) @@ -471,29 +511,32 @@ class Dataset[T] private[sql] ( } /** @inheritdoc */ - def filter(condition: Column): Dataset[T] = sparkSession.newDataset(agnosticEncoder) { - builder => + def filter(condition: Column): Dataset[T] = { + sparkSession.newDataset(agnosticEncoder, Seq(condition)) { builder => builder.getFilterBuilder.setInput(plan.getRoot).setCondition(condition.expr) + } } private def buildUnpivot( ids: Array[Column], valuesOption: Option[Array[Column]], variableColumnName: String, - valueColumnName: String): DataFrame = sparkSession.newDataFrame { builder => - val unpivot = builder.getUnpivotBuilder - .setInput(plan.getRoot) - .addAllIds(ids.toImmutableArraySeq.map(_.expr).asJava) - .setVariableColumnName(variableColumnName) - .setValueColumnName(valueColumnName) - valuesOption.foreach { values => - unpivot.getValuesBuilder - .addAllValues(values.toImmutableArraySeq.map(_.expr).asJava) + valueColumnName: String): DataFrame = { + sparkSession.newDataFrame(ids.toSeq ++ valuesOption.toSeq.flatten) { builder => + val unpivot = builder.getUnpivotBuilder + .setInput(plan.getRoot) + .addAllIds(ids.toImmutableArraySeq.map(_.expr).asJava) + .setVariableColumnName(variableColumnName) + .setValueColumnName(valueColumnName) + valuesOption.foreach { values => + unpivot.getValuesBuilder + .addAllValues(values.toImmutableArraySeq.map(_.expr).asJava) + } } 
} private def buildTranspose(indices: Seq[Column]): DataFrame = - sparkSession.newDataFrame { builder => + sparkSession.newDataFrame(indices) { builder => val transpose = builder.getTransposeBuilder.setInput(plan.getRoot) indices.foreach { indexColumn => transpose.addIndexColumns(indexColumn.expr) @@ -585,18 +628,15 @@ class Dataset[T] private[sql] ( def transpose(): DataFrame = buildTranspose(Seq.empty) - // TODO(SPARK-50134): Support scalar Subquery API in Spark Connect - // scalastyle:off not.implemented.error.usage /** @inheritdoc */ def scalar(): Column = { - ??? + Column(SubqueryExpressionNode(plan.getRoot, SubqueryType.SCALAR)) } /** @inheritdoc */ def exists(): Column = { - ??? + Column(SubqueryExpressionNode(plan.getRoot, SubqueryType.EXISTS)) } - // scalastyle:on not.implemented.error.usage /** @inheritdoc */ def limit(n: Int): Dataset[T] = sparkSession.newDataset(agnosticEncoder) { builder => @@ -743,7 +783,7 @@ class Dataset[T] private[sql] ( val aliases = values.zip(names).map { case (value, name) => value.name(name).expr.getAlias } - sparkSession.newDataFrame { builder => + sparkSession.newDataFrame(values) { builder => builder.getWithColumnsBuilder .setInput(plan.getRoot) .addAllAliases(aliases.asJava) @@ -803,10 +843,12 @@ class Dataset[T] private[sql] ( @scala.annotation.varargs def drop(col: Column, cols: Column*): DataFrame = buildDrop(col +: cols) - private def buildDrop(cols: Seq[Column]): DataFrame = sparkSession.newDataFrame { builder => - builder.getDropBuilder - .setInput(plan.getRoot) - .addAllColumns(cols.map(_.expr).asJava) + private def buildDrop(cols: Seq[Column]): DataFrame = { + sparkSession.newDataFrame(cols) { builder => + builder.getDropBuilder + .setInput(plan.getRoot) + .addAllColumns(cols.map(_.expr).asJava) + } } private def buildDropByNames(cols: Seq[String]): DataFrame = sparkSession.newDataFrame { @@ -976,12 +1018,13 @@ class Dataset[T] private[sql] ( private def buildRepartitionByExpression( numPartitions: Option[Int], - 
partitionExprs: Seq[Column]): Dataset[T] = sparkSession.newDataset(agnosticEncoder) { - builder => + partitionExprs: Seq[Column]): Dataset[T] = { + sparkSession.newDataset(agnosticEncoder, partitionExprs) { builder => val repartitionBuilder = builder.getRepartitionByExpressionBuilder .setInput(plan.getRoot) .addAllPartitionExprs(partitionExprs.map(_.expr).asJava) numPartitions.foreach(repartitionBuilder.setNumPartitions) + } } /** @inheritdoc */ @@ -1113,7 +1156,7 @@ class Dataset[T] private[sql] ( /** @inheritdoc */ @scala.annotation.varargs def observe(name: String, expr: Column, exprs: Column*): Dataset[T] = { - sparkSession.newDataset(agnosticEncoder) { builder => + sparkSession.newDataset(agnosticEncoder, expr +: exprs) { builder => builder.getCollectMetricsBuilder .setInput(plan.getRoot) .setName(name) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index 63b5f27c4745e..d5505d2222c4f 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -27,7 +27,7 @@ import org.apache.spark.connect.proto import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{agnosticEncoderFor, ProductEncoder} import org.apache.spark.sql.connect.ConnectConversions._ -import org.apache.spark.sql.connect.common.UdfUtils +import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, UdfUtils} import org.apache.spark.sql.expressions.SparkUserDefinedFunction import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.ColumnNodeToProtoConverter.toExpr @@ -502,6 +502,7 @@ private class KeyValueGroupedDatasetImpl[K, V, IK, IV]( } val outputEncoder = agnosticEncoderFor[U] + val stateEncoder = 
agnosticEncoderFor[S] val nf = UDFAdaptors.flatMapGroupsWithStateWithMappedValues(func, valueMapFunc) sparkSession.newDataset[U](outputEncoder) { builder => @@ -509,11 +510,12 @@ private class KeyValueGroupedDatasetImpl[K, V, IK, IV]( groupMapBuilder .setInput(plan.getRoot) .addAllGroupingExpressions(groupingExprs) - .setFunc(getUdf(nf, outputEncoder)(ivEncoder)) + .setFunc(getUdf(nf, outputEncoder, stateEncoder)(ivEncoder)) .setIsMapGroupsWithState(isMapGroupWithState) .setOutputMode(if (outputMode.isEmpty) OutputMode.Update.toString else outputMode.get.toString) .setTimeoutConf(timeoutConf.toString) + .setStateSchema(DataTypeProtoConverter.toConnectProtoType(stateEncoder.schema)) if (initialStateImpl != null) { groupMapBuilder @@ -533,6 +535,21 @@ private class KeyValueGroupedDatasetImpl[K, V, IK, IV]( udf.apply(inputEncoders.map(_ => col("*")): _*).expr.getCommonInlineUserDefinedFunction } + private def getUdf[U: Encoder, S: Encoder]( + nf: AnyRef, + outputEncoder: AgnosticEncoder[U], + stateEncoder: AgnosticEncoder[S])( + inEncoders: AgnosticEncoder[_]*): proto.CommonInlineUserDefinedFunction = { + // Apply keyAs changes by setting kEncoder + // Add the state encoder to the inputEncoders. + val inputEncoders = kEncoder +: stateEncoder +: inEncoders + val udf = SparkUserDefinedFunction( + function = nf, + inputEncoders = inputEncoders, + outputEncoder = outputEncoder) + udf.apply(inputEncoders.map(_ => col("*")): _*).expr.getCommonInlineUserDefinedFunction + } + /** * We cannot deserialize a connect [[KeyValueGroupedDataset]] because of a class clash on the * server side. We null out the instance for now. 
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 5bded40b0d132..0944c88a67906 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -45,7 +45,7 @@ class RelationalGroupedDataset private[sql] ( import df.sparkSession.RichColumn protected def toDF(aggExprs: Seq[Column]): DataFrame = { - df.sparkSession.newDataFrame { builder => + df.sparkSession.newDataFrame(groupingExprs ++ aggExprs) { builder => val aggBuilder = builder.getAggregateBuilder .setInput(df.plan.getRoot) groupingExprs.foreach(c => aggBuilder.addGroupingExpressions(c.expr)) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLContext.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLContext.scala new file mode 100644 index 0000000000000..3603eb6ea508d --- /dev/null +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.util.{List => JList, Map => JMap, Properties} + +import scala.jdk.CollectionConverters.PropertiesHasAsScala +import scala.reflect.runtime.universe.TypeTag + +import org.apache.spark.SparkContext +import org.apache.spark.annotation.Stable +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.connect.ConnectClientUnsupportedErrors +import org.apache.spark.sql.connect.ConnectConversions._ +import org.apache.spark.sql.sources.BaseRelation +import org.apache.spark.sql.streaming.{DataStreamReader, StreamingQueryManager} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.ExecutionListenerManager + +@Stable +class SQLContext private[sql] (override val sparkSession: SparkSession) + extends api.SQLContext(sparkSession) { + + /** @inheritdoc */ + def newSession(): SQLContext = sparkSession.newSession().sqlContext + + /** @inheritdoc */ + def listenerManager: ExecutionListenerManager = sparkSession.listenerManager + + /** @inheritdoc */ + def setConf(props: Properties): Unit = sparkSession.conf.synchronized { + props.asScala.foreach { case (k, v) => sparkSession.conf.set(k, v) } + } + + /** @inheritdoc */ + def experimental: ExperimentalMethods = sparkSession.experimental + + /** @inheritdoc */ + def udf: UDFRegistration = sparkSession.udf + + // scalastyle:off + // Disable style checker so "implicits" object can start with lowercase i + + /** @inheritdoc */ + object implicits extends SQLImplicits { + + /** @inheritdoc */ + override protected def session: SparkSession = sparkSession + } + + // scalastyle:on + + /** @inheritdoc */ + def read: DataFrameReader = sparkSession.read + + /** @inheritdoc */ + def readStream: DataStreamReader = sparkSession.readStream + + /** + * Returns a `StreamingQueryManager` that allows managing all the + * 
[[org.apache.spark.sql.streaming.StreamingQuery StreamingQueries]] active on `this` context. + * + * @since 4.0.0 + */ + def streams: StreamingQueryManager = sparkSession.streams + + /** @inheritdoc */ + override def sparkContext: SparkContext = { + throw ConnectClientUnsupportedErrors.sparkContext() + } + + /** @inheritdoc */ + override def emptyDataFrame: Dataset[Row] = super.emptyDataFrame + + /** @inheritdoc */ + override def createDataFrame[A <: Product: TypeTag](rdd: RDD[A]): Dataset[Row] = + super.createDataFrame(rdd) + + /** @inheritdoc */ + override def createDataFrame[A <: Product: TypeTag](data: Seq[A]): Dataset[Row] = + super.createDataFrame(data) + + /** @inheritdoc */ + override def baseRelationToDataFrame(baseRelation: BaseRelation): Dataset[Row] = + super.baseRelationToDataFrame(baseRelation) + + /** @inheritdoc */ + override def createDataFrame(rowRDD: RDD[Row], schema: StructType): Dataset[Row] = + super.createDataFrame(rowRDD, schema) + + /** @inheritdoc */ + override def createDataset[T: Encoder](data: Seq[T]): Dataset[T] = super.createDataset(data) + + /** @inheritdoc */ + override def createDataset[T: Encoder](data: RDD[T]): Dataset[T] = super.createDataset(data) + + /** @inheritdoc */ + override def createDataset[T: Encoder](data: JList[T]): Dataset[T] = + super.createDataset(data) + + /** @inheritdoc */ + override def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): Dataset[Row] = + super.createDataFrame(rowRDD, schema) + + /** @inheritdoc */ + override def createDataFrame(rows: JList[Row], schema: StructType): Dataset[Row] = + super.createDataFrame(rows, schema) + + /** @inheritdoc */ + override def createDataFrame(rdd: RDD[_], beanClass: Class[_]): Dataset[Row] = + super.createDataFrame(rdd, beanClass) + + /** @inheritdoc */ + override def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): Dataset[Row] = + super.createDataFrame(rdd, beanClass) + + /** @inheritdoc */ + override def createDataFrame(data: JList[_], beanClass: 
Class[_]): Dataset[Row] = + super.createDataFrame(data, beanClass) + + /** @inheritdoc */ + override def createExternalTable(tableName: String, path: String): Dataset[Row] = + super.createExternalTable(tableName, path) + + /** @inheritdoc */ + override def createExternalTable( + tableName: String, + path: String, + source: String): Dataset[Row] = { + super.createExternalTable(tableName, path, source) + } + + /** @inheritdoc */ + override def createExternalTable( + tableName: String, + source: String, + options: JMap[String, String]): Dataset[Row] = { + super.createExternalTable(tableName, source, options) + } + + /** @inheritdoc */ + override def createExternalTable( + tableName: String, + source: String, + options: Map[String, String]): Dataset[Row] = { + super.createExternalTable(tableName, source, options) + } + + /** @inheritdoc */ + override def createExternalTable( + tableName: String, + source: String, + schema: StructType, + options: JMap[String, String]): Dataset[Row] = { + super.createExternalTable(tableName, source, schema, options) + } + + /** @inheritdoc */ + override def createExternalTable( + tableName: String, + source: String, + schema: StructType, + options: Map[String, String]): Dataset[Row] = { + super.createExternalTable(tableName, source, schema, options) + } + + /** @inheritdoc */ + override def range(end: Long): Dataset[Row] = super.range(end) + + /** @inheritdoc */ + override def range(start: Long, end: Long): Dataset[Row] = super.range(start, end) + + /** @inheritdoc */ + override def range(start: Long, end: Long, step: Long): Dataset[Row] = + super.range(start, end, step) + + /** @inheritdoc */ + override def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[Row] = + super.range(start, end, step, numPartitions) + + /** @inheritdoc */ + override def sql(sqlText: String): Dataset[Row] = super.sql(sqlText) + + /** @inheritdoc */ + override def table(tableName: String): Dataset[Row] = super.table(tableName) + + /** 
@inheritdoc */ + override def tables(): Dataset[Row] = super.tables() + + /** @inheritdoc */ + override def tables(databaseName: String): Dataset[Row] = super.tables(databaseName) + + /** @inheritdoc */ + override def applySchema(rowRDD: RDD[Row], schema: StructType): Dataset[Row] = + super.applySchema(rowRDD, schema) + + /** @inheritdoc */ + override def applySchema(rowRDD: JavaRDD[Row], schema: StructType): Dataset[Row] = + super.applySchema(rowRDD, schema) + + /** @inheritdoc */ + override def applySchema(rdd: RDD[_], beanClass: Class[_]): Dataset[Row] = + super.applySchema(rdd, beanClass) + + /** @inheritdoc */ + override def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): Dataset[Row] = + super.applySchema(rdd, beanClass) + + /** @inheritdoc */ + @scala.annotation.varargs + override def parquetFile(paths: String*): Dataset[Row] = super.parquetFile(paths: _*) + + /** @inheritdoc */ + override def jsonFile(path: String): Dataset[Row] = super.jsonFile(path) + + /** @inheritdoc */ + override def jsonFile(path: String, schema: StructType): Dataset[Row] = + super.jsonFile(path, schema) + + /** @inheritdoc */ + override def jsonFile(path: String, samplingRatio: Double): Dataset[Row] = + super.jsonFile(path, samplingRatio) + + /** @inheritdoc */ + override def jsonRDD(json: RDD[String]): Dataset[Row] = super.jsonRDD(json) + + /** @inheritdoc */ + override def jsonRDD(json: JavaRDD[String]): Dataset[Row] = super.jsonRDD(json) + + /** @inheritdoc */ + override def jsonRDD(json: RDD[String], schema: StructType): Dataset[Row] = + super.jsonRDD(json, schema) + + /** @inheritdoc */ + override def jsonRDD(json: JavaRDD[String], schema: StructType): Dataset[Row] = + super.jsonRDD(json, schema) + + /** @inheritdoc */ + override def jsonRDD(json: RDD[String], samplingRatio: Double): Dataset[Row] = + super.jsonRDD(json, samplingRatio) + + /** @inheritdoc */ + override def jsonRDD(json: JavaRDD[String], samplingRatio: Double): Dataset[Row] = + super.jsonRDD(json, 
samplingRatio) + + /** @inheritdoc */ + override def load(path: String): Dataset[Row] = super.load(path) + + /** @inheritdoc */ + override def load(path: String, source: String): Dataset[Row] = super.load(path, source) + + /** @inheritdoc */ + override def load(source: String, options: JMap[String, String]): Dataset[Row] = + super.load(source, options) + + /** @inheritdoc */ + override def load(source: String, options: Map[String, String]): Dataset[Row] = + super.load(source, options) + + /** @inheritdoc */ + override def load( + source: String, + schema: StructType, + options: JMap[String, String]): Dataset[Row] = { + super.load(source, schema, options) + } + + /** @inheritdoc */ + override def load( + source: String, + schema: StructType, + options: Map[String, String]): Dataset[Row] = { + super.load(source, schema, options) + } + + /** @inheritdoc */ + override def jdbc(url: String, table: String): Dataset[Row] = super.jdbc(url, table) + + /** @inheritdoc */ + override def jdbc( + url: String, + table: String, + columnName: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int): Dataset[Row] = { + super.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions) + } + + /** @inheritdoc */ + override def jdbc(url: String, table: String, theParts: Array[String]): Dataset[Row] = { + super.jdbc(url, table, theParts) + } +} +object SQLContext extends api.SQLContextCompanion { + + override private[sql] type SQLContextImpl = SQLContext + override private[sql] type SparkContextImpl = SparkContext + + /** + * Get the singleton SQLContext if it exists or create a new one. + * + * This function can be used to create a singleton SQLContext object that can be shared across + * the JVM. + * + * If there is an active SQLContext for current thread, it will be returned instead of the + * global one. + * + * @param sparkContext + * The SparkContext. This parameter is not used in Spark Connect. 
+ * + * @since 4.0.0 + */ + def getOrCreate(sparkContext: SparkContext): SQLContext = { + SparkSession.builder().getOrCreate().sqlContext + } + + /** @inheritdoc */ + override def setActive(sqlContext: SQLContext): Unit = super.setActive(sqlContext) +} diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index 4690253da808b..993b09ace9139 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql /** @inheritdoc */ -abstract class SQLImplicits private[sql] (override val session: SparkSession) - extends api.SQLImplicits { +abstract class SQLImplicits extends api.SQLImplicits { type DS[U] = Dataset[U] + + protected def session: SparkSession } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index 7edb1f51f11b1..89519034d07cc 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -34,6 +34,7 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.connect.proto import org.apache.spark.connect.proto.ExecutePlanResponse +import org.apache.spark.connect.proto.ExecutePlanResponse.ObservedMetrics import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalog.Catalog @@ -45,7 +46,7 @@ import org.apache.spark.sql.connect.client.{ClassFinder, CloseableIterator, Spar import org.apache.spark.sql.connect.client.SparkConnectClient.Configuration import 
org.apache.spark.sql.connect.client.arrow.ArrowSerializer import org.apache.spark.sql.functions.lit -import org.apache.spark.sql.internal.{CatalogImpl, ConnectRuntimeConfig, SessionCleaner, SessionState, SharedState, SqlApiConf} +import org.apache.spark.sql.internal.{CatalogImpl, ConnectRuntimeConfig, SessionCleaner, SessionState, SharedState, SqlApiConf, SubqueryExpressionNode} import org.apache.spark.sql.internal.ColumnNodeToProtoConverter.{toExpr, toTypedExpr} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.streaming.DataStreamReader @@ -187,8 +188,7 @@ class SparkSession private[sql] ( throw ConnectClientUnsupportedErrors.sessionState() /** @inheritdoc */ - override def sqlContext: SQLContext = - throw ConnectClientUnsupportedErrors.sqlContext() + override val sqlContext: SQLContext = new SQLContext(this) /** @inheritdoc */ override def listenerManager: ExecutionListenerManager = @@ -210,15 +210,38 @@ class SparkSession private[sql] ( throw ConnectClientUnsupportedErrors.executeCommand() /** @inheritdoc */ - @Experimental - def sql(sqlText: String, args: Array[_]): DataFrame = newDataFrame { builder => + def sql(sqlText: String, args: Array[_]): DataFrame = { + val sqlCommand = proto.SqlCommand + .newBuilder() + .setSql(sqlText) + .addAllPosArguments(args.map(lit(_).expr).toImmutableArraySeq.asJava) + .build() + sql(sqlCommand) + } + + /** @inheritdoc */ + def sql(sqlText: String, args: Map[String, Any]): DataFrame = { + sql(sqlText, args.asJava) + } + + /** @inheritdoc */ + override def sql(sqlText: String, args: java.util.Map[String, Any]): DataFrame = { + val sqlCommand = proto.SqlCommand + .newBuilder() + .setSql(sqlText) + .putAllNamedArguments(args.asScala.map { case (k, v) => (k, lit(v).expr) }.asJava) + .build() + sql(sqlCommand) + } + + /** @inheritdoc */ + override def sql(query: String): DataFrame = { + sql(query, Array.empty) + } + + private def sql(sqlCommand: proto.SqlCommand): DataFrame = newDataFrame { builder => // 
Send the SQL once to the server and then check the output. - val cmd = newCommand(b => - b.setSqlCommand( - proto.SqlCommand - .newBuilder() - .setSql(sqlText) - .addAllPosArguments(args.map(lit(_).expr).toImmutableArraySeq.asJava))) + val cmd = newCommand(b => b.setSqlCommand(sqlCommand)) val plan = proto.Plan.newBuilder().setCommand(cmd) val responseIter = client.execute(plan.build()) @@ -234,43 +257,6 @@ class SparkSession private[sql] ( } } - /** @inheritdoc */ - @Experimental - def sql(sqlText: String, args: Map[String, Any]): DataFrame = { - sql(sqlText, args.asJava) - } - - /** @inheritdoc */ - @Experimental - override def sql(sqlText: String, args: java.util.Map[String, Any]): DataFrame = newDataFrame { - builder => - // Send the SQL once to the server and then check the output. - val cmd = newCommand(b => - b.setSqlCommand( - proto.SqlCommand - .newBuilder() - .setSql(sqlText) - .putAllNamedArguments(args.asScala.map { case (k, v) => (k, lit(v).expr) }.asJava))) - val plan = proto.Plan.newBuilder().setCommand(cmd) - val responseIter = client.execute(plan.build()) - - try { - val response = responseIter - .find(_.hasSqlCommandResult) - .getOrElse(throw new RuntimeException("SQLCommandResult must be present")) - // Update the builder with the values from the result. 
- builder.mergeFrom(response.getSqlCommandResult.getRelation) - } finally { - // consume the rest of the iterator - responseIter.foreach(_ => ()) - } - } - - /** @inheritdoc */ - override def sql(query: String): DataFrame = { - sql(query, Array.empty) - } - /** @inheritdoc */ def read: DataFrameReader = new DataFrameReader(this) @@ -314,7 +300,9 @@ class SparkSession private[sql] ( // scalastyle:off /** @inheritdoc */ - object implicits extends SQLImplicits(this) + object implicits extends SQLImplicits { + override protected def session: SparkSession = SparkSession.this + } // scalastyle:on /** @inheritdoc */ @@ -336,20 +324,111 @@ class SparkSession private[sql] ( } } + /** + * Create a DataFrame including the proto plan built by the given function. + * + * @param f + * The function to build the proto plan. + * @return + * The DataFrame created from the proto plan. + */ @Since("4.0.0") @DeveloperApi def newDataFrame(f: proto.Relation.Builder => Unit): DataFrame = { newDataset(UnboundRowEncoder)(f) } + /** + * Create a DataFrame including the proto plan built by the given function. + * + * Use this method when columns are used to create a new DataFrame. When there are columns + * referring to other Dataset or DataFrame, the plan will be wrapped with a `WithRelation`. + * + * {{{ + * with_relations [id 10] + * root: plan [id 9] using columns referring to other Dataset or DataFrame, holding plan ids + * reference: + * refs#1: [id 8] plan for the reference 1 + * refs#2: [id 5] plan for the reference 2 + * }}} + * + * @param cols + * The columns to be used in the DataFrame. + * @param f + * The function to build the proto plan. + * @return + * The DataFrame created from the proto plan. + */ + @Since("4.0.0") + @DeveloperApi + def newDataFrame(cols: Seq[Column])(f: proto.Relation.Builder => Unit): DataFrame = { + newDataset(UnboundRowEncoder, cols)(f) + } + + /** + * Create a Dataset including the proto plan built by the given function. 
+ * + * @param encoder + * The encoder for the Dataset. + * @param f + * The function to build the proto plan. + * @return + * The Dataset created from the proto plan. + */ @Since("4.0.0") @DeveloperApi def newDataset[T](encoder: AgnosticEncoder[T])( f: proto.Relation.Builder => Unit): Dataset[T] = { + newDataset[T](encoder, Seq.empty)(f) + } + + /** + * Create a Dataset including the proto plan built by the given function. + * + * Use this method when columns are used to create a new Dataset. When there are columns + * referring to other Dataset or DataFrame, the plan will be wrapped with a `WithRelation`. + * + * {{{ + * with_relations [id 10] + * root: plan [id 9] using columns referring to other Dataset or DataFrame, holding plan ids + * reference: + * refs#1: [id 8] plan for the reference 1 + * refs#2: [id 5] plan for the reference 2 + * }}} + * + * @param encoder + * The encoder for the Dataset. + * @param cols + * The columns to be used in the DataFrame. + * @param f + * The function to build the proto plan. + * @return + * The Dataset created from the proto plan. 
+ */ + @Since("4.0.0") + @DeveloperApi + def newDataset[T](encoder: AgnosticEncoder[T], cols: Seq[Column])( + f: proto.Relation.Builder => Unit): Dataset[T] = { + val references = cols.flatMap(_.node.collect { case n: SubqueryExpressionNode => + n.relation + }) + val builder = proto.Relation.newBuilder() f(builder) builder.getCommonBuilder.setPlanId(planIdGenerator.getAndIncrement()) - val plan = proto.Plan.newBuilder().setRoot(builder).build() + + val rootBuilder = if (references.length == 0) { + builder + } else { + val rootBuilder = proto.Relation.newBuilder() + rootBuilder.getWithRelationsBuilder + .setRoot(builder) + .addAllReferences(references.asJava) + rootBuilder.getCommonBuilder.setPlanId(planIdGenerator.getAndIncrement()) + rootBuilder + } + + val plan = proto.Plan.newBuilder().setRoot(rootBuilder).build() new Dataset[T](this, plan, encoder) } @@ -385,13 +464,8 @@ class SparkSession private[sql] ( private[sql] def timeZoneId: String = conf.get(SqlApiConf.SESSION_LOCAL_TIMEZONE_KEY) private[sql] def execute[T](plan: proto.Plan, encoder: AgnosticEncoder[T]): SparkResult[T] = { - val value = client.execute(plan) - new SparkResult( - value, - allocator, - encoder, - timeZoneId, - Some(setMetricsAndUnregisterObservation)) + val value = executeInternal(plan) + new SparkResult(value, allocator, encoder, timeZoneId) } private[sql] def execute(f: proto.Relation.Builder => Unit): Unit = { @@ -400,7 +474,7 @@ class SparkSession private[sql] ( builder.getCommonBuilder.setPlanId(planIdGenerator.getAndIncrement()) val plan = proto.Plan.newBuilder().setRoot(builder).build() // .foreach forces that the iterator is consumed and closed - client.execute(plan).foreach(_ => ()) + executeInternal(plan).foreach(_ => ()) } @Since("4.0.0") @@ -409,11 +483,26 @@ class SparkSession private[sql] ( val plan = proto.Plan.newBuilder().setCommand(command).build() // .toSeq forces that the iterator is consumed and closed. On top, ignore all // progress messages. 
- client.execute(plan).filter(!_.hasExecutionProgress).toSeq + executeInternal(plan).filter(!_.hasExecutionProgress).toSeq } - private[sql] def execute(plan: proto.Plan): CloseableIterator[ExecutePlanResponse] = - client.execute(plan) + /** + * The real `execute` method that calls into `SparkConnectClient`. + * + * Here we inject a lazy map to process registered observed metrics, so consumers of the + * returned iterator does not need to worry about it. + * + * Please make sure all `execute` methods call this method. + */ + private[sql] def executeInternal(plan: proto.Plan): CloseableIterator[ExecutePlanResponse] = { + client + .execute(plan) + .map { response => + // Note, this map() is lazy. + processRegisteredObservedMetrics(response.getObservedMetricsList) + response + } + } private[sql] def registerUdf(udf: proto.CommonInlineUserDefinedFunction): Unit = { val command = proto.Command.newBuilder().setRegisterFunction(udf).build() @@ -555,10 +644,14 @@ class SparkSession private[sql] ( observationRegistry.putIfAbsent(planId, observation) } - private[sql] def setMetricsAndUnregisterObservation(planId: Long, metrics: Row): Unit = { - val observationOrNull = observationRegistry.remove(planId) - if (observationOrNull != null) { - observationOrNull.setMetricsAndNotify(metrics) + private def processRegisteredObservedMetrics(metrics: java.util.List[ObservedMetrics]): Unit = { + metrics.asScala.map { metric => + // Here we only process metrics that belong to a registered Observation object. + // All metrics, whether registered or not, will be collected by `SparkResult`. 
+ val observationOrNull = observationRegistry.remove(metric.getPlanId) + if (observationOrNull != null) { + observationOrNull.setMetricsAndNotify(SparkResult.transformObservedMetrics(metric)) + } } } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/TableValuedFunction.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/TableValuedFunction.scala index 4f2687b537862..2a5afd1d58717 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/TableValuedFunction.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/TableValuedFunction.scala @@ -47,7 +47,7 @@ class TableValuedFunction(sparkSession: SparkSession) extends api.TableValuedFun } private def fn(name: String, args: Seq[Column]): Dataset[Row] = { - sparkSession.newDataFrame { builder => + sparkSession.newDataFrame(args) { builder => builder.getUnresolvedTableValuedFunctionBuilder .setFunctionName(name) .addAllArguments(args.map(toExpr).asJava) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/ConnectClientUnsupportedErrors.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/ConnectClientUnsupportedErrors.scala index e73bcb8a0059d..5783a20348d75 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/ConnectClientUnsupportedErrors.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/connect/ConnectClientUnsupportedErrors.scala @@ -53,7 +53,4 @@ private[sql] object ConnectClientUnsupportedErrors { def sparkContext(): SparkUnsupportedOperationException = unsupportedFeatureException("SESSION_SPARK_CONTEXT") - - def sqlContext(): SparkUnsupportedOperationException = - unsupportedFeatureException("SESSION_SQL_CONTEXT") } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/ConnectRuntimeConfig.scala 
b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/ConnectRuntimeConfig.scala index be1a13cb2fed2..74348e8e015e2 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/ConnectRuntimeConfig.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/ConnectRuntimeConfig.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.internal import org.apache.spark.connect.proto.{ConfigRequest, ConfigResponse, KeyValue} import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{ConfigEntry, ConfigReader, OptionalConfigEntry} import org.apache.spark.sql.RuntimeConfig import org.apache.spark.sql.connect.client.SparkConnectClient @@ -28,7 +29,7 @@ import org.apache.spark.sql.connect.client.SparkConnectClient */ class ConnectRuntimeConfig private[sql] (client: SparkConnectClient) extends RuntimeConfig - with Logging { + with Logging { self => /** @inheritdoc */ def set(key: String, value: String): Unit = { @@ -37,6 +38,13 @@ class ConnectRuntimeConfig private[sql] (client: SparkConnectClient) } } + /** @inheritdoc */ + override private[sql] def set[T](entry: ConfigEntry[T], value: T): Unit = { + require(entry != null, "entry cannot be null") + require(value != null, s"value cannot be null for key: ${entry.key}") + set(entry.key, entry.stringConverter(value)) + } + /** @inheritdoc */ @throws[NoSuchElementException]("if the key is not set and there is no default value") def get(key: String): String = getOption(key).getOrElse { @@ -45,11 +53,39 @@ class ConnectRuntimeConfig private[sql] (client: SparkConnectClient) /** @inheritdoc */ def get(key: String, default: String): String = { - executeConfigRequestSingleValue { builder => - builder.getGetWithDefaultBuilder.addPairsBuilder().setKey(key).setValue(default) + val kv = executeConfigRequestSinglePair { builder => + val pairsBuilder = builder.getGetWithDefaultBuilder + .addPairsBuilder() + .setKey(key) + if (default != null) { + 
pairsBuilder.setValue(default) + } + } + if (kv.hasValue) { + kv.getValue + } else { + default } } + /** @inheritdoc */ + override private[sql] def get[T](entry: ConfigEntry[T]): T = { + require(entry != null, "entry cannot be null") + entry.readFrom(reader) + } + + /** @inheritdoc */ + override private[sql] def get[T](entry: OptionalConfigEntry[T]): Option[T] = { + require(entry != null, "entry cannot be null") + entry.readFrom(reader) + } + + /** @inheritdoc */ + override private[sql] def get[T](entry: ConfigEntry[T], default: T): T = { + require(entry != null, "entry cannot be null") + Option(get(entry.key, null)).map(entry.valueConverter).getOrElse(default) + } + /** @inheritdoc */ def getAll: Map[String, String] = { val response = executeConfigRequest { builder => @@ -65,11 +101,11 @@ class ConnectRuntimeConfig private[sql] (client: SparkConnectClient) /** @inheritdoc */ def getOption(key: String): Option[String] = { - val pair = executeConfigRequestSinglePair { builder => + val kv = executeConfigRequestSinglePair { builder => builder.getGetOptionBuilder.addKeys(key) } - if (pair.hasValue) { - Option(pair.getValue) + if (kv.hasValue) { + Option(kv.getValue) } else { None } @@ -84,17 +120,11 @@ class ConnectRuntimeConfig private[sql] (client: SparkConnectClient) /** @inheritdoc */ def isModifiable(key: String): Boolean = { - val modifiable = executeConfigRequestSingleValue { builder => + val kv = executeConfigRequestSinglePair { builder => builder.getIsModifiableBuilder.addKeys(key) } - java.lang.Boolean.valueOf(modifiable) - } - - private def executeConfigRequestSingleValue( - f: ConfigRequest.Operation.Builder => Unit): String = { - val pair = executeConfigRequestSinglePair(f) - require(pair.hasValue, "The returned pair does not have a value set") - pair.getValue + require(kv.hasValue, "The returned pair does not have a value set") + java.lang.Boolean.valueOf(kv.getValue) } private def executeConfigRequestSinglePair( @@ -113,4 +143,6 @@ class 
ConnectRuntimeConfig private[sql] (client: SparkConnectClient) } response } + + private val reader = new ConfigReader((key: String) => Option(self.get(key, null))) } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/columnNodeSupport.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/columnNodeSupport.scala index 34a8a91a0ddf8..7802d9750bbc3 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/columnNodeSupport.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/columnNodeSupport.scala @@ -73,13 +73,19 @@ object ColumnNodeToProtoConverter extends (ColumnNode => proto.Expression) { .setColName(regex) planId.foreach(b.setPlanId) - case UnresolvedFunction(functionName, arguments, isDistinct, isUserDefinedFunction, _, _) => - // TODO(SPARK-49087) use internal namespace. + case UnresolvedFunction( + functionName, + arguments, + isDistinct, + isUserDefinedFunction, + isInternal, + _) => builder.getUnresolvedFunctionBuilder .setFunctionName(functionName) .setIsUserDefinedFunction(isUserDefinedFunction) .setIsDistinct(isDistinct) .addAllArguments(arguments.map(apply(_, e)).asJava) + .setIsInternal(isInternal) case Alias(child, name, metadata, _) => val b = builder.getAliasBuilder.setExpr(apply(child, e)) @@ -156,6 +162,7 @@ object ColumnNodeToProtoConverter extends (ColumnNode => proto.Expression) { case CaseWhenOtherwise(branches, otherwise, _) => val b = builder.getUnresolvedFunctionBuilder .setFunctionName("when") + .setIsInternal(false) branches.foreach { case (condition, value) => b.addArguments(apply(condition, e)) b.addArguments(apply(value, e)) @@ -164,6 +171,18 @@ object ColumnNodeToProtoConverter extends (ColumnNode => proto.Expression) { b.addArguments(apply(value, e)) } + case LazyExpression(child, _) => + builder.getLazyExpressionBuilder.setChild(apply(child, e)) + + case SubqueryExpressionNode(relation, subqueryType, _) => + 
val b = builder.getSubqueryExpressionBuilder + b.setSubqueryType(subqueryType match { + case SubqueryType.SCALAR => proto.SubqueryExpression.SubqueryType.SUBQUERY_TYPE_SCALAR + case SubqueryType.EXISTS => proto.SubqueryExpression.SubqueryType.SUBQUERY_TYPE_EXISTS + }) + assert(relation.hasCommon && relation.getCommon.hasPlanId) + b.setPlanId(relation.getCommon.getPlanId) + case ProtoColumnNode(e, _) => return e @@ -214,4 +233,24 @@ case class ProtoColumnNode( override val origin: Origin = CurrentOrigin.get) extends ColumnNode { override def sql: String = expr.toString + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty +} + +sealed trait SubqueryType + +object SubqueryType { + case object SCALAR extends SubqueryType + case object EXISTS extends SubqueryType +} + +case class SubqueryExpressionNode( + relation: proto.Relation, + subqueryType: SubqueryType, + override val origin: Origin = CurrentOrigin.get) + extends ColumnNode { + override def sql: String = subqueryType match { + case SubqueryType.SCALAR => s"($relation)" + case _ => s"$subqueryType ($relation)" + } + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 9fcc31e562682..b2c4fcf64e70f 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -135,7 +135,10 @@ final class DataStreamWriter[T] private[sql] (ds: Dataset[T]) extends api.DataSt /** @inheritdoc */ @Evolving def foreachBatch(function: (Dataset[T], Long) => Unit): this.type = { - val serializedFn = SparkSerDeUtils.serialize(function) + // SPARK-50661: the client should send the encoder for the input dataset together with the + // 
function to the server. + val serializedFn = + SparkSerDeUtils.serialize(ForeachWriterPacket(function, ds.agnosticEncoder)) sinkBuilder.getForeachBatchBuilder.getScalaFunctionBuilder .setPayload(ByteString.copyFrom(serializedFn)) .setOutputType(DataTypeProtoConverter.toConnectProtoType(NullType)) // Unused. diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index 0371981b728d1..c7979b8e033ea 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -33,6 +33,7 @@ import org.scalatest.PrivateMethodTester import org.apache.spark.{SparkArithmeticException, SparkException, SparkUpgradeException} import org.apache.spark.SparkBuildInfo.{spark_version => SPARK_VERSION} +import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, TableAlreadyExistsException, TempTableAlreadyExistsException} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.StringEncoder import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema @@ -1006,8 +1007,12 @@ class ClientE2ETestSuite test("RuntimeConfig") { intercept[NoSuchElementException](spark.conf.get("foo.bar")) assert(spark.conf.getOption("foo.bar").isEmpty) + assert(spark.conf.get("foo.bar", "nope") == "nope") + assert(spark.conf.get("foo.bar", null) == null) spark.conf.set("foo.bar", value = true) assert(spark.conf.getOption("foo.bar") === Option("true")) + assert(spark.conf.get("foo.bar", "nope") === "true") + assert(spark.conf.get("foo.bar", null) === "true") spark.conf.set("foo.bar.numBaz", 100L) assert(spark.conf.get("foo.bar.numBaz") === "100") spark.conf.set("foo.bar.name", "donkey") @@ -1020,6 +1025,24 @@ class ClientE2ETestSuite 
assert(spark.conf.isModifiable("spark.sql.ansi.enabled")) assert(!spark.conf.isModifiable("spark.sql.globalTempDatabase")) intercept[Exception](spark.conf.set("spark.sql.globalTempDatabase", "/dev/null")) + + val entry = ConfigBuilder("my.simple.conf").intConf.createOptional + intercept[NoSuchElementException](spark.conf.get(entry.key)) + assert(spark.conf.get(entry).isEmpty) + assert(spark.conf.get(entry, Option(55)) === Option(55)) + spark.conf.set(entry, Option(33)) + assert(spark.conf.get(entry.key) === "33") + assert(spark.conf.get(entry) === Option(33)) + assert(spark.conf.get(entry, Option(55)) === Option(33)) + + val entryWithDefault = ConfigBuilder("my.important.conf").intConf.createWithDefault(10) + intercept[NoSuchElementException](spark.conf.get(entryWithDefault.key)) + assert(spark.conf.get(entryWithDefault) === 10) + assert(spark.conf.get(entryWithDefault, 11) === 11) + spark.conf.set(entryWithDefault, 12) + assert(spark.conf.get(entryWithDefault.key) === "12") + assert(spark.conf.get(entryWithDefault) === 12) + assert(spark.conf.get(entryWithDefault, 11) === 12) } test("SparkVersion") { @@ -1536,28 +1559,49 @@ class ClientE2ETestSuite val ob1Metrics = Map("ob1" -> new GenericRowWithSchema(Array(0, 49, 98), ob1Schema)) val ob2Metrics = Map("ob2" -> new GenericRowWithSchema(Array(-1, 48, 97), ob2Schema)) + val obMetrics = observedDf.collectResult().getObservedMetrics assert(df.collectResult().getObservedMetrics === Map.empty) assert(observedDf.collectResult().getObservedMetrics === ob1Metrics) - assert(observedObservedDf.collectResult().getObservedMetrics === ob1Metrics ++ ob2Metrics) - } - - test("Observation.get is blocked until the query is finished") { - val df = spark.range(99).withColumn("extra", col("id") - 1) - val observation = new Observation("ob1") - val observedDf = df.observe(observation, min("id"), avg("id"), max("id")) - - // Start a new thread to get the observation - val future = Future(observation.get)(ExecutionContext.global) - // 
make sure the thread is blocked right now - val e = intercept[java.util.concurrent.TimeoutException] { - SparkThreadUtils.awaitResult(future, 2.seconds) + assert(obMetrics.map(_._2.schema) === Seq(ob1Schema)) + + val obObMetrics = observedObservedDf.collectResult().getObservedMetrics + assert(obObMetrics === ob1Metrics ++ ob2Metrics) + assert(obObMetrics.map(_._2.schema).exists(_.equals(ob1Schema))) + assert(obObMetrics.map(_._2.schema).exists(_.equals(ob2Schema))) + } + + for (collectFunc <- Seq( + ("collect", (df: DataFrame) => df.collect()), + ("collectAsList", (df: DataFrame) => df.collectAsList()), + ("collectResult", (df: DataFrame) => df.collectResult().length), + ("write", (df: DataFrame) => df.write.format("noop").mode("append").save()))) + test( + "Observation.get is blocked until the query is finished, " + + s"collect using method ${collectFunc._1}") { + val df = spark.range(99).withColumn("extra", col("id") - 1) + val ob1 = new Observation("ob1") + val ob2 = new Observation("ob2") + val observedDf = df.observe(ob1, min("id"), avg("id"), max("id")) + val observedObservedDf = observedDf.observe(ob2, min("extra"), avg("extra"), max("extra")) + // Start new threads to get observations + val future1 = Future(ob1.get)(ExecutionContext.global) + val future2 = Future(ob2.get)(ExecutionContext.global) + // make sure the threads are blocked right now + val e1 = intercept[java.util.concurrent.TimeoutException] { + SparkThreadUtils.awaitResult(future1, 2.seconds) + } + assert(e1.getMessage.contains("timed out after")) + val e2 = intercept[java.util.concurrent.TimeoutException] { + SparkThreadUtils.awaitResult(future2, 2.seconds) + } + assert(e2.getMessage.contains("timed out after")) + collectFunc._2(observedObservedDf) + // make sure the threads are unblocked after the query is finished + val metrics1 = SparkThreadUtils.awaitResult(future1, 5.seconds) + assert(metrics1 === Map("min(id)" -> 0, "avg(id)" -> 49, "max(id)" -> 98)) + val metrics2 = 
SparkThreadUtils.awaitResult(future2, 5.seconds) + assert(metrics2 === Map("min(extra)" -> -1, "avg(extra)" -> 48, "max(extra)" -> 97)) } - assert(e.getMessage.contains("Future timed out")) - observedDf.collect() - // make sure the thread is unblocked after the query is finished - val metrics = SparkThreadUtils.awaitResult(future, 2.seconds) - assert(metrics === Map("min(id)" -> 0, "avg(id)" -> 49, "max(id)" -> 98)) - } test("SPARK-48852: trim function on a string column returns correct results") { val session: SparkSession = spark diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala new file mode 100644 index 0000000000000..1d2165b668f61 --- /dev/null +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala @@ -0,0 +1,732 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.SparkRuntimeException +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.{QueryTest, RemoteSparkSession} + +class DataFrameSubquerySuite extends QueryTest with RemoteSparkSession { + import testImplicits._ + + val row = identity[(java.lang.Integer, java.lang.Double)](_) + + lazy val l = Seq( + row((1, 2.0)), + row((1, 2.0)), + row((2, 1.0)), + row((2, 1.0)), + row((3, 3.0)), + row((null, null)), + row((null, 5.0)), + row((6, null))).toDF("a", "b") + + lazy val r = Seq( + row((2, 3.0)), + row((2, 3.0)), + row((3, 2.0)), + row((4, 1.0)), + row((null, null)), + row((null, 5.0)), + row((6, null))).toDF("c", "d") + + override def beforeAll(): Unit = { + super.beforeAll() + l.createOrReplaceTempView("l") + r.createOrReplaceTempView("r") + } + + test("noop outer()") { + checkAnswer(spark.range(1).select($"id".outer()), Row(0)) + checkError( + intercept[AnalysisException](spark.range(1).select($"outer_col".outer()).collect()), + "UNRESOLVED_COLUMN.WITH_SUGGESTION", + parameters = Map("objectName" -> "`outer_col`", "proposal" -> "`id`")) + } + + test("simple uncorrelated scalar subquery") { + checkAnswer( + spark.range(1).select(spark.range(1).select(lit(1)).scalar().as("b")), + sql("select (select 1 as b) as b")) + + checkAnswer( + spark + .range(1) + .select( + spark.range(1).select(spark.range(1).select(lit(1)).scalar() + 1).scalar() + lit(1)), + sql("select (select (select 1) + 1) + 1")) + + // string type + checkAnswer( + spark.range(1).select(spark.range(1).select(lit("s")).scalar().as("b")), + sql("select (select 's' as s) as b")) + } + + test("uncorrelated scalar subquery should return null if there is 0 rows") { + checkAnswer( + spark.range(1).select(spark.range(1).select(lit("s")).limit(0).scalar().as("b")), + sql("select (select 's' as s limit 0) as b")) + } + + test("uncorrelated scalar subquery on a DataFrame generated query") { + withTempView("subqueryData") { + val df = 
Seq((1, "one"), (2, "two"), (3, "three")).toDF("key", "value") + df.createOrReplaceTempView("subqueryData") + + checkAnswer( + spark + .range(1) + .select( + spark + .table("subqueryData") + .select($"key") + .where($"key" > 2) + .orderBy($"key") + .limit(1) + .scalar() + lit(1)), + sql("select (select key from subqueryData where key > 2 order by key limit 1) + 1")) + + checkAnswer( + spark.range(1).select(-spark.table("subqueryData").select(max($"key")).scalar()), + sql("select -(select max(key) from subqueryData)")) + + checkAnswer( + spark.range(1).select(spark.table("subqueryData").select($"value").limit(0).scalar()), + sql("select (select value from subqueryData limit 0)")) + + checkAnswer( + spark + .range(1) + .select( + spark + .table("subqueryData") + .where($"key" === spark.table("subqueryData").select(max($"key")).scalar() - lit(1)) + .select(min($"value")) + .scalar()), + sql( + "select (select min(value) from subqueryData" + + " where key = (select max(key) from subqueryData) - 1)")) + } + } + + test("correlated scalar subquery in SELECT with outer() function") { + val df1 = spark.table("l").as("t1") + val df2 = spark.table("l").as("t2") + // We can use the `.outer()` function to wrap either the outer column, or the entire condition, + // or the SQL string of the condition. + Seq($"t1.a" === $"t2.a".outer(), ($"t1.a" === $"t2.a").outer(), expr("t1.a = t2.a").outer()) + .foreach { cond => + checkAnswer( + df1.select($"a", df2.where(cond).select(sum($"b")).scalar().as("sum_b")), + sql("select a, (select sum(b) from l t1 where t1.a = t2.a) sum_b from l t2")) + } + } + + test("correlated scalar subquery in WHERE with outer() function") { + // We can use the `.outer()` function to wrap either the outer column, or the entire condition, + // or the SQL string of the condition. 
+ Seq($"a".outer() === $"c", ($"a" === $"c").outer(), expr("a = c").outer()).foreach { cond => + checkAnswer( + spark.table("l").where($"b" < spark.table("r").where(cond).select(max($"d")).scalar()), + sql("select * from l where b < (select max(d) from r where a = c)")) + } + } + + test("EXISTS predicate subquery with outer() function") { + // We can use the `.outer()` function to wrap either the outer column, or the entire condition, + // or the SQL string of the condition. + Seq($"a".outer() === $"c", ($"a" === $"c").outer(), expr("a = c").outer()).foreach { cond => + checkAnswer( + spark.table("l").where(spark.table("r").where(cond).exists()), + sql("select * from l where exists (select * from r where l.a = r.c)")) + + checkAnswer( + spark.table("l").where(spark.table("r").where(cond).exists() && $"a" <= lit(2)), + sql("select * from l where exists (select * from r where l.a = r.c) and l.a <= 2")) + } + } + + test("SPARK-15677: Queries against local relations with scalar subquery in Select list") { + withTempView("t1", "t2") { + Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t1") + Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t2") + + checkAnswer( + spark.table("t1").select(spark.range(1).select(lit(1).as("col")).scalar()), + sql("SELECT (select 1 as col) from t1")) + + checkAnswer( + spark.table("t1").select(spark.table("t2").select(max($"c1")).scalar()), + sql("SELECT (select max(c1) from t2) from t1")) + + checkAnswer( + spark.table("t1").select(lit(1) + spark.range(1).select(lit(1).as("col")).scalar()), + sql("SELECT 1 + (select 1 as col) from t1")) + + checkAnswer( + spark.table("t1").select($"c1", spark.table("t2").select(max($"c1")).scalar() + $"c2"), + sql("SELECT c1, (select max(c1) from t2) + c2 from t1")) + + checkAnswer( + spark + .table("t1") + .select( + $"c1", + spark.table("t2").where($"t1.c2".outer() === $"t2.c2").select(max($"c1")).scalar()), + sql("SELECT c1, (select max(c1) from t2 where t1.c2 = t2.c2) from 
t1")) + } + } + + test("NOT EXISTS predicate subquery") { + checkAnswer( + spark.table("l").where(!spark.table("r").where($"a".outer() === $"c").exists()), + sql("select * from l where not exists (select * from r where l.a = r.c)")) + + checkAnswer( + spark + .table("l") + .where(!spark.table("r").where($"a".outer() === $"c" && $"b".outer() < $"d").exists()), + sql("select * from l where not exists (select * from r where l.a = r.c and l.b < r.d)")) + } + + test("EXISTS predicate subquery within OR") { + checkAnswer( + spark + .table("l") + .where(spark.table("r").where($"a".outer() === $"c").exists() || + spark.table("r").where($"a".outer() === $"c").exists()), + sql( + "select * from l where exists (select * from r where l.a = r.c)" + + " or exists (select * from r where l.a = r.c)")) + + checkAnswer( + spark + .table("l") + .where(!spark.table("r").where($"a".outer() === $"c" && $"b".outer() < $"d").exists() || + !spark.table("r").where($"a".outer() === $"c").exists()), + sql( + "select * from l where not exists (select * from r where l.a = r.c and l.b < r.d)" + + " or not exists (select * from r where l.a = r.c)")) + } + + test("correlated scalar subquery in select (null safe equal)") { + val df1 = spark.table("l").as("t1") + val df2 = spark.table("l").as("t2") + checkAnswer( + df1.select( + $"a", + df2.where($"t2.a" <=> $"t1.a".outer()).select(sum($"b")).scalar().as("sum_b")), + sql("select a, (select sum(b) from l t2 where t2.a <=> t1.a) sum_b from l t1")) + } + + test("correlated scalar subquery in aggregate") { + checkAnswer( + spark + .table("l") + .groupBy( + $"a", + spark.table("r").where($"a".outer() === $"c").select(sum($"d")).scalar().as("sum_d")) + .agg(Map.empty[String, String]), + sql("select a, (select sum(d) from r where a = c) sum_d from l l1 group by 1, 2")) + } + + test("SPARK-34269: correlated subquery with view in aggregate's grouping expression") { + withTable("tr") { + withView("vr") { + r.write.saveAsTable("tr") + sql("create view vr as 
select * from tr") + checkAnswer( + spark + .table("l") + .groupBy( + $"a", + spark + .table("vr") + .where($"a".outer() === $"c") + .select(sum($"d")) + .scalar() + .as("sum_d")) + .agg(Map.empty[String, String]), + sql("select a, (select sum(d) from vr where a = c) sum_d from l l1 group by 1, 2")) + } + } + } + + test("non-aggregated correlated scalar subquery") { + val df1 = spark.table("l").as("t1") + val df2 = spark.table("l").as("t2") + val exception1 = intercept[SparkRuntimeException] { + df1 + .select($"a", df2.where($"t1.a" === $"t2.a".outer()).select($"b").scalar().as("sum_b")) + .collect() + } + checkError(exception1, condition = "SCALAR_SUBQUERY_TOO_MANY_ROWS") + } + + test("non-equal correlated scalar subquery") { + val df1 = spark.table("l").as("t1") + val df2 = spark.table("l").as("t2") + checkAnswer( + df1.select( + $"a", + df2.where($"t2.a" < $"t1.a".outer()).select(sum($"b")).scalar().as("sum_b")), + sql("select a, (select sum(b) from l t2 where t2.a < t1.a) sum_b from l t1")) + } + + test("disjunctive correlated scalar subquery") { + checkAnswer( + spark + .table("l") + .where( + spark + .table("r") + .where(($"a".outer() === $"c" && $"d" === 2.0) || + ($"a".outer() === $"c" && $"d" === 1.0)) + .select(count(lit(1))) + .scalar() > 0) + .select($"a"), + sql(""" + |select a + |from l + |where (select count(*) + | from r + | where (a = c and d = 2.0) or (a = c and d = 1.0)) > 0 + """.stripMargin)) + } + + test("correlated scalar subquery with missing outer reference") { + checkAnswer( + spark + .table("l") + .select($"a", spark.table("r").where($"c" === $"a").select(sum($"d")).scalar()), + sql("select a, (select sum(d) from r where c = a) from l")) + } + + private def table1() = { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + spark.table("t1") + } + + private def table2() = { + sql("CREATE VIEW t2(c1, c2) AS VALUES (0, 2), (0, 3)") + spark.table("t2") + } + + private def table3() = { + sql( + "CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, 
ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + spark.table("t3") + } + + test("lateral join with single column select") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(spark.range(1).select($"c1".outer())).toDF("c1", "c2", "c3"), + sql("SELECT * FROM t1, LATERAL (SELECT c1)").toDF("c1", "c2", "c3")) + checkAnswer( + t1.lateralJoin(t2.select($"c1")).toDF("c1", "c2", "c3"), + sql("SELECT * FROM t1, LATERAL (SELECT c1 FROM t2)").toDF("c1", "c2", "c3")) + checkAnswer( + t1.lateralJoin(t2.select($"t1.c1".outer())).toDF("c1", "c2", "c3"), + sql("SELECT * FROM t1, LATERAL (SELECT t1.c1 FROM t2)").toDF("c1", "c2", "c3")) + checkAnswer( + t1.lateralJoin(t2.select($"t1.c1".outer() + $"t2.c1")).toDF("c1", "c2", "c3"), + sql("SELECT * FROM t1, LATERAL (SELECT t1.c1 + t2.c1 FROM t2)").toDF("c1", "c2", "c3")) + } + } + + test("lateral join with star expansion") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(spark.range(1).select().select($"*")), + sql("SELECT * FROM t1, LATERAL (SELECT *)")) + checkAnswer( + t1.lateralJoin(t2.select($"*")).toDF("c1", "c2", "c3", "c4"), + sql("SELECT * FROM t1, LATERAL (SELECT * FROM t2)").toDF("c1", "c2", "c3", "c4")) + checkAnswer( + t1.lateralJoin(t2.select($"t1.*".outer(), $"t2.*")) + .toDF("c1", "c2", "c3", "c4", "c5", "c6"), + sql("SELECT * FROM t1, LATERAL (SELECT t1.*, t2.* FROM t2)") + .toDF("c1", "c2", "c3", "c4", "c5", "c6")) + checkAnswer( + t1.lateralJoin(t2.alias("t1").select($"t1.*")).toDF("c1", "c2", "c3", "c4"), + sql("SELECT * FROM t1, LATERAL (SELECT t1.* FROM t2 AS t1)").toDF("c1", "c2", "c3", "c4")) + } + } + + test("lateral join with different join types") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.lateralJoin( + spark.range(1).select(($"c1".outer() + $"c2".outer()).as("c3")), + $"c2" === $"c3"), + sql("SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3")) + checkAnswer( 
+ t1.lateralJoin( + spark.range(1).select(($"c1".outer() + $"c2".outer()).as("c3")), + $"c2" === $"c3", + "left"), + sql("SELECT * FROM t1 LEFT JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3")) + checkAnswer( + t1.lateralJoin(spark.range(1).select(($"c1".outer() + $"c2".outer()).as("c3")), "cross"), + sql("SELECT * FROM t1 CROSS JOIN LATERAL (SELECT c1 + c2 AS c3)")) + } + } + + test("lateral join with subquery alias") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.lateralJoin(spark.range(1).select($"c1".outer(), $"c2".outer()).toDF("a", "b").as("s")) + .select("a", "b"), + sql("SELECT a, b FROM t1, LATERAL (SELECT c1, c2) s(a, b)")) + } + } + + test("lateral join with correlated equality / non-equality predicates") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(t2.where($"t1.c1".outer() === $"t2.c1").select($"c2")) + .toDF("c1", "c2", "c3"), + sql("SELECT * FROM t1, LATERAL (SELECT c2 FROM t2 WHERE t1.c1 = t2.c1)") + .toDF("c1", "c2", "c3")) + checkAnswer( + t1.lateralJoin(t2.where($"t1.c1".outer() < $"t2.c1").select($"c2")) + .toDF("c1", "c2", "c3"), + sql("SELECT * FROM t1, LATERAL (SELECT c2 FROM t2 WHERE t1.c1 < t2.c1)") + .toDF("c1", "c2", "c3")) + } + } + + test("lateral join with aggregation and correlated non-equality predicates") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(t2.where($"t1.c2".outer() < $"t2.c2").select(max($"c2").as("m"))), + sql("SELECT * FROM t1, LATERAL (SELECT max(c2) AS m FROM t2 WHERE t1.c2 < t2.c2)")) + } + } + + test("lateral join can reference preceding FROM clause items") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.join(t2) + .lateralJoin(spark.range(1).select($"t1.c2".outer() + $"t2.c2".outer())) + .toDF("c1", "c2", "c3", "c4", "c5"), + sql("SELECT * FROM t1 JOIN t2 JOIN LATERAL (SELECT t1.c2 + t2.c2)") + .toDF("c1", "c2", "c3", "c4", "c5")) + } + } + + 
test("multiple lateral joins") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.lateralJoin(spark.range(1).select(($"c1".outer() + $"c2".outer()).as("a"))) + .lateralJoin(spark.range(1).select(($"c1".outer() - $"c2".outer()).as("b"))) + .lateralJoin(spark.range(1).select(($"a".outer() * $"b".outer()).as("c"))), + sql(""" + |SELECT * FROM t1, + |LATERAL (SELECT c1 + c2 AS a), + |LATERAL (SELECT c1 - c2 AS b), + |LATERAL (SELECT a * b AS c) + |""".stripMargin)) + } + } + + test("lateral join in between regular joins") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(t2.where($"t1.c1".outer() === $"t2.c1").select($"c2").as("s"), "left") + .join(t1.as("t3"), $"s.c2" === $"t3.c2", "left") + .toDF("c1", "c2", "c3", "c4", "c5"), + sql(""" + |SELECT * FROM t1 + |LEFT OUTER JOIN LATERAL (SELECT c2 FROM t2 WHERE t1.c1 = t2.c1) s + |LEFT OUTER JOIN t1 t3 ON s.c2 = t3.c2 + |""".stripMargin) + .toDF("c1", "c2", "c3", "c4", "c5")) + } + } + + test("nested lateral joins") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(t2.lateralJoin(spark.range(1).select($"c1".outer()))) + .toDF("c1", "c2", "c3", "c4", "c5"), + sql("SELECT * FROM t1, LATERAL (SELECT * FROM t2, LATERAL (SELECT c1))") + .toDF("c1", "c2", "c3", "c4", "c5")) + checkAnswer( + t1.lateralJoin( + spark + .range(1) + .select(($"c1".outer() + lit(1)).as("c1")) + .lateralJoin(spark.range(1).select($"c1".outer()))) + .toDF("c1", "c2", "c3", "c4"), + sql( + "SELECT * FROM t1, LATERAL (SELECT * FROM (SELECT c1 + 1 AS c1), LATERAL (SELECT c1))") + .toDF("c1", "c2", "c3", "c4")) + } + } + + test("scalar subquery inside lateral join") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + // uncorrelated + checkAnswer( + t1.lateralJoin(spark.range(1).select($"c2".outer(), t2.select(min($"c2")).scalar())) + .toDF("c1", "c2", "c3", "c4"), + sql("SELECT * FROM t1, LATERAL (SELECT c2, (SELECT MIN(c2) 
FROM t2))") + .toDF("c1", "c2", "c3", "c4")) + + // correlated + checkAnswer( + t1.lateralJoin( + spark + .range(1) + .select($"c1".outer().as("a")) + .select(t2.where($"c1" === $"a".outer()).select(sum($"c2")).scalar())), + sql(""" + |SELECT * FROM t1, LATERAL ( + | SELECT (SELECT SUM(c2) FROM t2 WHERE c1 = a) FROM (SELECT c1 AS a) + |) + |""".stripMargin)) + } + } + + test("lateral join inside subquery") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + // uncorrelated + checkAnswer( + t1.where( + $"c1" === t2 + .lateralJoin(spark.range(1).select($"c1".outer().as("a"))) + .select(min($"a")) + .scalar()), + sql("SELECT * FROM t1 WHERE c1 = (SELECT MIN(a) FROM t2, LATERAL (SELECT c1 AS a))")) + // correlated + checkAnswer( + t1.where( + $"c1" === t2 + .lateralJoin(spark.range(1).select($"c1".outer().as("a"))) + .where($"c1" === $"t1.c1".outer()) + .select(min($"a")) + .scalar()), + sql( + "SELECT * FROM t1 " + + "WHERE c1 = (SELECT MIN(a) FROM t2, LATERAL (SELECT c1 AS a) WHERE c1 = t1.c1)")) + } + } + + test("lateral join with table-valued functions") { + withView("t1", "t3") { + val t1 = table1() + val t3 = table3() + + checkAnswer(t1.lateralJoin(spark.tvf.range(3)), sql("SELECT * FROM t1, LATERAL RANGE(3)")) + checkAnswer( + t1.lateralJoin(spark.tvf.explode(array($"c1".outer(), $"c2".outer()))), + sql("SELECT * FROM t1, LATERAL EXPLODE(ARRAY(c1, c2)) t2(c3)")) + checkAnswer( + t3.lateralJoin(spark.tvf.explode_outer($"c2".outer())), + sql("SELECT * FROM t3, LATERAL EXPLODE_OUTER(c2) t2(v)")) + checkAnswer( + spark.tvf + .explode(array(lit(1), lit(2))) + .toDF("v") + .lateralJoin(spark.range(1).select($"v".outer() + 1)), + sql("SELECT * FROM EXPLODE(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1)")) + } + } + + test("lateral join with table-valued functions and join conditions") { + withView("t1", "t3") { + val t1 = table1() + val t3 = table3() + + checkAnswer( + t1.lateralJoin(spark.tvf.explode(array($"c1".outer(), $"c2".outer())), $"c1" === 
$"col"), + sql("SELECT * FROM t1 JOIN LATERAL EXPLODE(ARRAY(c1, c2)) t(c3) ON t1.c1 = c3")) + checkAnswer( + t3.lateralJoin(spark.tvf.explode($"c2".outer()), $"c1" === $"col"), + sql("SELECT * FROM t3 JOIN LATERAL EXPLODE(c2) t(c3) ON t3.c1 = c3")) + checkAnswer( + t3.lateralJoin(spark.tvf.explode($"c2".outer()), $"c1" === $"col", "left"), + sql("SELECT * FROM t3 LEFT JOIN LATERAL EXPLODE(c2) t(c3) ON t3.c1 = c3")) + } + } + + test("subquery with generator / table-valued functions") { + withView("t1") { + val t1 = table1() + + checkAnswer( + spark.range(1).select(explode(t1.select(collect_list("c2")).scalar())), + sql("SELECT EXPLODE((SELECT COLLECT_LIST(c2) FROM t1))")) + checkAnswer( + spark.tvf.explode(t1.select(collect_list("c2")).scalar()), + sql("SELECT * FROM EXPLODE((SELECT COLLECT_LIST(c2) FROM t1))")) + } + } + + test("subquery in join condition") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.join(t2, $"t1.c1" === t1.select(max("c1")).scalar()).toDF("c1", "c2", "c3", "c4"), + sql("SELECT * FROM t1 JOIN t2 ON t1.c1 = (SELECT MAX(c1) FROM t1)") + .toDF("c1", "c2", "c3", "c4")) + } + } + + test("subquery in unpivot") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkError( + intercept[AnalysisException] { + t1.unpivot(Array(t2.exists()), "c1", "c2").collect() + }, + "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY", + parameters = Map("treeNode" -> "(?s)'Unpivot.*"), + matchPVals = true) + checkError( + intercept[AnalysisException] { + t1.unpivot(Array($"c1"), Array(t2.exists()), "c1", "c2").collect() + }, + "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY", + parameters = Map("treeNode" -> "(?s)Expand.*"), + matchPVals = true) + } + } + + test("subquery in transpose") { + withView("t1") { + val t1 = table1() + + checkError( + intercept[AnalysisException] { + t1.transpose(t1.select(max("c1")).scalar()).collect() + }, + 
"TRANSPOSE_INVALID_INDEX_COLUMN", + parameters = Map("reason" -> "Index column must be an atomic attribute")) + } + } + + test("subquery in withColumns") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer() + $"c2".outer()) + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .withColumn("c1", $"c1".outer()) + .select($"c1" + $"c2".outer()) + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer().as("c1")) + .withColumn("c2", $"c2".outer()) + .select($"c1" + $"c2") + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + } + } + + test("subquery in withColumnsRenamed") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer().as("c1"), $"c2".outer().as("c2")) + .withColumnsRenamed(Map("c1" -> "x", "c2" -> "y")) + .select($"x" + $"y") + .scalar()), + t1.select($"*", ($"c1".as("x") + $"c2".as("y")).as("scalar"))) + } + } + + test("subquery in drop") { + withView("t1") { + val t1 = table1() + + checkAnswer(t1.drop(spark.range(1).select(lit("c1")).scalar()), t1) + } + } + + test("subquery in repartition") { + withView("t1") { + val t1 = table1() + + checkAnswer(t1.repartition(spark.range(1).select(lit(1)).scalar()), t1) + } + } +} diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala index 4c0357a3ed984..12a49ad21676e 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala @@ -21,6 +21,7 @@ import 
org.apache.spark.sql.functions._ import org.apache.spark.sql.test.{QueryTest, RemoteSparkSession} class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSession { + import testImplicits._ test("explode") { val actual1 = spark.tvf.explode(array(lit(1), lit(2))) @@ -50,6 +51,31 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi checkAnswer(actual6, expected6) } + test("explode - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql( + "CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin( + spark.tvf.explode(array($"c1".outer(), $"c2".outer())).toDF("c3").as("t2")), + sql("SELECT * FROM t1, LATERAL EXPLODE(ARRAY(c1, c2)) t2(c3)")) + checkAnswer( + t3.lateralJoin(spark.tvf.explode($"c2".outer()).toDF("v").as("t2")), + sql("SELECT * FROM t3, LATERAL EXPLODE(c2) t2(v)")) + checkAnswer( + spark.tvf + .explode(array(lit(1), lit(2))) + .toDF("v") + .lateralJoin(spark.range(1).select($"v".outer() + lit(1))), + sql("SELECT * FROM EXPLODE(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1)")) + } + } + test("explode_outer") { val actual1 = spark.tvf.explode_outer(array(lit(1), lit(2))) val expected1 = spark.sql("SELECT * FROM explode_outer(array(1, 2))") @@ -78,6 +104,31 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi checkAnswer(actual6, expected6) } + test("explode_outer - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql( + "CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin( + spark.tvf.explode_outer(array($"c1".outer(), $"c2".outer())).toDF("c3").as("t2")), + sql("SELECT * FROM t1, LATERAL 
EXPLODE_OUTER(ARRAY(c1, c2)) t2(c3)")) + checkAnswer( + t3.lateralJoin(spark.tvf.explode_outer($"c2".outer()).toDF("v").as("t2")), + sql("SELECT * FROM t3, LATERAL EXPLODE_OUTER(c2) t2(v)")) + checkAnswer( + spark.tvf + .explode_outer(array(lit(1), lit(2))) + .toDF("v") + .lateralJoin(spark.range(1).select($"v".outer() + lit(1))), + sql("SELECT * FROM EXPLODE_OUTER(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1)")) + } + } + test("inline") { val actual1 = spark.tvf.inline(array(struct(lit(1), lit("a")), struct(lit(2), lit("b")))) val expected1 = spark.sql("SELECT * FROM inline(array(struct(1, 'a'), struct(2, 'b')))") @@ -98,6 +149,28 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi checkAnswer(actual3, expected3) } + test("inline - lateral join") { + withView("array_struct") { + sql(""" + |CREATE VIEW array_struct(id, arr) AS VALUES + | (1, ARRAY(STRUCT(1, 'a'), STRUCT(2, 'b'))), + | (2, ARRAY()), + | (3, ARRAY(STRUCT(3, 'c'))) + |""".stripMargin) + val arrayStruct = spark.table("array_struct") + + checkAnswer( + arrayStruct.lateralJoin(spark.tvf.inline($"arr".outer())), + sql("SELECT * FROM array_struct JOIN LATERAL INLINE(arr)")) + checkAnswer( + arrayStruct.lateralJoin( + spark.tvf.inline($"arr".outer()).toDF("k", "v").as("t"), + $"id" === $"k", + "left"), + sql("SELECT * FROM array_struct LEFT JOIN LATERAL INLINE(arr) t(k, v) ON id = k")) + } + } + test("inline_outer") { val actual1 = spark.tvf.inline_outer(array(struct(lit(1), lit("a")), struct(lit(2), lit("b")))) @@ -119,6 +192,28 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi checkAnswer(actual3, expected3) } + test("inline_outer - lateral join") { + withView("array_struct") { + sql(""" + |CREATE VIEW array_struct(id, arr) AS VALUES + | (1, ARRAY(STRUCT(1, 'a'), STRUCT(2, 'b'))), + | (2, ARRAY()), + | (3, ARRAY(STRUCT(3, 'c'))) + |""".stripMargin) + val arrayStruct = spark.table("array_struct") + + checkAnswer( + 
arrayStruct.lateralJoin(spark.tvf.inline_outer($"arr".outer())), + sql("SELECT * FROM array_struct JOIN LATERAL INLINE_OUTER(arr)")) + checkAnswer( + arrayStruct.lateralJoin( + spark.tvf.inline_outer($"arr".outer()).toDF("k", "v").as("t"), + $"id" === $"k", + "left"), + sql("SELECT * FROM array_struct LEFT JOIN LATERAL INLINE_OUTER(arr) t(k, v) ON id = k")) + } + } + test("json_tuple") { val actual = spark.tvf.json_tuple(lit("""{"a":1,"b":2}"""), lit("a"), lit("b")) val expected = spark.sql("""SELECT * FROM json_tuple('{"a":1,"b":2}', 'a', 'b')""") @@ -131,6 +226,51 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi assert(ex.messageParameters("functionName") == "`json_tuple`") } + test("json_tuple - lateral join") { + withView("json_table") { + sql(""" + |CREATE OR REPLACE TEMP VIEW json_table(key, jstring) AS VALUES + | ('1', '{"f1": "1", "f2": "2", "f3": 3, "f5": 5.23}'), + | ('2', '{"f1": "1", "f3": "3", "f2": 2, "f4": 4.01}'), + | ('3', '{"f1": 3, "f4": "4", "f3": "3", "f2": 2, "f5": 5.01}'), + | ('4', cast(null as string)), + | ('5', '{"f1": null, "f5": ""}'), + | ('6', '[invalid JSON string]') + |""".stripMargin) + val jsonTable = spark.table("json_table") + + checkAnswer( + jsonTable + .as("t1") + .lateralJoin( + spark.tvf + .json_tuple( + $"t1.jstring".outer(), + lit("f1"), + lit("f2"), + lit("f3"), + lit("f4"), + lit("f5")) + .as("t2")) + .select($"t1.key", $"t2.*"), + sql( + "SELECT t1.key, t2.* FROM json_table t1, " + + "LATERAL json_tuple(t1.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') t2")) + checkAnswer( + jsonTable + .as("t1") + .lateralJoin(spark.tvf + .json_tuple($"jstring".outer(), lit("f1"), lit("f2"), lit("f3"), lit("f4"), lit("f5")) + .as("t2")) + .where($"t2.c0".isNotNull) + .select($"t1.key", $"t2.*"), + sql( + "SELECT t1.key, t2.* FROM json_table t1, " + + "LATERAL json_tuple(t1.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') t2 " + + "WHERE t2.c0 IS NOT NULL")) + } + } + test("posexplode") { val actual1 = 
spark.tvf.posexplode(array(lit(1), lit(2))) val expected1 = spark.sql("SELECT * FROM posexplode(array(1, 2))") @@ -159,6 +299,30 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi checkAnswer(actual6, expected6) } + test("posexplode - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql( + "CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin(spark.tvf.posexplode(array($"c1".outer(), $"c2".outer()))), + sql("SELECT * FROM t1, LATERAL POSEXPLODE(ARRAY(c1, c2))")) + checkAnswer( + t3.lateralJoin(spark.tvf.posexplode($"c2".outer())), + sql("SELECT * FROM t3, LATERAL POSEXPLODE(c2)")) + checkAnswer( + spark.tvf + .posexplode(array(lit(1), lit(2))) + .toDF("p", "v") + .lateralJoin(spark.range(1).select($"v".outer() + lit(1))), + sql("SELECT * FROM POSEXPLODE(ARRAY(1, 2)) t(p, v), LATERAL (SELECT v + 1)")) + } + } + test("posexplode_outer") { val actual1 = spark.tvf.posexplode_outer(array(lit(1), lit(2))) val expected1 = spark.sql("SELECT * FROM posexplode_outer(array(1, 2))") @@ -187,12 +351,63 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi checkAnswer(actual6, expected6) } + test("posexplode_outer - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql( + "CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin(spark.tvf.posexplode_outer(array($"c1".outer(), $"c2".outer()))), + sql("SELECT * FROM t1, LATERAL POSEXPLODE_OUTER(ARRAY(c1, c2))")) + checkAnswer( + t3.lateralJoin(spark.tvf.posexplode_outer($"c2".outer())), + sql("SELECT * FROM t3, LATERAL POSEXPLODE_OUTER(c2)")) + checkAnswer( + spark.tvf + 
.posexplode_outer(array(lit(1), lit(2))) + .toDF("p", "v") + .lateralJoin(spark.range(1).select($"v".outer() + lit(1))), + sql("SELECT * FROM POSEXPLODE_OUTER(ARRAY(1, 2)) t(p, v), LATERAL (SELECT v + 1)")) + } + } + test("stack") { val actual = spark.tvf.stack(lit(2), lit(1), lit(2), lit(3)) val expected = spark.sql("SELECT * FROM stack(2, 1, 2, 3)") checkAnswer(actual, expected) } + test("stack - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql( + "CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin( + spark.tvf.stack(lit(2), lit("Key"), $"c1".outer(), lit("Value"), $"c2".outer()).as("t")) + .select($"t.*"), + sql("SELECT t.* FROM t1, LATERAL stack(2, 'Key', c1, 'Value', c2) t")) + checkAnswer( + t1.lateralJoin( + spark.tvf.stack(lit(1), $"c1".outer(), $"c2".outer()).toDF("x", "y").as("t")) + .select($"t.*"), + sql("SELECT t.* FROM t1 JOIN LATERAL stack(1, c1, c2) t(x, y)")) + checkAnswer( + t1.join(t3, $"t1.c1" === $"t3.c1") + .lateralJoin(spark.tvf.stack(lit(1), $"t1.c2".outer(), $"t3.c2".outer()).as("t")) + .select($"t.*"), + sql("SELECT t.* FROM t1 JOIN t3 ON t1.c1 = t3.c1 JOIN LATERAL stack(1, t1.c2, t3.c2) t")) + } + } + test("collations") { val actual = spark.tvf.collations() val expected = spark.sql("SELECT * FROM collations()") @@ -205,8 +420,7 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi checkAnswer(actual, expected) } - // TODO(SPARK-50063): Support VARIANT in Spark Connect Scala client - ignore("variant_explode") { + test("variant_explode") { val actual1 = spark.tvf.variant_explode(parse_json(lit("""["hello", "world"]"""))) val expected1 = spark.sql("""SELECT * FROM variant_explode(parse_json('["hello", "world"]'))""") @@ -237,8 +451,28 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with 
RemoteSparkSessi checkAnswer(actual6, expected6) } - // TODO(SPARK-50063): Support VARIANT in Spark Connect Scala client - ignore("variant_explode_outer") { + test("variant_explode - lateral join") { + withView("variant_table") { + sql(""" + |CREATE VIEW variant_table(id, v) AS + |SELECT id, parse_json(v) AS v FROM VALUES + |(0, '["hello", "world"]'), (1, '{"a": true, "b": 3.14}'), + |(2, '[]'), (3, '{}'), + |(4, NULL), (5, '1') + |AS t(id, v) + |""".stripMargin) + val variantTable = spark.table("variant_table") + + checkAnswer( + variantTable + .as("t1") + .lateralJoin(spark.tvf.variant_explode($"v".outer()).as("t")) + .select($"t1.id", $"t.*"), + sql("SELECT t1.id, t.* FROM variant_table AS t1, LATERAL variant_explode(v) AS t")) + } + } + + test("variant_explode_outer") { val actual1 = spark.tvf.variant_explode_outer(parse_json(lit("""["hello", "world"]"""))) val expected1 = spark.sql("""SELECT * FROM variant_explode_outer(parse_json('["hello", "world"]'))""") @@ -268,4 +502,25 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with RemoteSparkSessi val expected6 = spark.sql("SELECT * FROM variant_explode_outer(parse_json('1'))") checkAnswer(actual6, expected6) } + + test("variant_explode_outer - lateral join") { + withView("variant_table") { + sql(""" + |CREATE VIEW variant_table(id, v) AS + |SELECT id, parse_json(v) AS v FROM VALUES + |(0, '["hello", "world"]'), (1, '{"a": true, "b": 3.14}'), + |(2, '[]'), (3, '{}'), + |(4, NULL), (5, '1') + |AS t(id, v) + |""".stripMargin) + val variantTable = spark.table("variant_table") + + checkAnswer( + variantTable + .as("t1") + .lateralJoin(spark.tvf.variant_explode_outer($"v".outer()).as("t")) + .select($"t1.id", $"t.*"), + sql("SELECT t1.id, t.* FROM variant_table AS t1, LATERAL variant_explode_outer(v) AS t")) + } + } } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala index 988774d5eec94..021b4fea26e2a 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala @@ -460,6 +460,14 @@ class KeyValueGroupedDatasetE2ETestSuite extends QueryTest with RemoteSparkSessi (5, "hello")) } + test("SPARK-50789: reduceGroups on unresolved plan") { + val ds = Seq("abc", "xyz", "hello").toDS().select("*").as[String] + checkDatasetUnorderly( + ds.groupByKey(_.length).reduceGroups(_ + _), + (3, "abcxyz"), + (5, "hello")) + } + test("groupby") { val ds = Seq(("a", 1, 10), ("a", 2, 20), ("b", 2, 1), ("b", 1, 2), ("c", 1, 1)) .toDF("key", "seq", "value") @@ -479,6 +487,25 @@ class KeyValueGroupedDatasetE2ETestSuite extends QueryTest with RemoteSparkSessi "(c,1,1)") } + test("SPARK-50693: groupby on unresolved plan") { + val ds = Seq(("a", 1, 10), ("a", 2, 20), ("b", 2, 1), ("b", 1, 2), ("c", 1, 1)) + .toDF("key", "seq", "value") + val grouped = ds.select("*").groupBy($"key").as[String, (String, Int, Int)] + val aggregated = grouped + .flatMapSortedGroups($"seq", expr("length(key)"), $"value") { (g, iter) => + Iterator(g, iter.mkString(", ")) + } + + checkDatasetUnorderly( + aggregated, + "a", + "(a,1,10), (a,2,20)", + "b", + "(b,1,2), (b,2,1)", + "c", + "(c,1,1)") + } + test("groupby - keyAs, keys") { val ds = Seq(("a", 1, 10), ("a", 2, 20), ("b", 2, 1), ("b", 1, 2), ("c", 1, 1)) .toDF("key", "seq", "value") @@ -597,6 +624,16 @@ class KeyValueGroupedDatasetE2ETestSuite extends QueryTest with RemoteSparkSessi ("c", 1L)) } + test("SPARK-50693: RowEncoder in udf on unresolved plan") { + val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDF("c1", "c2") + + checkDatasetUnorderly( + ds.select("*").groupByKey(k => k.getAs[String](0)).agg(sum("c2").as[Long]), + ("a", 
30L), + ("b", 3L), + ("c", 1L)) + } + test("mapGroups with row encoder") { val df = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDF("c1", "c2") @@ -611,6 +648,21 @@ class KeyValueGroupedDatasetE2ETestSuite extends QueryTest with RemoteSparkSessi 1) } + test("SPARK-50693: mapGroups with row encoder on unresolved plan") { + val df = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDF("c1", "c2") + + checkDataset( + df.select("*") + .groupByKey(r => r.getAs[String]("c1")) + .mapGroups((_, it) => + it.map(r => { + r.getAs[Int]("c2") + }).sum), + 30, + 3, + 1) + } + test("coGroup with row encoder") { val df1 = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDF("c1", "c2") val df2 = Seq(("x", 10), ("x", 20), ("y", 1), ("y", 2), ("a", 1)).toDF("c1", "c2") @@ -632,6 +684,30 @@ class KeyValueGroupedDatasetE2ETestSuite extends QueryTest with RemoteSparkSessi 3) } + test("SPARK-50693: coGroup with row encoder on unresolved plan") { + val df1 = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDF("c1", "c2") + val df2 = Seq(("x", 10), ("x", 20), ("y", 1), ("y", 2), ("a", 1)).toDF("c1", "c2") + + Seq((df1.select("*"), df2), (df1, df2.select("*")), (df1.select("*"), df2.select("*"))) + .foreach { case (df1, df2) => + val ds1: KeyValueGroupedDataset[String, Row] = + df1.groupByKey(r => r.getAs[String]("c1")) + val ds2: KeyValueGroupedDataset[String, Row] = + df2.groupByKey(r => r.getAs[String]("c1")) + checkDataset( + ds1.cogroup(ds2)((_, it, it2) => { + val sum1 = it.map(r => r.getAs[Int]("c2")).sum + val sum2 = it2.map(r => r.getAs[Int]("c2")).sum + Iterator(sum1 + sum2) + }), + 31, + 3, + 1, + 30, + 3) + } + } + test("serialize as null") { val kvgds = session.range(10).groupByKey(_ % 2) val bytes = SparkSerDeUtils.serialize(kvgds) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLExpressionsSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLExpressionsSuite.scala new file mode 
100644 index 0000000000000..fcd2b3a388042 --- /dev/null +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLExpressionsSuite.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.test.{QueryTest, RemoteSparkSession} +import org.apache.spark.unsafe.types.VariantVal + +class SQLExpressionsSuite extends QueryTest with RemoteSparkSession { + + test("variants") { + val topLevelVariants = spark.sql("select parse_json(id::string) from range(10)") + checkAnswer( + topLevelVariants, + (0 until 10) + .map(i => Row(new VariantVal(Array[Byte](12, i.toByte), Array[Byte](1, 0, 0))))) + val structsOfVariants = spark.sql("select struct(parse_json(id::string)) from range(10)") + checkAnswer( + structsOfVariants, + (0 until 10) + .map(i => Row(Row(new VariantVal(Array[Byte](12, i.toByte), Array[Byte](1, 0, 0)))))) + val arraysOfVariants = spark.sql("select array(parse_json(id::string)) from range(10)") + checkAnswer( + arraysOfVariants, + (0 until 10) + .map(i => Row(Seq(new VariantVal(Array[Byte](12, i.toByte), Array[Byte](1, 0, 0)))))) + val mapsOfVariants = spark.sql("select map(id, parse_json(id::string)) from range(10)") + 
checkAnswer( + mapsOfVariants, + (0 until 10) + .map(i => Row(Map((i, new VariantVal(Array[Byte](12, i.toByte), Array[Byte](1, 0, 0))))))) + } +} diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UnsupportedFeaturesSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UnsupportedFeaturesSuite.scala index 6a26cf581751d..42ae6987c9f36 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UnsupportedFeaturesSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UnsupportedFeaturesSuite.scala @@ -79,10 +79,6 @@ class UnsupportedFeaturesSuite extends ConnectFunSuite { _.listenerManager } - testUnsupportedFeature("SparkSession.sqlContext", "SESSION_SQL_CONTEXT") { - _.sqlContext - } - testUnsupportedFeature( "SparkSession.baseRelationToDataFrame", "SESSION_BASE_RELATION_TO_DATAFRAME") { diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala index ca754c7b542f7..19275326d6421 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala @@ -301,6 +301,14 @@ class UserDefinedFunctionE2ETestSuite extends QueryTest with RemoteSparkSession checkDataset(df.filter(r => r.getInt(1) > 5), Row("a", 10), Row("a", 20)) } + test("SPARK-50693: Filter with row input encoder on unresolved plan") { + val session: SparkSession = spark + import session.implicits._ + val df = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDF("c1", "c2") + + checkDataset(df.select("*").filter(r => r.getInt(1) > 5), Row("a", 10), Row("a", 20)) + } + test("mapPartitions with row input encoder") { val session: SparkSession = spark import session.implicits._ @@ -393,6 +401,13 
@@ class UserDefinedFunctionE2ETestSuite extends QueryTest with RemoteSparkSession assert(ds.select(aggCol).head() == 135) // 45 + 90 } + test("SPARK-50789: UDAF custom Aggregator - toColumn on unresolved plan") { + val encoder = Encoders.product[UdafTestInput] + val aggCol = new CompleteUdafTestInputAggregator().toColumn + val ds = spark.range(10).withColumn("extra", col("id") * 2).select("*").as(encoder) + assert(ds.select(aggCol).head() == 135) // 45 + 90 + } + test("UDAF custom Aggregator - multiple extends - toColumn") { val encoder = Encoders.product[UdafTestInput] val aggCol = new CompleteGrandChildUdafTestInputAggregator().toColumn @@ -400,11 +415,24 @@ class UserDefinedFunctionE2ETestSuite extends QueryTest with RemoteSparkSession assert(ds.select(aggCol).head() == 540) // (45 + 90) * 4 } - test("UDAF custom aggregator - with rows - toColumn") { + test("SPARK-50789: UDAF custom Aggregator - multiple extends - toColumn on unresolved plan") { + val encoder = Encoders.product[UdafTestInput] + val aggCol = new CompleteGrandChildUdafTestInputAggregator().toColumn + val ds = spark.range(10).withColumn("extra", col("id") * 2).select("*").as(encoder) + assert(ds.select(aggCol).head() == 540) // (45 + 90) * 4 + } + + test("UDAF custom Aggregator - with rows - toColumn") { val ds = spark.range(10).withColumn("extra", col("id") * 2) assert(ds.select(RowAggregator.toColumn).head() == 405) assert(ds.agg(RowAggregator.toColumn).head().getLong(0) == 405) } + + test("SPARK-50789: UDAF custom Aggregator - with rows - toColumn on unresolved plan") { + val ds = spark.range(10).withColumn("extra", col("id") * 2).select("*") + assert(ds.select(RowAggregator.toColumn).head() == 405) + assert(ds.agg(RowAggregator.toColumn).head().getLong(0) == 405) + } } case class UdafTestInput(id: Long, extra: Long) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala index d9ff8d9122ead..7bac10e79d0b4 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala @@ -176,8 +176,6 @@ object CheckConnectJvmClientCompatibility { // Skip unsupported classes ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.ExperimentalMethods"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLContext"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLContext$*"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SparkSessionExtensions"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.SparkSessionExtensionsProvider"), @@ -185,6 +183,11 @@ object CheckConnectJvmClientCompatibility { "org.apache.spark.sql.ExtendedExplainGenerator"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.UDTFRegistration"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.DataSourceRegistration"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.TableArg"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.artifact.ArtifactStateForCleanup"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.artifact.ArtifactStateForCleanup$"), // DataFrameNaFunctions ProblemFilters.exclude[Problem]("org.apache.spark.sql.DataFrameNaFunctions.fillValue"), @@ -233,9 +236,11 @@ object CheckConnectJvmClientCompatibility { "org.apache.spark.sql.artifact.ArtifactManager$SparkContextResourceType$"), // ColumnNode conversions + ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.sql.SparkSession"), + ProblemFilters.exclude[DirectMissingMethodProblem]( + 
"org.apache.spark.sql.SparkSession.expression"), ProblemFilters.exclude[DirectMissingMethodProblem]( - "org.apache.spark.sql.SparkSession.Converter"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SparkSession$Converter$"), + "org.apache.spark.sql.SparkSession.toRichColumn"), // UDFRegistration ProblemFilters.exclude[DirectMissingMethodProblem]( @@ -295,10 +300,9 @@ object CheckConnectJvmClientCompatibility { "org.apache.spark.sql.KeyValueGroupedDatasetImpl$"), // ColumnNode conversions - ProblemFilters.exclude[IncompatibleResultTypeProblem]( - "org.apache.spark.sql.SparkSession#RichColumn.expr"), ProblemFilters.exclude[DirectMissingMethodProblem]( - "org.apache.spark.sql.SparkSession#RichColumn.typedExpr"), + "org.apache.spark.sql.SparkSession.RichColumn"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SparkSession$RichColumn"), // New public APIs added in the client // Dataset @@ -330,6 +334,11 @@ object CheckConnectJvmClientCompatibility { ProblemFilters.exclude[DirectMissingMethodProblem]( "org.apache.spark.sql.SparkSession#Builder.interceptor"), + // Private case class in SQLContext + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SQLContext$ListTableRow"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.SQLContext$ListTableRow$"), + // SQLImplicits ProblemFilters.exclude[Problem]("org.apache.spark.sql.SQLImplicits.session"), diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowEncoderSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowEncoderSuite.scala index 10e4c11c406fe..d0468c8d57b58 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowEncoderSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/arrow/ArrowEncoderSuite.scala @@ -45,6 +45,7 @@ import 
org.apache.spark.sql.connect.client.CloseableIterator import org.apache.spark.sql.connect.client.arrow.FooEnum.FooEnum import org.apache.spark.sql.test.ConnectFunSuite import org.apache.spark.sql.types.{ArrayType, DataType, DayTimeIntervalType, Decimal, DecimalType, IntegerType, Metadata, SQLUserDefinedType, StringType, StructType, UserDefinedType, YearMonthIntervalType} +import org.apache.spark.unsafe.types.VariantVal /** * Tests for encoding external data to and from arrow. @@ -264,6 +265,52 @@ class ArrowEncoderSuite extends ConnectFunSuite with BeforeAndAfterAll { assert(inspector.numBatches == 1) } + test("variant round trip") { + val variantEncoder = toRowEncoder(new StructType().add("v", "variant")) + roundTripAndCheckIdentical(variantEncoder) { () => + val maybeNull = MaybeNull(7) + Iterator.tabulate(101)(i => + Row(maybeNull(new VariantVal(Array[Byte](12, i.toByte), Array[Byte](1, 0, 0))))) + } + + val nestedVariantEncoder = toRowEncoder( + new StructType() + .add( + "s", + new StructType() + .add("i1", "int") + .add("v1", "variant") + .add("i2", "int") + .add("v2", "variant")) + .add("a", "array") + .add("m", "map")) + + roundTripAndCheckIdentical(nestedVariantEncoder) { () => + val maybeNull5 = MaybeNull(5) + val maybeNull7 = MaybeNull(7) + val maybeNull11 = MaybeNull(11) + val maybeNull13 = MaybeNull(13) + val maybeNull17 = MaybeNull(17) + Iterator.tabulate(100)(i => + Row( + maybeNull5( + Row( + i, + maybeNull7(new VariantVal(Array[Byte](12, i.toByte), Array[Byte](1, 0, 0))), + i + 1, + maybeNull11( + new VariantVal(Array[Byte](12, (i + 1).toByte), Array[Byte](1, 0, 0))))), + maybeNull7((0 until 10).map(j => + new VariantVal(Array[Byte](12, (i + j).toByte), Array[Byte](1, 0, 0)))), + maybeNull13( + Map( + ( + i.toString, + maybeNull17( + new VariantVal(Array[Byte](12, (i + 2).toByte), Array[Byte](1, 0, 0)))))))) + } + } + test("multiple batches - split by record count") { val inspector = new CountingBatchInspector roundTripAndCheckIdentical( diff --git 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/internal/ColumnNodeToProtoConverterSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/internal/ColumnNodeToProtoConverterSuite.scala index 2efd396735191..94729d34f37b5 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/internal/ColumnNodeToProtoConverterSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/internal/ColumnNodeToProtoConverterSuite.scala @@ -128,19 +128,22 @@ class ColumnNodeToProtoConverterSuite extends ConnectFunSuite { .setFunctionName("+") .setIsDistinct(false) .addArguments(attribute("a")) - .addArguments(expr(_.getLiteralBuilder.setInteger(1))))) + .addArguments(expr(_.getLiteralBuilder.setInteger(1))) + .setIsInternal(false))) testConversion( UnresolvedFunction( "db1.myAgg", Seq(UnresolvedAttribute("a")), isDistinct = true, - isUserDefinedFunction = true), + isUserDefinedFunction = true, + isInternal = true), expr( _.getUnresolvedFunctionBuilder .setFunctionName("db1.myAgg") .setIsDistinct(true) .setIsUserDefinedFunction(true) - .addArguments(attribute("a")))) + .addArguments(attribute("a")) + .setIsInternal(true))) } test("alias") { @@ -247,10 +250,12 @@ class ColumnNodeToProtoConverterSuite extends ConnectFunSuite { expr( _.getWindowBuilder .setWindowFunction( - expr(_.getUnresolvedFunctionBuilder - .setFunctionName("sum") - .setIsDistinct(false) - .addArguments(attribute("a")))) + expr( + _.getUnresolvedFunctionBuilder + .setFunctionName("sum") + .setIsDistinct(false) + .addArguments(attribute("a")) + .setIsInternal(false))) .addPartitionSpec(attribute("b")) .addPartitionSpec(attribute("c")) .addOrderSpec(proto.Expression.SortOrder @@ -276,7 +281,8 @@ class ColumnNodeToProtoConverterSuite extends ConnectFunSuite { _.getUnresolvedFunctionBuilder .setFunctionName("sum") .setIsDistinct(false) - .addArguments(attribute("a")))) + .addArguments(attribute("a")) + .setIsInternal(false))) 
.addPartitionSpec(attribute("b")) .addPartitionSpec(attribute("c")))) testWindowFrame( @@ -310,7 +316,8 @@ class ColumnNodeToProtoConverterSuite extends ConnectFunSuite { _.getUnresolvedFunctionBuilder .setFunctionName("+") .addArguments(expr(_.setUnresolvedNamedLambdaVariable(catX))) - .addArguments(attribute("y")))) + .addArguments(attribute("y")) + .setIsInternal(false))) .addArguments(catX))) } @@ -330,7 +337,8 @@ class ColumnNodeToProtoConverterSuite extends ConnectFunSuite { .setFunctionName("when") .addArguments(attribute("c1")) .addArguments(expr(_.getLiteralBuilder.setString("r1"))) - .addArguments(expr(_.getLiteralBuilder.setString("fallback"))))) + .addArguments(expr(_.getLiteralBuilder.setString("fallback"))) + .setIsInternal(false))) } test("extract field") { @@ -431,4 +439,5 @@ class ColumnNodeToProtoConverterSuite extends ConnectFunSuite { private[internal] case class Nope(override val origin: Origin = CurrentOrigin.get) extends ColumnNode { override def sql: String = "nope" + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala index b1a7d81916e92..199a1507a3b19 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala @@ -28,9 +28,8 @@ import org.scalatest.concurrent.Futures.timeout import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkException -import org.apache.spark.api.java.function.VoidFunction2 import org.apache.spark.internal.Logging -import org.apache.spark.sql.{DataFrame, ForeachWriter, Row, SparkSession} +import org.apache.spark.sql.{DataFrame, Dataset, ForeachWriter, Row, SparkSession} import 
org.apache.spark.sql.functions.{col, lit, udf, window} import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryIdleEvent, QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} import org.apache.spark.sql.test.{IntegrationTestUtils, QueryTest, RemoteSparkSession} @@ -567,7 +566,7 @@ class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with L } } - test("foreachBatch") { + test("foreachBatch with DataFrame") { // Starts a streaming query with a foreachBatch function, which writes batchId and row count // to a temp view. The test verifies that the view is populated with data. @@ -581,7 +580,12 @@ class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with L .option("numPartitions", "1") .load() .writeStream - .foreachBatch(new ForeachBatchFn(viewName)) + .foreachBatch((df: DataFrame, batchId: Long) => { + val count = df.collect().map(row => row.getLong(1)).sum + df.sparkSession + .createDataFrame(Seq((batchId, count))) + .createOrReplaceGlobalTempView(viewName) + }) .start() eventually(timeout(30.seconds)) { // Wait for first progress. 
@@ -596,6 +600,7 @@ class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with L .collect() .toSeq assert(rows.size > 0) + assert(rows.map(_.getLong(1)).sum > 0) logInfo(s"Rows in $tableName: $rows") } @@ -603,6 +608,75 @@ class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with L } } + test("foreachBatch with Dataset[java.lang.Long]") { + val viewName = "test_view" + val tableName = s"global_temp.$viewName" + + withTable(tableName) { + val session = spark + import session.implicits._ + val q = spark.readStream + .format("rate") + .option("rowsPerSecond", "10") + .option("numPartitions", "1") + .load() + .select($"value") + .as[java.lang.Long] + .writeStream + .foreachBatch((ds: Dataset[java.lang.Long], batchId: Long) => { + val count = ds.collect().map(v => v.asInstanceOf[Long]).sum + ds.sparkSession + .createDataFrame(Seq((batchId, count))) + .createOrReplaceGlobalTempView(viewName) + }) + .start() + + eventually(timeout(30.seconds)) { // Wait for first progress. + assert(q.lastProgress != null, "Failed to make progress") + assert(q.lastProgress.numInputRows > 0) + } + + eventually(timeout(30.seconds)) { + // There should be row(s) in temporary view created by foreachBatch. 
+ val rows = spark + .sql(s"select * from $tableName") + .collect() + .toSeq + assert(rows.size > 0) + assert(rows.map(_.getLong(1)).sum > 0) + logInfo(s"Rows in $tableName: $rows") + } + + q.stop() + } + } + + test("foreachBatch with Dataset[TestClass]") { + val session: SparkSession = spark + import session.implicits._ + val viewName = "test_view" + val tableName = s"global_temp.$viewName" + + val df = spark.readStream + .format("rate") + .option("rowsPerSecond", "10") + .load() + + val q = df + .selectExpr("CAST(value AS INT)") + .as[TestClass] + .writeStream + .foreachBatch((ds: Dataset[TestClass], batchId: Long) => { + val count = ds.collect().map(_.value).sum + }) + .start() + eventually(timeout(30.seconds)) { + assert(q.isActive) + assert(q.exception.isEmpty) + } + q.stop() + } + abstract class EventCollector extends StreamingQueryListener { protected def tablePostfix: String @@ -700,14 +774,3 @@ class TestForeachWriter[T] extends ForeachWriter[T] { case class TestClass(value: Int) { override def toString: String = value.toString } - -class ForeachBatchFn(val viewName: String) - extends VoidFunction2[DataFrame, java.lang.Long] - with Serializable { - override def call(df: DataFrame, batchId: java.lang.Long): Unit = { - val count = df.count() - df.sparkSession - .createDataFrame(Seq((batchId.toLong, count))) - .createOrReplaceGlobalTempView(viewName) - } -} diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateStreamingSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateStreamingSuite.scala index dc74463f1a25b..9bd6614028cbf 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateStreamingSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateStreamingSuite.scala @@ -55,7 +55,9 @@ class FlatMapGroupsWithStateStreamingSuite extends 
QueryTest with RemoteSparkSes val stateFunc = (key: String, values: Iterator[ClickEvent], state: GroupState[ClickState]) => { if (state.exists) throw new IllegalArgumentException("state.exists should be false") - Iterator(ClickState(key, values.size)) + val newState = ClickState(key, values.size) + state.update(newState) + Iterator(newState) } spark.sql("DROP TABLE IF EXISTS my_sink") @@ -96,7 +98,9 @@ class FlatMapGroupsWithStateStreamingSuite extends QueryTest with RemoteSparkSes val stateFunc = (key: String, values: Iterator[ClickEvent], state: GroupState[ClickState]) => { val currState = state.getOption.getOrElse(ClickState(key, 0)) - Iterator(ClickState(key, currState.count + values.size)) + val newState = ClickState(key, currState.count + values.size) + state.update(newState) + Iterator(newState) } val initialState = flatMapGroupsWithStateInitialStateData .toDS() @@ -141,7 +145,9 @@ class FlatMapGroupsWithStateStreamingSuite extends QueryTest with RemoteSparkSes val stateFunc = (key: String, values: Iterator[ClickEvent], state: GroupState[ClickState]) => { if (state.exists) throw new IllegalArgumentException("state.exists should be false") - ClickState(key, values.size) + val newState = ClickState(key, values.size) + state.update(newState) + newState } spark.sql("DROP TABLE IF EXISTS my_sink") @@ -183,7 +189,9 @@ class FlatMapGroupsWithStateStreamingSuite extends QueryTest with RemoteSparkSes val stateFunc = (key: String, values: Iterator[ClickEvent], state: GroupState[ClickState]) => { val currState = state.getOption.getOrElse(ClickState(key, 0)) - ClickState(key, currState.count + values.size) + val newState = ClickState(key, currState.count + values.size) + state.update(newState) + newState } val initialState = flatMapGroupsWithStateInitialStateData .toDS() diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala 
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala index 61d08912aec23..3ae9b9fc73b48 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala @@ -74,7 +74,7 @@ object IntegrationTestUtils { // Redirect server log into console "--conf", - s"spark.driver.extraJavaOptions=-Dlog4j.configuration=$log4j2") + s"spark.driver.extraJavaOptions=-Dlog4j.configurationFile=$log4j2") } else Seq.empty } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/QueryTest.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/QueryTest.scala index 8837c76b76aeb..f22644074324c 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/QueryTest.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/QueryTest.scala @@ -19,8 +19,11 @@ package org.apache.spark.sql.test import java.util.TimeZone +import scala.jdk.CollectionConverters._ + import org.scalatest.Assertions +import org.apache.spark.{QueryContextType, SparkThrowable} import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.util.SparkStringUtils.sideBySide import org.apache.spark.util.ArrayImplicits._ @@ -53,6 +56,158 @@ abstract class QueryTest extends ConnectFunSuite with SQLHelper { checkAnswer(df, expectedAnswer.toImmutableArraySeq) } + case class ExpectedContext( + contextType: QueryContextType, + objectType: String, + objectName: String, + startIndex: Int, + stopIndex: Int, + fragment: String, + callSitePattern: String) + + object ExpectedContext { + def apply(fragment: String, start: Int, stop: Int): ExpectedContext = { + ExpectedContext("", "", start, stop, fragment) + } + + def apply( + objectType: String, + objectName: String, + startIndex: Int, + stopIndex: Int, + 
fragment: String): ExpectedContext = { + new ExpectedContext( + QueryContextType.SQL, + objectType, + objectName, + startIndex, + stopIndex, + fragment, + "") + } + + def apply(fragment: String, callSitePattern: String): ExpectedContext = { + new ExpectedContext(QueryContextType.DataFrame, "", "", -1, -1, fragment, callSitePattern) + } + } + + /** + * Checks an exception with an error condition against expected results. + * @param exception + * The exception to check + * @param condition + * The expected error condition identifying the error + * @param sqlState + * Optional the expected SQLSTATE, not verified if not supplied + * @param parameters + * A map of parameter names and values. The names are as defined in the error-classes file. + * @param matchPVals + * Optionally treat the parameters value as regular expression pattern. false if not supplied. + */ + protected def checkError( + exception: SparkThrowable, + condition: String, + sqlState: Option[String] = None, + parameters: Map[String, String] = Map.empty, + matchPVals: Boolean = false, + queryContext: Array[ExpectedContext] = Array.empty): Unit = { + assert(exception.getCondition === condition) + sqlState.foreach(state => assert(exception.getSqlState === state)) + val expectedParameters = exception.getMessageParameters.asScala + if (matchPVals) { + assert(expectedParameters.size === parameters.size) + expectedParameters.foreach(exp => { + val parm = parameters.getOrElse( + exp._1, + throw new IllegalArgumentException("Missing parameter" + exp._1)) + if (!exp._2.matches(parm)) { + throw new IllegalArgumentException( + "For parameter '" + exp._1 + "' value '" + exp._2 + + "' does not match: " + parm) + } + }) + } else { + assert(expectedParameters === parameters) + } + val actualQueryContext = exception.getQueryContext() + assert( + actualQueryContext.length === queryContext.length, + "Invalid length of the query context") + actualQueryContext.zip(queryContext).foreach { case (actual, expected) => + assert( 
+ actual.contextType() === expected.contextType, + "Invalid contextType of a query context Actual:" + actual.toString) + if (actual.contextType() == QueryContextType.SQL) { + assert( + actual.objectType() === expected.objectType, + "Invalid objectType of a query context Actual:" + actual.toString) + assert( + actual.objectName() === expected.objectName, + "Invalid objectName of a query context. Actual:" + actual.toString) + assert( + actual.startIndex() === expected.startIndex, + "Invalid startIndex of a query context. Actual:" + actual.toString) + assert( + actual.stopIndex() === expected.stopIndex, + "Invalid stopIndex of a query context. Actual:" + actual.toString) + assert( + actual.fragment() === expected.fragment, + "Invalid fragment of a query context. Actual:" + actual.toString) + } else if (actual.contextType() == QueryContextType.DataFrame) { + assert( + actual.fragment() === expected.fragment, + "Invalid code fragment of a query context. Actual:" + actual.toString) + if (expected.callSitePattern.nonEmpty) { + assert( + actual.callSite().matches(expected.callSitePattern), + "Invalid callSite of a query context. 
Actual:" + actual.toString) + } + } + } + } + + protected def checkError( + exception: SparkThrowable, + condition: String, + sqlState: String, + parameters: Map[String, String]): Unit = + checkError(exception, condition, Some(sqlState), parameters) + + protected def checkError( + exception: SparkThrowable, + condition: String, + sqlState: String, + parameters: Map[String, String], + context: ExpectedContext): Unit = + checkError(exception, condition, Some(sqlState), parameters, false, Array(context)) + + protected def checkError( + exception: SparkThrowable, + condition: String, + parameters: Map[String, String], + context: ExpectedContext): Unit = + checkError(exception, condition, None, parameters, false, Array(context)) + + protected def checkError( + exception: SparkThrowable, + condition: String, + sqlState: String, + context: ExpectedContext): Unit = + checkError(exception, condition, Some(sqlState), Map.empty, false, Array(context)) + + protected def checkError( + exception: SparkThrowable, + condition: String, + sqlState: Option[String], + parameters: Map[String, String], + context: ExpectedContext): Unit = + checkError(exception, condition, sqlState, parameters, false, Array(context)) + + protected def getCurrentClassCallSitePattern: String = { + val cs = Thread.currentThread().getStackTrace()(2) + s"${cs.getClassName}\\..*\\(${cs.getFileName}:\\d+\\)" + } + /** * Evaluates a dataset to make sure that the result of calling collect matches the given * expected answer. 
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/SQLHelper.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/SQLHelper.scala index 4a574a15f7ab8..d9828ae92267b 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/SQLHelper.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/SQLHelper.scala @@ -21,13 +21,28 @@ import java.util.UUID import org.scalatest.Assertions.fail -import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession, SQLImplicits} +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.util.{SparkErrorUtils, SparkFileUtils} trait SQLHelper { def spark: SparkSession + // Shorthand for running a query using our SparkSession + protected lazy val sql: String => DataFrame = spark.sql _ + + /** + * A helper object for importing SQL implicits. + * + * Note that the alternative of importing `spark.implicits._` is not possible here. This is + * because we create the `SparkSession` immediately before the first test is run, but the + * implicits import is needed in the constructor. + */ + protected object testImplicits extends SQLImplicits { + override protected def session: SparkSession = spark + } + /** * Sets all SQL configurations specified in `pairs`, calls `f`, and then restores all SQL * configurations. @@ -96,6 +111,22 @@ trait SQLHelper { finally SparkFileUtils.deleteRecursively(path) } + /** + * Drops temporary view `viewNames` after calling `f`. + */ + protected def withTempView(viewNames: String*)(f: => Unit): Unit = { + SparkErrorUtils.tryWithSafeFinally(f) { + viewNames.foreach { viewName => + try spark.catalog.dropTempView(viewName) + catch { + // If the test failed part way, we don't want to mask the failure by failing to remove + // temp views that never got created. 
+ case _: NoSuchTableException => + } + } + } + } + /** * Drops table `tableName` after calling `f`. */ @@ -106,4 +137,13 @@ trait SQLHelper { } } } + + /** + * Drops view `viewName` after calling `f`. + */ + protected def withView(viewNames: String*)(f: => Unit): Unit = { + SparkErrorUtils.tryWithSafeFinally(f)(viewNames.foreach { name => + spark.sql(s"DROP VIEW IF EXISTS $name") + }) + } } diff --git a/connector/connect/docs/client-connection-string.md b/connector/connect/docs/client-connection-string.md index 37b2956a5c44a..df371c5beaaac 100644 --- a/connector/connect/docs/client-connection-string.md +++ b/connector/connect/docs/client-connection-string.md @@ -2,7 +2,7 @@ From the client perspective, Spark Connect mostly behaves as any other GRPC client and can be configured as such. However, to make it easy to use from -different programming languages and to have a homogenous connection surface +different programming languages and to have a homogeneous connection surface this document proposes what the user surface is for connecting to a Spark Connect endpoint. @@ -136,7 +136,7 @@ server_url = "sc://myhost.com:443/;use_ssl=true;token=ABCDEFG" As mentioned above, Spark Connect uses a regular GRPC client and the server path cannot be configured to remain compatible with the GRPC standard and HTTP. For -example the following examles are invalid. +example the following examples are invalid. 
```python server_url = "sc://myhost.com:443/mypathprefix/;token=AAAAAAA" diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala index 9fbbc8ed2e0ff..b560f86ade38c 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerIntegrationFunSuite.scala @@ -43,7 +43,7 @@ trait DockerIntegrationFunSuite extends SparkFunSuite { } } - /** Run the give body of code only if Kinesis tests are enabled */ + /** Run the given body of code only if ENABLE_DOCKER_INTEGRATION_TESTS is 1. */ def runIfTestsEnabled(message: String)(body: => Unit): Unit = { if (shouldRunTests) { body diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBDatabaseOnDocker.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBDatabaseOnDocker.scala new file mode 100644 index 0000000000000..61930268eb2ab --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBDatabaseOnDocker.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc + +import org.apache.spark.internal.Logging + +abstract class MariaDBDatabaseOnDocker extends DatabaseOnDocker with Logging { + override val imageName: String = + sys.env.getOrElse("MARIADB_DOCKER_IMAGE_NAME", "mariadb:10.11.10") + override val env: Map[String, String] = Map( + "MYSQL_ROOT_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort = 3306 + + override def getEntryPoint: Option[String] = + Some("/docker-entrypoint/mariadb-docker-entrypoint.sh") +} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala index 32c552eb8c7eb..962c70510b5bd 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala @@ -37,20 +37,11 @@ class MariaDBKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val userName = s"mariadb/$dockerIp" override protected val keytabFileName = "mariadb.keytab" - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("MARIADB_DOCKER_IMAGE_NAME", "mariadb:10.6.19") - override val env = Map( - "MYSQL_ROOT_PASSWORD" -> "rootpass" - ) - override val usesIpc = false - override val jdbcPort = 3306 + override val db = new MariaDBDatabaseOnDocker() { override def 
getJdbcUrl(ip: String, port: Int): String = s"jdbc:mysql://$ip:$port/mysql?user=$principal" - override def getEntryPoint: Option[String] = - Some("/docker-entrypoint/mariadb-docker-entrypoint.sh") - override def beforeContainerStart( hostConfigBuilder: HostConfig, containerConfigBuilder: ContainerConfig): Unit = { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresDatabaseOnDocker.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresDatabaseOnDocker.scala new file mode 100644 index 0000000000000..db2495ad3c698 --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresDatabaseOnDocker.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc + +import org.apache.spark.internal.Logging + +class PostgresDatabaseOnDocker extends DatabaseOnDocker with Logging { + lazy override val imageName: String = + sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:17.2-alpine") + private val postgres_user = "postgres" + private val postgres_password = "rootpass" + override val env: Map[String, String] = Map( + "POSTGRES_PASSWORD" -> postgres_password + ) + override val usesIpc = false + override val jdbcPort: Int = 5432 + + override def getJdbcUrl(ip: String, port: Int): String = { + s"jdbc:postgresql://$ip:$port/postgres?user=$postgres_user&password=$postgres_password" + } +} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 92a3e99586b5f..5c985da226b06 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -32,25 +32,16 @@ import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version (e.g., postgres:17.1-alpine): + * To run this test suite for a specific version (e.g., postgres:17.2-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.1-alpine + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.2-alpine * ./build/sbt -Pdocker-integration-tests * "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite" * }}} */ @DockerTest class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:17.1-alpine") - override val env = 
Map( - "POSTGRES_PASSWORD" -> "rootpass" - ) - override val usesIpc = false - override val jdbcPort = 5432 - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" - } + override val db = new PostgresDatabaseOnDocker override def dataPreparation(conn: Connection): Unit = { conn.prepareStatement("CREATE DATABASE foo").executeUpdate() diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala index 7c9fc477dbb78..b3cfe8bd77e2b 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala @@ -25,9 +25,9 @@ import org.apache.spark.sql.execution.datasources.jdbc.connection.SecureConnecti import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version (e.g., postgres:17.1-alpine): + * To run this test suite for a specific version (e.g., postgres:17.2-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.1-alpine + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.2-alpine * ./build/sbt -Pdocker-integration-tests * "docker-integration-tests/testOnly *PostgresKrbIntegrationSuite" * }}} @@ -37,14 +37,7 @@ class PostgresKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val userName = s"postgres/$dockerIp" override protected val keytabFileName = "postgres.keytab" - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:17.1-alpine") - override val env = Map( - "POSTGRES_PASSWORD" -> "rootpass" - ) - override val usesIpc = false - override val jdbcPort = 5432 - + 
override val db = new PostgresDatabaseOnDocker { override def getJdbcUrl(ip: String, port: Int): String = s"jdbc:postgresql://$ip:$port/postgres?user=$principal&gsslib=gssapi" diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala index b6917df2d428a..3a1d5e18b7e5a 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala @@ -28,9 +28,9 @@ import org.apache.spark.tags.DockerTest /** * This suite is used to generate subqueries, and test Spark against Postgres. - * To run this test suite for a specific version (e.g., postgres:17.1-alpine): + * To run this test suite for a specific version (e.g., postgres:17.2-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.1-alpine + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.2-alpine * ./build/sbt -Pdocker-integration-tests * "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.GeneratedSubquerySuite" * }}} @@ -38,16 +38,7 @@ import org.apache.spark.tags.DockerTest @DockerTest class GeneratedSubquerySuite extends DockerJDBCIntegrationSuite with QueryGeneratorHelper { - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:17.1-alpine") - override val env = Map( - "POSTGRES_PASSWORD" -> "rootpass" - ) - override val usesIpc = false - override val jdbcPort = 5432 - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" - } + override val db = new PostgresDatabaseOnDocker private val FIRST_COLUMN = "a" private val SECOND_COLUMN = "b" 
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgresSQLQueryTestSuite.scala similarity index 82% rename from connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala rename to connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgresSQLQueryTestSuite.scala index 56a83cc0a34d6..28320a9e0a949 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgresSQLQueryTestSuite.scala @@ -30,31 +30,21 @@ import org.apache.spark.tags.DockerTest * confidence, and you won't have to manually verify the golden files generated with your test. * 2. Add this line to your .sql file: --ONLY_IF spark * - * Note: To run this test suite for a specific version (e.g., postgres:17.1-alpine): + * Note: To run this test suite for a specific version (e.g., postgres:17.2-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.1-alpine + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.2-alpine * ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.PostgreSQLQueryTestSuite" * }}} */ @DockerTest -class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { +class PostgresSQLQueryTestSuite extends CrossDbmsQueryTestSuite { val DATABASE_NAME = CrossDbmsQueryTestSuite.POSTGRES // Scope to only subquery directory for now. 
protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:17.1-alpine") - override val env = Map( - "POSTGRES_PASSWORD" -> "rootpass" - ) - override val usesIpc = false - override val jdbcPort = 5432 - - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" - } + override val db = new PostgresDatabaseOnDocker override def dataPreparation(conn: Connection): Unit = { conn.prepareStatement( diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 95465cc6e40c5..eaf2a07ed4594 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -22,31 +22,23 @@ import java.sql.Connection import org.apache.spark.{SparkConf, SparkSQLException} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.jdbc.DatabaseOnDocker +import org.apache.spark.sql.jdbc.PostgresDatabaseOnDocker import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version (e.g., postgres:17.1-alpine) + * To run this test suite for a specific version (e.g., postgres:17.2-alpine) * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.1-alpine + * ENABLE_DOCKER_INTEGRATION_TESTS=1 
POSTGRES_DOCKER_IMAGE_NAME=postgres:17.2-alpine * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresIntegrationSuite" * }}} */ @DockerTest class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override val catalogName: String = "postgresql" - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:17.1-alpine") - override val env = Map( - "POSTGRES_PASSWORD" -> "rootpass" - ) - override val usesIpc = false - override val jdbcPort = 5432 - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" - } + override val db = new PostgresDatabaseOnDocker override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.postgresql", classOf[JDBCTableCatalog].getName) .set("spark.sql.catalog.postgresql.url", db.getJdbcUrl(dockerIp, externalPort)) @@ -252,6 +244,15 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT } } + test("SPARK-49695: Postgres fix xor push-down") { + val df = spark.sql(s"select dept, name from $catalogName.employee where dept ^ 6 = 0") + val rows = df.collect() + assert(!df.queryExecution.sparkPlan.exists(_.isInstanceOf[FilterExec])) + assert(rows.length == 1) + assert(rows(0).getInt(0) === 6) + assert(rows(0).getString(1) === "jen") + } + override def testDatetime(tbl: String): Unit = { val df1 = sql(s"SELECT name FROM $tbl WHERE " + "dayofyear(date1) > 100 AND dayofmonth(date1) > 10 ") diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala index 75f7ede5bc733..f84bdb46850f2 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala +++ 
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala @@ -21,29 +21,20 @@ import java.sql.Connection import scala.jdk.CollectionConverters._ -import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.jdbc.{DockerJDBCIntegrationSuite, PostgresDatabaseOnDocker} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version (e.g., postgres:17.1-alpine): + * To run this test suite for a specific version (e.g., postgres:17.2-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.1-alpine + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:17.2-alpine * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresNamespaceSuite" * }}} */ @DockerTest class PostgresNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:17.1-alpine") - override val env = Map( - "POSTGRES_PASSWORD" -> "rootpass" - ) - override val usesIpc = false - override val jdbcPort = 5432 - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" - } + override val db = new PostgresDatabaseOnDocker val map = new CaseInsensitiveStringMap( Map("url" -> db.getJdbcUrl(dockerIp, externalPort), diff --git a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 1d119de43970f..22eeae97874b1 100644 --- a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ 
b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -1591,22 +1591,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase with } } - -class KafkaMicroBatchV1SourceWithAdminSuite extends KafkaMicroBatchV1SourceSuite { - override def beforeAll(): Unit = { - super.beforeAll() - spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") - } -} - -class KafkaMicroBatchV2SourceWithAdminSuite extends KafkaMicroBatchV2SourceSuite { - override def beforeAll(): Unit = { - super.beforeAll() - spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") - } -} - -class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase { +abstract class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase { override def beforeAll(): Unit = { super.beforeAll() spark.conf.set( @@ -1637,7 +1622,7 @@ class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase { } } -class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { +abstract class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { test("V2 Source is used by default") { val topic = newTopic() @@ -1870,6 +1855,35 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { } } +class KafkaMicroBatchV1SourceWithAdminSuite extends KafkaMicroBatchV1SourceSuite { + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") + } +} + +class KafkaMicroBatchV1SourceWithConsumerSuite extends KafkaMicroBatchV1SourceSuite { + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "true") + } +} + +class KafkaMicroBatchV2SourceWithAdminSuite extends KafkaMicroBatchV2SourceSuite { + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") 
+ } +} + +class KafkaMicroBatchV2SourceWithConsumerSuite extends KafkaMicroBatchV2SourceSuite { + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "true") + } +} + + abstract class KafkaSourceSuiteBase extends KafkaSourceTest { import testImplicits._ diff --git a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index 9e06b6c6ff4a2..60de3705636ec 100644 --- a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -504,9 +504,7 @@ class KafkaTestUtils( props.put("sasl.enabled.mechanisms", "GSSAPI,SCRAM-SHA-512") } - // Can not use properties.putAll(propsMap.asJava) in scala-2.12 - // See https://github.com/scala/bug/issues/10418 - withBrokerProps.foreach { case (k, v) => props.put(k, v) } + props.putAll(withBrokerProps.asJava) props } diff --git a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala index cefaa3de182a5..f7bea064d2d6c 100644 --- a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala +++ b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala @@ -57,7 +57,7 @@ private[spark] class DirectKafkaInputDStream[K, V]( ppc: PerPartitionConfig ) extends InputDStream[ConsumerRecord[K, V]](_ssc) with Logging with CanCommitOffsets { - private val initialRate = context.sparkContext.getConf.getLong( + private val initialRate = context.sparkContext.getReadOnlyConf.getLong( "spark.streaming.backpressure.initialRate", 0) val executorKafkaParams = { diff --git 
a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 4835e9de086c4..cc24c378f4cbf 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -275,7 +275,7 @@ private[streaming] object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index aaafb3215d031..cd740f971e484 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -27,7 +27,7 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.ShutdownReason import com.amazonaws.services.kinesis.model.Record import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKeys.{REASON, RETRY_INTERVAL, SHARD_ID, WORKER_URL} +import org.apache.spark.internal.LogKeys.{KINESIS_REASON, RETRY_INTERVAL, SHARD_ID, WORKER_URL} /** * Kinesis-specific implementation of the Kinesis Client Library (KCL) IRecordProcessor. 
@@ -119,7 +119,7 @@ private[kinesis] class KinesisRecordProcessor[T](receiver: KinesisReceiver[T], w checkpointer: IRecordProcessorCheckpointer, reason: ShutdownReason): Unit = { logInfo(log"Shutdown: Shutting down workerId ${MDC(WORKER_URL, workerId)} " + - log"with reason ${MDC(REASON, reason)}") + log"with reason ${MDC(KINESIS_REASON, reason)}") // null if not initialized before shutdown: if (shardId == null) { logWarning(log"No shardId for workerId ${MDC(WORKER_URL, workerId)}?") diff --git a/connector/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisFunSuite.scala b/connector/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisFunSuite.scala index 8dc4de1aa3609..7098840d62f91 100644 --- a/connector/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisFunSuite.scala +++ b/connector/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisFunSuite.scala @@ -35,7 +35,7 @@ trait KinesisFunSuite extends SparkFunSuite { } } - /** Run the give body of code only if Kinesis tests are enabled */ + /** Run the given body of code only if ENABLE_KINESIS_TESTS is 1. */ def runIfTestsEnabled(message: String)(body: => Unit): Unit = { if (shouldRunTests) { body diff --git a/connector/profiler/README.md b/connector/profiler/README.md index 1326fd55df097..30d897f21b065 100644 --- a/connector/profiler/README.md +++ b/connector/profiler/README.md @@ -3,8 +3,15 @@ ## Build To build + +``` +./build/mvn clean package -DskipTests -Pjvm-profiler -pl :spark-profiler_2.13 -am +``` + +or + ``` - ./build/mvn clean package -DskipTests -Pjvm-profiler +./build/sbt -Pjvm-profiler clean "profiler/package" ``` ## Executor Code Profiling @@ -16,7 +23,7 @@ The profiler writes the jfr files to the executor's working directory in the exe Code profiling is currently only supported for * Linux (x64) -* Linux (arm 64) +* Linux (arm64) * Linux (musl, x64) * MacOS @@ -54,7 +61,7 @@ Then enable the profiling in the configuration. 
spark.executor.profiling.dfsDir (none) - An HDFS compatible path to which the profiler's output files are copied. The output files will be written as dfsDir/application_id/profile-appname-exec-executor_id.jfr
+ An HDFS compatible path to which the profiler's output files are copied. The output files will be written as dfsDir/{{APP_ID}}/profile-exec-{{EXECUTOR_ID}}.jfr
If no dfsDir is specified then the files are not copied over. Users should ensure there is sufficient disk space available otherwise it may lead to corrupt jfr files. 4.0.0 @@ -72,7 +79,7 @@ Then enable the profiling in the configuration. event=wall,interval=10ms,alloc=2m,lock=10ms,chunktime=300s Options to pass to the profiler. Detailed options are documented in the comments here: - Profiler arguments. + Profiler arguments. Note that the options to start, stop, specify output format, and output file do not have to be specified. 4.0.0 diff --git a/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala b/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala index 20b6db5221fa9..94e5b46c65881 100644 --- a/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala +++ b/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala @@ -17,17 +17,17 @@ package org.apache.spark.executor.profiler import java.io.{BufferedInputStream, FileInputStream, InputStream, IOException} -import java.net.URI import java.util.concurrent.{ScheduledExecutorService, TimeUnit} import one.profiler.{AsyncProfiler, AsyncProfilerLoader} import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path} +import org.apache.hadoop.fs.permission.FsPermission import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.internal.LogKeys.PATH -import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.{ThreadUtils, Utils} /** @@ -38,15 +38,26 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex private var running = false private val enableProfiler = conf.get(EXECUTOR_PROFILING_ENABLED) private val profilerOptions = conf.get(EXECUTOR_PROFILING_OPTIONS) - private val profilerDfsDir = conf.get(EXECUTOR_PROFILING_DFS_DIR) + 
private val profilerDfsDirOpt = conf.get(EXECUTOR_PROFILING_DFS_DIR) private val profilerLocalDir = conf.get(EXECUTOR_PROFILING_LOCAL_DIR) private val writeInterval = conf.get(EXECUTOR_PROFILING_WRITE_INTERVAL) - private val startcmd = s"start,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val stopcmd = s"stop,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val dumpcmd = s"dump,$profilerOptions,file=$profilerLocalDir/profile.jfr" - private val resumecmd = s"resume,$profilerOptions,file=$profilerLocalDir/profile.jfr" + private val appId = try { + conf.getAppId + } catch { + case _: NoSuchElementException => "local-" + System.currentTimeMillis + } + private val appAttemptId = conf.getOption("spark.app.attempt.id") + private val baseName = Utils.nameForAppAndAttempt(appId, appAttemptId) + private val profileFile = s"profile-exec-$executorId.jfr" + + private val startcmd = s"start,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val stopcmd = s"stop,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val dumpcmd = s"dump,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val resumecmd = s"resume,$profilerOptions,file=$profilerLocalDir/$profileFile" + private val PROFILER_FOLDER_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort) + private val PROFILER_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("660", 8).toShort) private val UPLOAD_SIZE = 8 * 1024 * 1024 // 8 MB private var outputStream: FSDataOutputStream = _ private var inputStream: InputStream = _ @@ -89,28 +100,34 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex } } + private def requireProfilerBaseDirAsDirectory(fs: FileSystem, profilerDfsDir: String): Unit = { + if (!fs.getFileStatus(new Path(profilerDfsDir)).isDirectory) { + throw new IllegalArgumentException( + s"Profiler DFS base directory $profilerDfsDir is not a directory.") + } + } + private def startWriting(): Unit = { - if 
(profilerDfsDir.isDefined) { - val applicationId = try { - conf.getAppId - } catch { - case _: NoSuchElementException => "local-" + System.currentTimeMillis + profilerDfsDirOpt.foreach { profilerDfsDir => + val profilerDirForApp = s"$profilerDfsDir/$baseName" + val profileOutputFile = s"$profilerDirForApp/$profileFile" + + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + val fs = Utils.getHadoopFileSystem(profilerDfsDir, hadoopConf) + + requireProfilerBaseDirAsDirectory(fs, profilerDfsDir) + + val profilerDirForAppPath = new Path(profilerDirForApp) + if (!fs.exists(profilerDirForAppPath)) { + // SPARK-30860: use the class method to avoid the umask causing permission issues + FileSystem.mkdirs(fs, profilerDirForAppPath, PROFILER_FOLDER_PERMISSIONS) } - val config = SparkHadoopUtil.get.newConfiguration(conf) - val appName = conf.get("spark.app.name").replace(" ", "-") - val profilerOutputDirname = profilerDfsDir.get - - val profileOutputFile = - s"$profilerOutputDirname/$applicationId/profile-$appName-exec-$executorId.jfr" - val fs = FileSystem.get(new URI(profileOutputFile), config); - val filenamePath = new Path(profileOutputFile) - outputStream = fs.create(filenamePath) + + outputStream = FileSystem.create(fs, new Path(profileOutputFile), PROFILER_FILE_PERMISSIONS) try { - if (fs.exists(filenamePath)) { - fs.delete(filenamePath, true) - } logInfo(log"Copying executor profiling file to ${MDC(PATH, profileOutputFile)}") - inputStream = new BufferedInputStream(new FileInputStream(s"$profilerLocalDir/profile.jfr")) + inputStream = new BufferedInputStream( + new FileInputStream(s"$profilerLocalDir/$profileFile")) threadpool = ThreadUtils.newDaemonSingleThreadScheduledExecutor("profilerOutputThread") threadpool.scheduleWithFixedDelay( new Runnable() { @@ -158,14 +175,14 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex } catch { case e: IOException => logError("Exception occurred while writing some profiler output: ", e) 
case e @ (_: IllegalArgumentException | _: IllegalStateException) => - logError("Some profiler output not written." + - " Exception occurred in profiler native code: ", e) + logError("Some profiler output not written. " + + "Exception occurred in profiler native code: ", e) case e: Exception => logError("Some profiler output not written. Unexpected exception: ", e) } } private def finishWriting(): Unit = { - if (profilerDfsDir.isDefined && writing) { + if (profilerDfsDirOpt.isDefined && writing) { try { // shutdown background writer threadpool.shutdown() @@ -177,8 +194,8 @@ private[spark] class ExecutorJVMProfiler(conf: SparkConf, executorId: String) ex } catch { case _: InterruptedException => Thread.currentThread().interrupt() case e: IOException => - logWarning("Some profiling output not written." + - "Exception occurred while completing profiler output", e) + logWarning("Some profiling output not written. " + + "Exception occurred while completing profiler output: ", e) } writing = false } diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index e85481ef9e1c8..22d24a7cdb62d 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -189,7 +189,7 @@ src/test/resources/protobuf - true + direct java diff --git a/core/benchmarks/ChecksumBenchmark-jdk21-results.txt b/core/benchmarks/ChecksumBenchmark-jdk21-results.txt index 85370450f355c..9e20379abe1f5 100644 --- a/core/benchmarks/ChecksumBenchmark-jdk21-results.txt +++ b/core/benchmarks/ChecksumBenchmark-jdk21-results.txt @@ -2,13 +2,12 @@ Benchmark Checksum Algorithms ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Checksum Algorithms: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -CRC32 2743 2746 3 0.0 2678409.9 1.0X -CRC32C 1974 2055 70 0.0 1928129.2 1.4X -Adler32 12689 12709 17 0.0 12391425.9 0.2X -hadoop PureJavaCrc32C 23027 23041 13 0.0 22487098.9 0.1X +Adler32 11109 11110 1 0.0 10848227.5 1.0X +CRC32 2740 2748 7 0.0 2676147.3 4.1X +CRC32C 1824 1837 22 0.0 1781283.4 6.1X diff --git a/core/benchmarks/ChecksumBenchmark-results.txt b/core/benchmarks/ChecksumBenchmark-results.txt index cce5a61abf637..5422cabf4b2b7 100644 --- a/core/benchmarks/ChecksumBenchmark-results.txt +++ b/core/benchmarks/ChecksumBenchmark-results.txt @@ -2,13 +2,12 @@ Benchmark Checksum Algorithms ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Checksum Algorithms: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -CRC32 2757 2758 1 0.0 2692250.2 1.0X -CRC32C 2142 2244 116 0.0 2091901.8 1.3X -Adler32 12699 12712 15 0.0 12401205.6 0.2X -hadoop PureJavaCrc32C 23049 23066 15 0.0 22508320.3 0.1X +Adler32 11113 11117 4 0.0 10852521.2 1.0X +CRC32 2765 2766 1 0.0 2699768.2 4.0X +CRC32C 2003 2033 45 0.0 1955654.6 5.5X diff --git a/core/benchmarks/CoalescedRDDBenchmark-jdk21-results.txt b/core/benchmarks/CoalescedRDDBenchmark-jdk21-results.txt index 1daac7b710bbf..07e8f05a3d185 100644 --- a/core/benchmarks/CoalescedRDDBenchmark-jdk21-results.txt +++ b/core/benchmarks/CoalescedRDDBenchmark-jdk21-results.txt @@ -2,39 +2,39 @@ Coalesced RDD , large scale ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Coalesced RDD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Coalesce Num Partitions: 100 Num Hosts: 1 359 371 12 0.3 3586.9 1.0X -Coalesce Num Partitions: 100 Num Hosts: 5 181 190 14 0.6 1812.1 2.0X -Coalesce Num Partitions: 100 Num Hosts: 10 178 185 6 0.6 1779.9 2.0X -Coalesce Num Partitions: 100 Num Hosts: 20 153 156 4 0.7 1531.2 2.3X -Coalesce Num Partitions: 100 Num Hosts: 40 148 149 1 0.7 1479.1 2.4X -Coalesce Num Partitions: 100 Num Hosts: 80 166 170 5 0.6 1657.8 2.2X -Coalesce Num Partitions: 500 Num Hosts: 1 1054 1064 14 0.1 10543.7 0.3X -Coalesce Num Partitions: 500 Num Hosts: 5 331 339 13 0.3 3311.1 1.1X -Coalesce Num Partitions: 500 Num Hosts: 10 230 235 8 0.4 2295.7 1.6X -Coalesce Num Partitions: 500 Num Hosts: 20 218 220 1 0.5 2182.0 1.6X -Coalesce Num Partitions: 500 Num Hosts: 40 161 164 2 0.6 1614.8 2.2X -Coalesce Num Partitions: 500 Num Hosts: 80 137 142 7 0.7 1371.6 2.6X -Coalesce Num Partitions: 1000 Num Hosts: 1 1926 1929 3 0.1 19264.6 0.2X -Coalesce Num Partitions: 1000 Num Hosts: 5 501 507 10 0.2 5011.1 0.7X -Coalesce Num Partitions: 1000 Num Hosts: 10 327 331 4 0.3 3268.5 1.1X -Coalesce Num Partitions: 1000 Num Hosts: 20 256 264 8 0.4 2556.1 1.4X -Coalesce Num Partitions: 1000 Num Hosts: 40 185 191 7 0.5 1853.2 1.9X -Coalesce Num Partitions: 1000 Num Hosts: 80 160 166 5 0.6 1603.5 2.2X -Coalesce Num Partitions: 5000 Num Hosts: 1 8672 9054 615 0.0 86716.9 0.0X -Coalesce Num Partitions: 5000 Num Hosts: 5 2016 2020 6 0.0 20159.9 0.2X -Coalesce Num Partitions: 5000 Num Hosts: 10 1084 1096 10 0.1 10844.7 0.3X -Coalesce Num Partitions: 5000 Num Hosts: 20 625 636 11 0.2 6245.6 0.6X -Coalesce Num Partitions: 5000 Num Hosts: 40 418 425 6 0.2 4182.3 0.9X -Coalesce Num Partitions: 5000 Num Hosts: 80 270 276 8 
0.4 2704.6 1.3X -Coalesce Num Partitions: 10000 Num Hosts: 1 16208 16391 226 0.0 162076.8 0.0X -Coalesce Num Partitions: 10000 Num Hosts: 5 3930 3949 23 0.0 39300.4 0.1X -Coalesce Num Partitions: 10000 Num Hosts: 10 2021 2031 11 0.0 20213.1 0.2X -Coalesce Num Partitions: 10000 Num Hosts: 20 1114 1115 1 0.1 11139.0 0.3X -Coalesce Num Partitions: 10000 Num Hosts: 40 628 639 17 0.2 6275.3 0.6X -Coalesce Num Partitions: 10000 Num Hosts: 80 402 408 10 0.2 4016.4 0.9X +Coalesce Num Partitions: 100 Num Hosts: 1 268 309 37 0.4 2678.2 1.0X +Coalesce Num Partitions: 100 Num Hosts: 5 124 130 5 0.8 1244.1 2.2X +Coalesce Num Partitions: 100 Num Hosts: 10 107 115 7 0.9 1068.7 2.5X +Coalesce Num Partitions: 100 Num Hosts: 20 104 112 6 1.0 1044.6 2.6X +Coalesce Num Partitions: 100 Num Hosts: 40 106 114 10 0.9 1062.8 2.5X +Coalesce Num Partitions: 100 Num Hosts: 80 101 106 5 1.0 1009.3 2.7X +Coalesce Num Partitions: 500 Num Hosts: 1 885 915 34 0.1 8854.5 0.3X +Coalesce Num Partitions: 500 Num Hosts: 5 263 268 5 0.4 2630.1 1.0X +Coalesce Num Partitions: 500 Num Hosts: 10 181 184 3 0.6 1806.0 1.5X +Coalesce Num Partitions: 500 Num Hosts: 20 139 144 8 0.7 1387.0 1.9X +Coalesce Num Partitions: 500 Num Hosts: 40 116 118 2 0.9 1163.7 2.3X +Coalesce Num Partitions: 500 Num Hosts: 80 108 115 7 0.9 1078.8 2.5X +Coalesce Num Partitions: 1000 Num Hosts: 1 1683 1735 79 0.1 16828.2 0.2X +Coalesce Num Partitions: 1000 Num Hosts: 5 446 449 2 0.2 4461.4 0.6X +Coalesce Num Partitions: 1000 Num Hosts: 10 256 262 7 0.4 2562.3 1.0X +Coalesce Num Partitions: 1000 Num Hosts: 20 182 189 5 0.5 1824.3 1.5X +Coalesce Num Partitions: 1000 Num Hosts: 40 141 145 3 0.7 1413.2 1.9X +Coalesce Num Partitions: 1000 Num Hosts: 80 120 126 8 0.8 1203.3 2.2X +Coalesce Num Partitions: 5000 Num Hosts: 1 7913 8247 291 0.0 79127.6 0.0X +Coalesce Num Partitions: 5000 Num Hosts: 5 1818 1846 24 0.1 18177.8 0.1X +Coalesce Num Partitions: 5000 Num Hosts: 10 990 992 2 0.1 9902.6 0.3X +Coalesce Num Partitions: 5000 Num Hosts: 20 
543 545 1 0.2 5432.5 0.5X +Coalesce Num Partitions: 5000 Num Hosts: 40 327 337 11 0.3 3272.6 0.8X +Coalesce Num Partitions: 5000 Num Hosts: 80 211 218 6 0.5 2112.1 1.3X +Coalesce Num Partitions: 10000 Num Hosts: 1 14709 15246 580 0.0 147087.6 0.0X +Coalesce Num Partitions: 10000 Num Hosts: 5 3485 3511 27 0.0 34849.5 0.1X +Coalesce Num Partitions: 10000 Num Hosts: 10 1777 1801 22 0.1 17773.8 0.2X +Coalesce Num Partitions: 10000 Num Hosts: 20 952 953 1 0.1 9517.2 0.3X +Coalesce Num Partitions: 10000 Num Hosts: 40 523 533 9 0.2 5229.6 0.5X +Coalesce Num Partitions: 10000 Num Hosts: 80 316 319 5 0.3 3158.4 0.8X diff --git a/core/benchmarks/CoalescedRDDBenchmark-results.txt b/core/benchmarks/CoalescedRDDBenchmark-results.txt index d370e6956116d..520cb5661a276 100644 --- a/core/benchmarks/CoalescedRDDBenchmark-results.txt +++ b/core/benchmarks/CoalescedRDDBenchmark-results.txt @@ -2,39 +2,39 @@ Coalesced RDD , large scale ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Coalesced RDD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Coalesce Num Partitions: 100 Num Hosts: 1 126 145 24 0.8 1257.7 1.0X -Coalesce Num Partitions: 100 Num Hosts: 5 104 105 1 1.0 1039.8 1.2X -Coalesce Num Partitions: 100 Num Hosts: 10 87 89 2 1.1 873.7 1.4X -Coalesce Num Partitions: 100 Num Hosts: 20 89 90 1 1.1 893.4 1.4X -Coalesce Num Partitions: 100 Num Hosts: 40 88 91 6 1.1 880.4 1.4X -Coalesce Num Partitions: 100 Num Hosts: 80 88 94 10 1.1 875.9 1.4X -Coalesce Num Partitions: 500 Num Hosts: 1 308 314 8 0.3 3078.0 0.4X -Coalesce Num Partitions: 500 Num Hosts: 5 133 136 2 0.7 1334.6 0.9X -Coalesce Num Partitions: 500 Num Hosts: 10 112 114 4 
0.9 1118.8 1.1X -Coalesce Num Partitions: 500 Num Hosts: 20 100 111 16 1.0 1004.0 1.3X -Coalesce Num Partitions: 500 Num Hosts: 40 100 106 6 1.0 999.7 1.3X -Coalesce Num Partitions: 500 Num Hosts: 80 95 100 6 1.0 954.0 1.3X -Coalesce Num Partitions: 1000 Num Hosts: 1 531 539 14 0.2 5311.0 0.2X -Coalesce Num Partitions: 1000 Num Hosts: 5 197 201 4 0.5 1970.3 0.6X -Coalesce Num Partitions: 1000 Num Hosts: 10 139 141 2 0.7 1392.4 0.9X -Coalesce Num Partitions: 1000 Num Hosts: 20 114 115 1 0.9 1137.9 1.1X -Coalesce Num Partitions: 1000 Num Hosts: 40 105 108 3 0.9 1054.9 1.2X -Coalesce Num Partitions: 1000 Num Hosts: 80 105 109 4 1.0 1047.4 1.2X -Coalesce Num Partitions: 5000 Num Hosts: 1 2336 2354 16 0.0 23362.8 0.1X -Coalesce Num Partitions: 5000 Num Hosts: 5 680 684 4 0.1 6798.7 0.2X -Coalesce Num Partitions: 5000 Num Hosts: 10 381 390 8 0.3 3810.5 0.3X -Coalesce Num Partitions: 5000 Num Hosts: 20 253 255 2 0.4 2529.6 0.5X -Coalesce Num Partitions: 5000 Num Hosts: 40 171 174 3 0.6 1706.1 0.7X -Coalesce Num Partitions: 5000 Num Hosts: 80 137 139 3 0.7 1365.5 0.9X -Coalesce Num Partitions: 10000 Num Hosts: 1 4220 4253 30 0.0 42203.4 0.0X -Coalesce Num Partitions: 10000 Num Hosts: 5 1377 1394 16 0.1 13769.0 0.1X -Coalesce Num Partitions: 10000 Num Hosts: 10 704 717 12 0.1 7036.4 0.2X -Coalesce Num Partitions: 10000 Num Hosts: 20 420 422 1 0.2 4201.7 0.3X -Coalesce Num Partitions: 10000 Num Hosts: 40 267 271 4 0.4 2669.3 0.5X -Coalesce Num Partitions: 10000 Num Hosts: 80 184 191 6 0.5 1842.2 0.7X +Coalesce Num Partitions: 100 Num Hosts: 1 128 134 9 0.8 1278.9 1.0X +Coalesce Num Partitions: 100 Num Hosts: 5 102 103 2 1.0 1016.5 1.3X +Coalesce Num Partitions: 100 Num Hosts: 10 86 87 1 1.2 860.3 1.5X +Coalesce Num Partitions: 100 Num Hosts: 20 87 92 7 1.1 872.0 1.5X +Coalesce Num Partitions: 100 Num Hosts: 40 83 86 3 1.2 829.7 1.5X +Coalesce Num Partitions: 100 Num Hosts: 80 83 84 1 1.2 832.5 1.5X +Coalesce Num Partitions: 500 Num Hosts: 1 306 306 1 0.3 3055.9 0.4X 
+Coalesce Num Partitions: 500 Num Hosts: 5 128 130 2 0.8 1277.2 1.0X +Coalesce Num Partitions: 500 Num Hosts: 10 106 110 6 0.9 1061.9 1.2X +Coalesce Num Partitions: 500 Num Hosts: 20 95 96 1 1.1 950.4 1.3X +Coalesce Num Partitions: 500 Num Hosts: 40 92 94 4 1.1 918.5 1.4X +Coalesce Num Partitions: 500 Num Hosts: 80 87 88 1 1.1 871.4 1.5X +Coalesce Num Partitions: 1000 Num Hosts: 1 523 529 9 0.2 5229.5 0.2X +Coalesce Num Partitions: 1000 Num Hosts: 5 185 189 3 0.5 1853.1 0.7X +Coalesce Num Partitions: 1000 Num Hosts: 10 128 131 3 0.8 1278.9 1.0X +Coalesce Num Partitions: 1000 Num Hosts: 20 106 108 3 0.9 1057.8 1.2X +Coalesce Num Partitions: 1000 Num Hosts: 40 97 97 1 1.0 968.2 1.3X +Coalesce Num Partitions: 1000 Num Hosts: 80 93 98 8 1.1 931.4 1.4X +Coalesce Num Partitions: 5000 Num Hosts: 1 2321 2328 11 0.0 23205.2 0.1X +Coalesce Num Partitions: 5000 Num Hosts: 5 674 680 5 0.1 6741.0 0.2X +Coalesce Num Partitions: 5000 Num Hosts: 10 374 378 7 0.3 3738.4 0.3X +Coalesce Num Partitions: 5000 Num Hosts: 20 232 238 6 0.4 2316.6 0.6X +Coalesce Num Partitions: 5000 Num Hosts: 40 163 166 3 0.6 1630.1 0.8X +Coalesce Num Partitions: 5000 Num Hosts: 80 127 129 2 0.8 1274.8 1.0X +Coalesce Num Partitions: 10000 Num Hosts: 1 4228 4243 18 0.0 42280.5 0.0X +Coalesce Num Partitions: 10000 Num Hosts: 5 1387 1400 13 0.1 13870.3 0.1X +Coalesce Num Partitions: 10000 Num Hosts: 10 711 714 4 0.1 7105.0 0.2X +Coalesce Num Partitions: 10000 Num Hosts: 20 401 408 7 0.2 4010.4 0.3X +Coalesce Num Partitions: 10000 Num Hosts: 40 251 253 2 0.4 2513.0 0.5X +Coalesce Num Partitions: 10000 Num Hosts: 80 175 182 8 0.6 1754.8 0.7X diff --git a/core/benchmarks/KryoBenchmark-jdk21-results.txt b/core/benchmarks/KryoBenchmark-jdk21-results.txt index aee420e8ca26a..4cc1e4dd2ba76 100644 --- a/core/benchmarks/KryoBenchmark-jdk21-results.txt +++ b/core/benchmarks/KryoBenchmark-jdk21-results.txt @@ -2,27 +2,27 @@ Benchmark Kryo Unsafe vs safe Serialization 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark Kryo Unsafe vs safe Serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -basicTypes: Int with unsafe:true 174 176 1 5.7 174.3 1.0X -basicTypes: Long with unsafe:true 178 184 5 5.6 178.1 1.0X -basicTypes: Float with unsafe:true 185 187 1 5.4 185.3 0.9X -basicTypes: Double with unsafe:true 187 189 1 5.3 187.0 0.9X -Array: Int with unsafe:true 1 1 0 752.1 1.3 131.1X -Array: Long with unsafe:true 2 2 0 490.6 2.0 85.5X -Array: Float with unsafe:true 1 1 0 757.7 1.3 132.1X -Array: Double with unsafe:true 2 2 0 483.9 2.1 84.4X -Map of string->Double with unsafe:true 26 26 2 38.5 26.0 6.7X -basicTypes: Int with unsafe:false 206 207 1 4.9 205.7 0.8X -basicTypes: Long with unsafe:false 222 223 1 4.5 221.7 0.8X -basicTypes: Float with unsafe:false 217 218 1 4.6 216.5 0.8X -basicTypes: Double with unsafe:false 217 218 2 4.6 216.6 0.8X -Array: Int with unsafe:false 13 13 0 79.5 12.6 13.9X -Array: Long with unsafe:false 21 22 0 46.6 21.4 8.1X -Array: Float with unsafe:false 6 6 0 167.8 6.0 29.3X -Array: Double with unsafe:false 16 16 0 64.2 15.6 11.2X -Map of string->Double with unsafe:false 28 28 1 36.3 27.5 6.3X +basicTypes: Int with unsafe:true 167 168 1 6.0 167.0 1.0X +basicTypes: Long with unsafe:true 174 178 2 5.7 174.1 1.0X +basicTypes: Float with unsafe:true 203 204 1 4.9 202.9 0.8X +basicTypes: Double with unsafe:true 206 207 1 4.9 206.1 0.8X +Array: Int with unsafe:true 1 1 0 768.6 1.3 128.4X +Array: Long with unsafe:true 2 2 0 502.0 2.0 83.9X +Array: Float with unsafe:true 1 1 0 773.6 1.3 129.2X +Array: Double with unsafe:true 2 2 0 492.6 2.0 82.3X 
+Map of string->Double with unsafe:true 27 27 1 37.5 26.6 6.3X +basicTypes: Int with unsafe:false 198 199 1 5.1 197.9 0.8X +basicTypes: Long with unsafe:false 217 219 2 4.6 216.8 0.8X +basicTypes: Float with unsafe:false 201 203 2 5.0 201.0 0.8X +basicTypes: Double with unsafe:false 202 204 1 5.0 201.9 0.8X +Array: Int with unsafe:false 13 13 0 79.7 12.5 13.3X +Array: Long with unsafe:false 20 21 0 49.1 20.4 8.2X +Array: Float with unsafe:false 7 8 0 134.4 7.4 22.5X +Array: Double with unsafe:false 11 12 0 87.2 11.5 14.6X +Map of string->Double with unsafe:false 28 28 1 36.3 27.5 6.1X diff --git a/core/benchmarks/KryoBenchmark-results.txt b/core/benchmarks/KryoBenchmark-results.txt index ca80b13a5346d..6c46724fbd2e8 100644 --- a/core/benchmarks/KryoBenchmark-results.txt +++ b/core/benchmarks/KryoBenchmark-results.txt @@ -2,27 +2,27 @@ Benchmark Kryo Unsafe vs safe Serialization ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark Kryo Unsafe vs safe Serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -basicTypes: Int with unsafe:true 171 173 2 5.9 170.8 1.0X -basicTypes: Long with unsafe:true 190 193 3 5.3 189.6 0.9X -basicTypes: Float with unsafe:true 186 188 2 5.4 186.1 0.9X -basicTypes: Double with unsafe:true 189 190 1 5.3 188.5 0.9X -Array: Int with unsafe:true 1 2 0 720.0 1.4 123.0X -Array: Long with unsafe:true 2 3 0 462.4 2.2 79.0X -Array: Float with unsafe:true 1 2 0 719.2 1.4 122.9X -Array: Double with unsafe:true 2 3 0 459.8 2.2 78.5X -Map of string->Double with unsafe:true 27 28 1 37.2 26.9 6.3X -basicTypes: Int with unsafe:false 219 220 1 4.6 218.7 0.8X -basicTypes: Long with 
unsafe:false 242 244 2 4.1 242.1 0.7X -basicTypes: Float with unsafe:false 215 220 10 4.7 214.8 0.8X -basicTypes: Double with unsafe:false 222 224 2 4.5 221.7 0.8X -Array: Int with unsafe:false 15 15 0 66.9 15.0 11.4X -Array: Long with unsafe:false 22 22 0 45.9 21.8 7.8X -Array: Float with unsafe:false 6 6 1 170.1 5.9 29.1X -Array: Double with unsafe:false 10 10 0 103.0 9.7 17.6X -Map of string->Double with unsafe:false 31 32 2 32.4 30.9 5.5X +basicTypes: Int with unsafe:true 167 169 1 6.0 167.5 1.0X +basicTypes: Long with unsafe:true 189 195 3 5.3 188.7 0.9X +basicTypes: Float with unsafe:true 188 192 5 5.3 187.8 0.9X +basicTypes: Double with unsafe:true 190 192 3 5.3 189.7 0.9X +Array: Int with unsafe:true 1 1 0 734.5 1.4 123.0X +Array: Long with unsafe:true 2 2 0 478.3 2.1 80.1X +Array: Float with unsafe:true 1 1 0 736.7 1.4 123.4X +Array: Double with unsafe:true 2 2 0 475.5 2.1 79.6X +Map of string->Double with unsafe:true 27 27 0 37.5 26.7 6.3X +basicTypes: Int with unsafe:false 210 211 2 4.8 210.0 0.8X +basicTypes: Long with unsafe:false 224 225 1 4.5 224.4 0.7X +basicTypes: Float with unsafe:false 203 204 1 4.9 203.4 0.8X +basicTypes: Double with unsafe:false 210 212 1 4.8 210.0 0.8X +Array: Int with unsafe:false 15 15 0 68.2 14.7 11.4X +Array: Long with unsafe:false 20 21 0 49.2 20.3 8.2X +Array: Float with unsafe:false 6 6 0 167.7 6.0 28.1X +Array: Double with unsafe:false 10 10 2 99.3 10.1 16.6X +Map of string->Double with unsafe:false 28 29 1 35.1 28.4 5.9X diff --git a/core/benchmarks/KryoIteratorBenchmark-jdk21-results.txt b/core/benchmarks/KryoIteratorBenchmark-jdk21-results.txt index e3922382068dd..36124a13c29d1 100644 --- a/core/benchmarks/KryoIteratorBenchmark-jdk21-results.txt +++ b/core/benchmarks/KryoIteratorBenchmark-jdk21-results.txt @@ -2,27 +2,27 @@ Benchmark of kryo asIterator on deserialization stream ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on 
Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark of kryo asIterator on deserialization stream: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------------- -Colletion of int with 1 elements, useIterator: true 6 6 0 1.7 590.7 1.0X -Colletion of int with 10 elements, useIterator: true 14 15 1 0.7 1431.1 0.4X -Colletion of int with 100 elements, useIterator: true 94 96 1 0.1 9429.2 0.1X -Colletion of string with 1 elements, useIterator: true 8 8 0 1.3 760.8 0.8X -Colletion of string with 10 elements, useIterator: true 22 23 0 0.4 2246.1 0.3X -Colletion of string with 100 elements, useIterator: true 167 167 1 0.1 16659.4 0.0X -Colletion of Array[int] with 1 elements, useIterator: true 7 8 0 1.4 735.4 0.8X -Colletion of Array[int] with 10 elements, useIterator: true 20 20 1 0.5 1976.5 0.3X -Colletion of Array[int] with 100 elements, useIterator: true 154 154 1 0.1 15356.7 0.0X -Colletion of int with 1 elements, useIterator: false 6 7 0 1.6 623.6 0.9X -Colletion of int with 10 elements, useIterator: false 13 14 0 0.7 1334.3 0.4X -Colletion of int with 100 elements, useIterator: false 82 82 0 0.1 8164.4 0.1X -Colletion of string with 1 elements, useIterator: false 7 8 0 1.4 727.0 0.8X -Colletion of string with 10 elements, useIterator: false 22 22 0 0.5 2166.5 0.3X -Colletion of string with 100 elements, useIterator: false 159 160 0 0.1 15925.0 0.0X -Colletion of Array[int] with 1 elements, useIterator: false 7 7 0 1.4 712.1 0.8X -Colletion of Array[int] with 10 elements, useIterator: false 19 20 0 0.5 1932.3 0.3X -Colletion of Array[int] with 100 elements, useIterator: false 142 143 1 0.1 14220.2 0.0X +Colletion of int with 1 elements, useIterator: true 6 6 0 1.6 625.1 1.0X +Colletion of int with 10 elements, useIterator: true 15 15 0 0.7 
1466.8 0.4X +Colletion of int with 100 elements, useIterator: true 95 96 1 0.1 9536.2 0.1X +Colletion of string with 1 elements, useIterator: true 8 8 0 1.3 771.9 0.8X +Colletion of string with 10 elements, useIterator: true 23 23 0 0.4 2260.7 0.3X +Colletion of string with 100 elements, useIterator: true 162 162 0 0.1 16179.1 0.0X +Colletion of Array[int] with 1 elements, useIterator: true 7 8 1 1.4 730.4 0.9X +Colletion of Array[int] with 10 elements, useIterator: true 20 20 1 0.5 1966.8 0.3X +Colletion of Array[int] with 100 elements, useIterator: true 146 147 1 0.1 14593.4 0.0X +Colletion of int with 1 elements, useIterator: false 6 7 0 1.6 636.6 1.0X +Colletion of int with 10 elements, useIterator: false 14 14 0 0.7 1366.4 0.5X +Colletion of int with 100 elements, useIterator: false 84 85 1 0.1 8439.6 0.1X +Colletion of string with 1 elements, useIterator: false 7 7 0 1.4 725.5 0.9X +Colletion of string with 10 elements, useIterator: false 21 22 0 0.5 2115.1 0.3X +Colletion of string with 100 elements, useIterator: false 173 174 3 0.1 17316.2 0.0X +Colletion of Array[int] with 1 elements, useIterator: false 7 7 0 1.4 698.9 0.9X +Colletion of Array[int] with 10 elements, useIterator: false 19 19 0 0.5 1894.5 0.3X +Colletion of Array[int] with 100 elements, useIterator: false 141 142 1 0.1 14108.1 0.0X diff --git a/core/benchmarks/KryoIteratorBenchmark-results.txt b/core/benchmarks/KryoIteratorBenchmark-results.txt index 77452144ac01d..6c3496909c6b7 100644 --- a/core/benchmarks/KryoIteratorBenchmark-results.txt +++ b/core/benchmarks/KryoIteratorBenchmark-results.txt @@ -2,27 +2,27 @@ Benchmark of kryo asIterator on deserialization stream ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark of kryo asIterator on deserialization stream: Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------------- -Colletion of int with 1 elements, useIterator: true 6 6 0 1.6 621.6 1.0X -Colletion of int with 10 elements, useIterator: true 14 14 0 0.7 1422.5 0.4X -Colletion of int with 100 elements, useIterator: true 90 92 1 0.1 9030.9 0.1X -Colletion of string with 1 elements, useIterator: true 7 8 0 1.4 726.3 0.9X -Colletion of string with 10 elements, useIterator: true 23 23 1 0.4 2251.0 0.3X -Colletion of string with 100 elements, useIterator: true 172 172 0 0.1 17183.3 0.0X -Colletion of Array[int] with 1 elements, useIterator: true 7 7 0 1.4 718.6 0.9X -Colletion of Array[int] with 10 elements, useIterator: true 21 21 1 0.5 2078.4 0.3X -Colletion of Array[int] with 100 elements, useIterator: true 162 162 0 0.1 16189.6 0.0X -Colletion of int with 1 elements, useIterator: false 6 6 0 1.7 599.6 1.0X -Colletion of int with 10 elements, useIterator: false 13 13 0 0.8 1313.8 0.5X -Colletion of int with 100 elements, useIterator: false 81 82 0 0.1 8132.1 0.1X -Colletion of string with 1 elements, useIterator: false 7 7 0 1.4 705.6 0.9X -Colletion of string with 10 elements, useIterator: false 22 23 0 0.4 2240.1 0.3X -Colletion of string with 100 elements, useIterator: false 170 170 0 0.1 16995.3 0.0X -Colletion of Array[int] with 1 elements, useIterator: false 7 7 0 1.5 675.7 0.9X -Colletion of Array[int] with 10 elements, useIterator: false 18 19 0 0.5 1842.7 0.3X -Colletion of Array[int] with 100 elements, useIterator: false 138 139 0 0.1 13801.7 0.0X +Colletion of int with 1 elements, useIterator: true 6 7 0 1.5 646.6 1.0X +Colletion of int with 10 elements, useIterator: true 14 14 0 0.7 1354.6 0.5X +Colletion of int with 100 elements, useIterator: true 82 82 0 0.1 8169.3 0.1X +Colletion of string with 1 elements, useIterator: true 8 8 0 1.3 777.8 0.8X +Colletion of 
string with 10 elements, useIterator: true 22 23 1 0.4 2237.2 0.3X +Colletion of string with 100 elements, useIterator: true 161 161 1 0.1 16071.4 0.0X +Colletion of Array[int] with 1 elements, useIterator: true 7 8 0 1.4 726.7 0.9X +Colletion of Array[int] with 10 elements, useIterator: true 20 20 0 0.5 1984.8 0.3X +Colletion of Array[int] with 100 elements, useIterator: true 151 151 0 0.1 15059.2 0.0X +Colletion of int with 1 elements, useIterator: false 6 6 0 1.6 609.0 1.1X +Colletion of int with 10 elements, useIterator: false 13 14 0 0.8 1322.7 0.5X +Colletion of int with 100 elements, useIterator: false 81 82 1 0.1 8138.4 0.1X +Colletion of string with 1 elements, useIterator: false 7 8 0 1.4 732.7 0.9X +Colletion of string with 10 elements, useIterator: false 23 23 0 0.4 2254.4 0.3X +Colletion of string with 100 elements, useIterator: false 171 173 4 0.1 17050.7 0.0X +Colletion of Array[int] with 1 elements, useIterator: false 7 7 0 1.4 705.9 0.9X +Colletion of Array[int] with 10 elements, useIterator: false 20 20 0 0.5 1974.9 0.3X +Colletion of Array[int] with 100 elements, useIterator: false 147 148 1 0.1 14730.9 0.0X diff --git a/core/benchmarks/KryoSerializerBenchmark-jdk21-results.txt b/core/benchmarks/KryoSerializerBenchmark-jdk21-results.txt index c00cd9152b278..14649cd560327 100644 --- a/core/benchmarks/KryoSerializerBenchmark-jdk21-results.txt +++ b/core/benchmarks/KryoSerializerBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ Benchmark KryoPool vs old"pool of 1" implementation ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark KryoPool vs old"pool of 1" implementation: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
----------------------------------------------------------------------------------------------------------------------------------- -KryoPool:true 4166 5737 1977 0.0 8331992.4 1.0X -KryoPool:false 6201 7778 1281 0.0 12402118.8 0.7X +KryoPool:true 3445 5067 1740 0.0 6889852.4 1.0X +KryoPool:false 5594 7457 1439 0.0 11188845.8 0.6X diff --git a/core/benchmarks/KryoSerializerBenchmark-results.txt b/core/benchmarks/KryoSerializerBenchmark-results.txt index a86338957cc37..c08bbfebe993d 100644 --- a/core/benchmarks/KryoSerializerBenchmark-results.txt +++ b/core/benchmarks/KryoSerializerBenchmark-results.txt @@ -2,11 +2,11 @@ Benchmark KryoPool vs old"pool of 1" implementation ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark KryoPool vs old"pool of 1" implementation: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -KryoPool:true 3973 5797 1879 0.0 7945107.3 1.0X -KryoPool:false 6041 7623 1484 0.0 12082153.5 0.7X +KryoPool:true 3409 5129 1620 0.0 6817249.8 1.0X +KryoPool:false 5506 7416 1256 0.0 11011835.6 0.6X diff --git a/core/benchmarks/LZFBenchmark-jdk21-results.txt b/core/benchmarks/LZFBenchmark-jdk21-results.txt index 7104879c5c753..1f39e58139e65 100644 --- a/core/benchmarks/LZFBenchmark-jdk21-results.txt +++ b/core/benchmarks/LZFBenchmark-jdk21-results.txt @@ -2,18 +2,18 @@ Benchmark LZFCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Compress small objects: Best Time(ms) 
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Compression 256000000 int values in parallel 599 601 4 427.4 2.3 1.0X -Compression 256000000 int values single-threaded 608 615 7 420.9 2.4 1.0X +Compression 256000000 int values in parallel 605 611 5 423.4 2.4 1.0X +Compression 256000000 int values single-threaded 612 619 5 418.5 2.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Compress large objects: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Compression 1024 array values in 1 threads 37 45 5 0.0 35857.3 1.0X -Compression 1024 array values single-threaded 31 31 0 0.0 30334.5 1.2X +Compression 1024 array values in 1 threads 44 48 3 0.0 43323.6 1.0X +Compression 1024 array values single-threaded 32 32 0 0.0 30772.9 1.4X diff --git a/core/benchmarks/LZFBenchmark-results.txt b/core/benchmarks/LZFBenchmark-results.txt index 142d3aad2f1ba..92d8ba52412df 100644 --- a/core/benchmarks/LZFBenchmark-results.txt +++ b/core/benchmarks/LZFBenchmark-results.txt @@ -2,18 +2,18 @@ Benchmark LZFCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Compress small objects: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Compression 256000000 int values in parallel 601 610 9 426.3 2.3 1.0X -Compression 256000000 int 
values single-threaded 610 619 7 419.3 2.4 1.0X +Compression 256000000 int values in parallel 598 608 7 428.2 2.3 1.0X +Compression 256000000 int values single-threaded 615 623 6 416.1 2.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Compress large objects: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Compression 1024 array values in 1 threads 35 44 5 0.0 34512.1 1.0X -Compression 1024 array values single-threaded 31 32 1 0.0 30396.7 1.1X +Compression 1024 array values in 1 threads 39 46 5 0.0 37798.8 1.0X +Compression 1024 array values single-threaded 31 32 0 0.0 29960.9 1.3X diff --git a/core/benchmarks/MapStatusesConvertBenchmark-jdk21-results.txt b/core/benchmarks/MapStatusesConvertBenchmark-jdk21-results.txt index 7c1b1eb4ac803..123a40fad3e62 100644 --- a/core/benchmarks/MapStatusesConvertBenchmark-jdk21-results.txt +++ b/core/benchmarks/MapStatusesConvertBenchmark-jdk21-results.txt @@ -2,12 +2,12 @@ MapStatuses Convert Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor MapStatuses Convert: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Num Maps: 50000 Fetch partitions:500 696 705 13 0.0 696430567.0 1.0X -Num Maps: 50000 Fetch partitions:1000 1620 1628 7 0.0 1620094001.0 0.4X -Num Maps: 50000 Fetch partitions:1500 2507 2522 13 0.0 2507485825.0 0.3X +Num Maps: 50000 Fetch partitions:500 716 730 14 0.0 715747604.0 1.0X +Num Maps: 50000 
Fetch partitions:1000 1592 1619 29 0.0 1591519021.0 0.4X +Num Maps: 50000 Fetch partitions:1500 2500 2507 7 0.0 2499934291.0 0.3X diff --git a/core/benchmarks/MapStatusesConvertBenchmark-results.txt b/core/benchmarks/MapStatusesConvertBenchmark-results.txt index 4ca2e502b9404..ef390204bb0af 100644 --- a/core/benchmarks/MapStatusesConvertBenchmark-results.txt +++ b/core/benchmarks/MapStatusesConvertBenchmark-results.txt @@ -2,12 +2,12 @@ MapStatuses Convert Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor MapStatuses Convert: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Num Maps: 50000 Fetch partitions:500 775 782 8 0.0 774584162.0 1.0X -Num Maps: 50000 Fetch partitions:1000 1605 1634 29 0.0 1604801022.0 0.5X -Num Maps: 50000 Fetch partitions:1500 2568 2585 22 0.0 2568404459.0 0.3X +Num Maps: 50000 Fetch partitions:500 612 614 3 0.0 611543498.0 1.0X +Num Maps: 50000 Fetch partitions:1000 1389 1398 15 0.0 1388971632.0 0.4X +Num Maps: 50000 Fetch partitions:1500 2178 2222 39 0.0 2177711722.0 0.3X diff --git a/core/benchmarks/MapStatusesSerDeserBenchmark-jdk21-results.txt b/core/benchmarks/MapStatusesSerDeserBenchmark-jdk21-results.txt index 1ffe7594c22cd..708bf8f770d40 100644 --- a/core/benchmarks/MapStatusesSerDeserBenchmark-jdk21-results.txt +++ b/core/benchmarks/MapStatusesSerDeserBenchmark-jdk21-results.txt @@ -1,64 +1,64 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 10 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------- -Serialization 81 85 4 2.5 406.5 1.0X -Deserialization 147 155 9 1.4 734.2 0.6X +Serialization 84 87 3 2.4 422.4 1.0X +Deserialization 143 151 6 1.4 712.6 0.6X Compressed Serialized MapStatus sizes: 426.0 B Compressed Serialized Broadcast MapStatus sizes: 2.5 MiB -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 10 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Serialization 73 75 3 2.7 365.9 1.0X -Deserialization 146 153 10 1.4 732.1 0.5X +Serialization 83 85 2 2.4 414.3 1.0X +Deserialization 141 145 7 1.4 703.0 0.6X Compressed Serialized MapStatus sizes: 2.5 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 100 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Serialization 152 157 6 1.3 759.6 1.0X -Deserialization 162 166 4 1.2 811.8 0.9X +Serialization 154 160 9 1.3 770.3 1.0X +Deserialization 158 164 10 1.3 788.4 1.0X Compressed Serialized MapStatus sizes: 442.0 B Compressed Serialized Broadcast MapStatus sizes: 13.6 MiB -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 100 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------- -Serialization 141 142 1 1.4 703.4 1.0X -Deserialization 161 165 5 1.2 807.0 0.9X +Serialization 145 146 1 1.4 724.1 1.0X +Deserialization 158 162 7 1.3 790.2 0.9X Compressed Serialized MapStatus sizes: 13.6 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 1000 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Serialization 686 714 31 0.3 3431.6 1.0X -Deserialization 329 346 18 0.6 1645.6 2.1X +Serialization 693 722 42 0.3 3463.5 1.0X +Deserialization 330 357 25 0.6 1648.3 2.1X -Compressed Serialized MapStatus sizes: 569.0 B +Compressed Serialized MapStatus sizes: 568.0 B Compressed Serialized Broadcast MapStatus sizes: 122.3 MiB -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 1000 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Serialization 560 562 3 0.4 2797.7 1.0X -Deserialization 317 334 20 0.6 1587.0 1.8X +Serialization 569 573 3 0.4 2845.7 1.0X +Deserialization 330 350 17 0.6 1647.8 1.7X Compressed Serialized MapStatus sizes: 122.3 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B diff --git a/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt b/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt index edd6207a12f8b..6e69a91cbafdb 100644 --- 
a/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt +++ b/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt @@ -1,64 +1,64 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 10 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 93 99 8 2.1 466.1 1.0X -Deserialization 140 151 12 1.4 698.8 0.7X +Serialization 88 92 3 2.3 442.0 1.0X +Deserialization 138 143 10 1.5 688.2 0.6X Compressed Serialized MapStatus sizes: 426.0 B Compressed Serialized Broadcast MapStatus sizes: 2.5 MiB -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 10 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Serialization 82 83 1 2.4 409.0 1.0X -Deserialization 139 142 8 1.4 692.8 0.6X +Serialization 77 79 1 2.6 386.9 1.0X +Deserialization 137 140 5 1.5 685.8 0.6X Compressed Serialized MapStatus sizes: 2.5 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 100 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Serialization 161 168 7 1.2 802.6 1.0X -Deserialization 155 169 13 1.3 777.3 1.0X +Serialization 159 160 1 1.3 793.9 1.0X 
+Deserialization 154 160 9 1.3 770.9 1.0X Compressed Serialized MapStatus sizes: 442.0 B Compressed Serialized Broadcast MapStatus sizes: 13.6 MiB -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 100 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Serialization 145 147 1 1.4 726.2 1.0X -Deserialization 155 160 10 1.3 772.7 0.9X +Serialization 146 147 1 1.4 730.8 1.0X +Deserialization 154 157 3 1.3 772.4 0.9X Compressed Serialized MapStatus sizes: 13.6 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 1000 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Serialization 693 714 23 0.3 3465.9 1.0X -Deserialization 326 351 13 0.6 1628.7 2.1X +Serialization 697 702 9 0.3 3483.3 1.0X +Deserialization 317 323 7 0.6 1583.0 2.2X -Compressed Serialized MapStatus sizes: 568.0 B +Compressed Serialized MapStatus sizes: 569.0 B Compressed Serialized Broadcast MapStatus sizes: 122.3 MiB -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 1000 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Serialization 564 576 13 0.4 2817.6 1.0X 
-Deserialization 339 354 9 0.6 1694.4 1.7X +Serialization 568 577 7 0.4 2842.2 1.0X +Deserialization 308 316 7 0.6 1540.4 1.8X Compressed Serialized MapStatus sizes: 122.3 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B diff --git a/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt b/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt index 7262ea63a6ef9..c91af8730b49c 100644 --- a/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt +++ b/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt @@ -2,17 +2,17 @@ PersistenceEngineBenchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 1000 Workers: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -ZooKeeperPersistenceEngine with JavaSerializer 5620 5811 236 0.0 5619587.2 1.0X -FileSystemPersistenceEngine with JavaSerializer 2876 2924 42 0.0 2876068.8 2.0X -FileSystemPersistenceEngine with JavaSerializer (lz4) 825 829 4 0.0 824880.6 6.8X -FileSystemPersistenceEngine with JavaSerializer (lzf) 742 774 39 0.0 742492.3 7.6X -FileSystemPersistenceEngine with JavaSerializer (snappy) 785 832 42 0.0 784738.0 7.2X -FileSystemPersistenceEngine with JavaSerializer (zstd) 966 982 14 0.0 965925.8 5.8X -RocksDBPersistenceEngine with JavaSerializer 299 301 2 0.0 299470.1 18.8X -BlackHolePersistenceEngine 0 0 0 6.0 166.6 33740.5X +ZooKeeperPersistenceEngine with JavaSerializer 7133 7390 257 0.0 7132665.6 1.0X +FileSystemPersistenceEngine with JavaSerializer 2449 2470 22 0.0 2448714.2 2.9X +FileSystemPersistenceEngine with JavaSerializer (lz4) 784 805 19 0.0 783603.9 9.1X +FileSystemPersistenceEngine with JavaSerializer (lzf) 
719 763 52 0.0 719310.0 9.9X +FileSystemPersistenceEngine with JavaSerializer (snappy) 731 765 42 0.0 731346.6 9.8X +FileSystemPersistenceEngine with JavaSerializer (zstd) 920 971 52 0.0 919508.6 7.8X +RocksDBPersistenceEngine with JavaSerializer 283 284 1 0.0 282641.0 25.2X +BlackHolePersistenceEngine 0 0 0 6.0 167.4 42612.8X diff --git a/core/benchmarks/PersistenceEngineBenchmark-results.txt b/core/benchmarks/PersistenceEngineBenchmark-results.txt index c373d88842d2e..14ca05abad071 100644 --- a/core/benchmarks/PersistenceEngineBenchmark-results.txt +++ b/core/benchmarks/PersistenceEngineBenchmark-results.txt @@ -2,17 +2,17 @@ PersistenceEngineBenchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 1000 Workers: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -ZooKeeperPersistenceEngine with JavaSerializer 6146 6314 215 0.0 6146007.1 1.0X -FileSystemPersistenceEngine with JavaSerializer 2944 2957 17 0.0 2944099.7 2.1X -FileSystemPersistenceEngine with JavaSerializer (lz4) 827 869 37 0.0 827379.0 7.4X -FileSystemPersistenceEngine with JavaSerializer (lzf) 799 826 25 0.0 799318.1 7.7X -FileSystemPersistenceEngine with JavaSerializer (snappy) 775 805 50 0.0 774802.8 7.9X -FileSystemPersistenceEngine with JavaSerializer (zstd) 972 1002 28 0.0 971773.9 6.3X -RocksDBPersistenceEngine with JavaSerializer 310 312 3 0.0 310401.2 19.8X -BlackHolePersistenceEngine 0 0 0 6.0 165.7 37097.7X +ZooKeeperPersistenceEngine with JavaSerializer 6582 6738 184 0.0 6581975.7 1.0X +FileSystemPersistenceEngine with JavaSerializer 2493 2507 12 0.0 2492854.1 2.6X +FileSystemPersistenceEngine with JavaSerializer (lz4) 784 827 
40 0.0 783848.3 8.4X +FileSystemPersistenceEngine with JavaSerializer (lzf) 755 774 17 0.0 755155.3 8.7X +FileSystemPersistenceEngine with JavaSerializer (snappy) 739 786 49 0.0 739163.8 8.9X +FileSystemPersistenceEngine with JavaSerializer (zstd) 956 988 33 0.0 955958.8 6.9X +RocksDBPersistenceEngine with JavaSerializer 290 295 7 0.0 289554.4 22.7X +BlackHolePersistenceEngine 0 0 0 6.2 161.8 40674.2X diff --git a/core/benchmarks/PropertiesCloneBenchmark-jdk21-results.txt b/core/benchmarks/PropertiesCloneBenchmark-jdk21-results.txt index ccae104413f6a..cfff77298896a 100644 --- a/core/benchmarks/PropertiesCloneBenchmark-jdk21-results.txt +++ b/core/benchmarks/PropertiesCloneBenchmark-jdk21-results.txt @@ -2,39 +2,39 @@ Properties Cloning ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Empty Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.3 3296.0 1.0X -Utils.cloneProperties 0 0 0 34.5 29.0 113.7X +SerializationUtils.clone 0 0 0 0.3 3146.0 1.0X +Utils.cloneProperties 0 0 0 11.2 89.0 35.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor System Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.0 156331.0 1.0X -Utils.cloneProperties 0 0 0 0.4 2595.0 60.2X +SerializationUtils.clone 0 0 0 0.0 158717.0 1.0X +Utils.cloneProperties 0 0 0 0.2 4819.0 32.9X -OpenJDK 64-Bit Server VM 
21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Small Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.0 255154.0 1.0X -Utils.cloneProperties 0 0 0 0.4 2624.0 97.2X +SerializationUtils.clone 0 0 0 0.0 241783.0 1.0X +Utils.cloneProperties 0 0 0 0.2 6051.0 40.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Medium Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 1 1 0 0.0 887239.0 1.0X -Utils.cloneProperties 0 0 0 0.1 14908.0 59.5X +SerializationUtils.clone 1 1 0 0.0 853297.0 1.0X +Utils.cloneProperties 0 0 0 0.0 30927.0 27.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Large Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 2 2 0 0.0 1655264.0 1.0X -Utils.cloneProperties 0 0 0 0.0 30837.0 53.7X +SerializationUtils.clone 2 2 0 0.0 1598481.0 1.0X +Utils.cloneProperties 0 0 0 0.0 63448.0 25.2X diff --git a/core/benchmarks/PropertiesCloneBenchmark-results.txt b/core/benchmarks/PropertiesCloneBenchmark-results.txt index f6c6c8781dc25..cceb3e8710dee 100644 --- a/core/benchmarks/PropertiesCloneBenchmark-results.txt +++ b/core/benchmarks/PropertiesCloneBenchmark-results.txt @@ -2,39 +2,39 @@ Properties Cloning 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Empty Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.3 3466.0 1.0X -Utils.cloneProperties 0 0 0 34.5 29.0 119.5X +SerializationUtils.clone 0 0 0 0.3 3186.0 1.0X +Utils.cloneProperties 0 0 0 11.1 90.0 35.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor System Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.0 156422.0 1.0X -Utils.cloneProperties 0 0 0 0.4 2685.0 58.3X +SerializationUtils.clone 0 0 0 0.0 175435.0 1.0X +Utils.cloneProperties 0 0 0 0.2 4247.0 41.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Small Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.0 277017.0 1.0X -Utils.cloneProperties 0 0 0 0.3 3666.0 75.6X +SerializationUtils.clone 0 0 0 0.0 255744.0 1.0X +Utils.cloneProperties 0 0 0 0.1 7273.0 35.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Medium Properties: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 1 1 0 0.0 920141.0 1.0X -Utils.cloneProperties 0 0 0 0.0 20097.0 45.8X +SerializationUtils.clone 1 1 0 0.0 863683.0 1.0X +Utils.cloneProperties 0 0 0 0.0 36508.0 23.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Large Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 2 2 0 0.0 1714798.0 1.0X -Utils.cloneProperties 0 0 0 0.0 40385.0 42.5X +SerializationUtils.clone 2 2 0 0.0 1612893.0 1.0X +Utils.cloneProperties 0 0 0 0.0 73617.0 21.9X diff --git a/core/benchmarks/XORShiftRandomBenchmark-jdk21-results.txt b/core/benchmarks/XORShiftRandomBenchmark-jdk21-results.txt index 9f2baa5d9bf80..4b892b7ea2c85 100644 --- a/core/benchmarks/XORShiftRandomBenchmark-jdk21-results.txt +++ b/core/benchmarks/XORShiftRandomBenchmark-jdk21-results.txt @@ -2,43 +2,43 @@ Pseudo random ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor nextInt: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 451 451 0 222.0 4.5 1.0X -XORShiftRandom 185 185 0 539.4 1.9 2.4X +java.util.Random 453 453 0 220.7 4.5 1.0X +XORShiftRandom 186 186 0 536.6 1.9 2.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor nextLong: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 897 897 0 111.5 9.0 1.0X -XORShiftRandom 371 371 0 269.5 3.7 2.4X +java.util.Random 900 901 1 111.1 9.0 1.0X +XORShiftRandom 373 373 1 268.1 3.7 2.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor nextDouble: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 914 914 0 109.4 9.1 1.0X -XORShiftRandom 371 371 1 269.5 3.7 2.5X +java.util.Random 905 905 0 110.5 9.0 1.0X +XORShiftRandom 373 373 0 268.2 3.7 2.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor nextGaussian: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 3381 3384 3 29.6 33.8 1.0X -XORShiftRandom 2480 2498 29 40.3 24.8 1.4X +java.util.Random 3412 3427 13 29.3 34.1 1.0X +XORShiftRandom 2469 2472 4 40.5 24.7 1.4X ================================================================================================ hash seed ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash seed: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -XORShiftRandom.hashSeed 1 1 0 12973.9 0.1 1.0X +XORShiftRandom.hashSeed 1 1 0 12522.5 0.1 1.0X diff --git a/core/benchmarks/XORShiftRandomBenchmark-results.txt b/core/benchmarks/XORShiftRandomBenchmark-results.txt index de5f7c04fddfc..c45a3c66afafa 100644 --- a/core/benchmarks/XORShiftRandomBenchmark-results.txt +++ b/core/benchmarks/XORShiftRandomBenchmark-results.txt @@ -2,43 +2,43 @@ Pseudo random ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor nextInt: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 439 439 0 227.7 4.4 1.0X -XORShiftRandom 185 185 0 539.5 1.9 2.4X +java.util.Random 441 441 0 226.6 4.4 1.0X +XORShiftRandom 186 186 0 536.7 1.9 2.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor nextLong: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 911 912 1 109.8 9.1 1.0X -XORShiftRandom 371 371 1 269.7 3.7 2.5X +java.util.Random 918 918 1 109.0 9.2 1.0X +XORShiftRandom 373 373 0 268.2 3.7 2.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor nextDouble: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -java.util.Random 904 904 0 110.6 9.0 1.0X -XORShiftRandom 371 371 0 269.7 3.7 2.4X +java.util.Random 904 905 1 110.7 9.0 1.0X +XORShiftRandom 373 374 1 268.2 3.7 2.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor nextGaussian: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 3997 3998 1 25.0 40.0 1.0X -XORShiftRandom 2926 2929 2 34.2 29.3 1.4X +java.util.Random 3590 3600 10 27.9 35.9 1.0X +XORShiftRandom 2941 2942 1 34.0 29.4 1.2X ================================================================================================ hash seed ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash seed: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -XORShiftRandom.hashSeed 2 2 0 6487.7 0.2 1.0X +XORShiftRandom.hashSeed 2 2 0 6458.4 0.2 1.0X diff --git a/core/benchmarks/ZStandardBenchmark-jdk21-results.txt b/core/benchmarks/ZStandardBenchmark-jdk21-results.txt index f6bd681451d5e..b2a325942cd88 100644 --- a/core/benchmarks/ZStandardBenchmark-jdk21-results.txt +++ b/core/benchmarks/ZStandardBenchmark-jdk21-results.txt @@ -2,48 +2,48 @@ Benchmark ZStandardCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.8.0-1014-azure 
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Compression 10000 times at level 1 without buffer pool 659 676 16 0.0 65860.7 1.0X -Compression 10000 times at level 2 without buffer pool 721 723 2 0.0 72135.5 0.9X -Compression 10000 times at level 3 without buffer pool 815 816 1 0.0 81500.6 0.8X -Compression 10000 times at level 1 with buffer pool 608 609 0 0.0 60846.6 1.1X -Compression 10000 times at level 2 with buffer pool 645 647 3 0.0 64476.3 1.0X -Compression 10000 times at level 3 with buffer pool 746 746 1 0.0 74584.0 0.9X +Compression 10000 times at level 1 without buffer pool 656 668 13 0.0 65555.1 1.0X +Compression 10000 times at level 2 without buffer pool 711 713 2 0.0 71147.9 0.9X +Compression 10000 times at level 3 without buffer pool 827 830 2 0.0 82718.7 0.8X +Compression 10000 times at level 1 with buffer pool 598 599 2 0.0 59789.9 1.1X +Compression 10000 times at level 2 with buffer pool 628 630 2 0.0 62774.0 1.0X +Compression 10000 times at level 3 with buffer pool 735 736 1 0.0 73517.1 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.8.0-1014-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------ -Decompression 10000 times from level 1 without buffer pool 828 829 1 0.0 82822.6 1.0X -Decompression 10000 times from level 2 without buffer pool 829 829 1 0.0 82900.7 1.0X -Decompression 10000 times from level 3 without buffer pool 828 833 8 0.0 82784.4 1.0X 
-Decompression 10000 times from level 1 with buffer pool 758 760 2 0.0 75756.5 1.1X -Decompression 10000 times from level 2 with buffer pool 758 758 1 0.0 75772.3 1.1X -Decompression 10000 times from level 3 with buffer pool 759 759 0 0.0 75852.7 1.1X +Decompression 10000 times from level 1 without buffer pool 823 824 1 0.0 82271.8 1.0X +Decompression 10000 times from level 2 without buffer pool 823 825 2 0.0 82313.0 1.0X +Decompression 10000 times from level 3 without buffer pool 825 832 10 0.0 82532.5 1.0X +Decompression 10000 times from level 1 with buffer pool 756 757 2 0.0 75593.4 1.1X +Decompression 10000 times from level 2 with buffer pool 757 759 2 0.0 75728.2 1.1X +Decompression 10000 times from level 3 with buffer pool 760 760 0 0.0 75986.2 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.8.0-1014-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parallel Compression at level 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parallel Compression with 0 workers 58 59 1 0.0 452489.9 1.0X -Parallel Compression with 1 workers 42 45 4 0.0 330066.0 1.4X -Parallel Compression with 2 workers 40 42 1 0.0 312560.3 1.4X -Parallel Compression with 4 workers 40 42 2 0.0 308802.7 1.5X -Parallel Compression with 8 workers 41 45 3 0.0 321331.3 1.4X -Parallel Compression with 16 workers 44 45 1 0.0 343311.5 1.3X +Parallel Compression with 0 workers 58 60 4 0.0 456002.7 1.0X +Parallel Compression with 1 workers 43 45 3 0.0 332797.0 1.4X +Parallel Compression with 2 workers 41 42 1 0.0 317101.0 1.4X +Parallel Compression with 4 workers 39 41 1 0.0 306350.9 1.5X +Parallel Compression with 8 workers 42 44 1 0.0 326335.3 1.4X +Parallel Compression with 16 workers 46 47 1 0.0 356789.8 1.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.8.0-1014-azure +OpenJDK 64-Bit Server VM 
21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parallel Compression at level 9: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parallel Compression with 0 workers 158 160 2 0.0 1234257.6 1.0X -Parallel Compression with 1 workers 193 194 1 0.0 1507686.4 0.8X -Parallel Compression with 2 workers 113 127 11 0.0 881068.0 1.4X -Parallel Compression with 4 workers 109 111 2 0.0 849241.3 1.5X -Parallel Compression with 8 workers 111 115 3 0.0 869455.2 1.4X -Parallel Compression with 16 workers 113 116 2 0.0 881832.5 1.4X +Parallel Compression with 0 workers 158 160 1 0.0 1237762.1 1.0X +Parallel Compression with 1 workers 189 190 3 0.0 1473899.5 0.8X +Parallel Compression with 2 workers 112 120 9 0.0 874992.3 1.4X +Parallel Compression with 4 workers 108 112 3 0.0 846156.6 1.5X +Parallel Compression with 8 workers 113 117 3 0.0 886576.8 1.4X +Parallel Compression with 16 workers 113 116 2 0.0 881278.0 1.4X diff --git a/core/benchmarks/ZStandardBenchmark-results.txt b/core/benchmarks/ZStandardBenchmark-results.txt index 136f0333590cc..0cd02cc48963a 100644 --- a/core/benchmarks/ZStandardBenchmark-results.txt +++ b/core/benchmarks/ZStandardBenchmark-results.txt @@ -2,48 +2,48 @@ Benchmark ZStandardCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.8.0-1014-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Compression 10000 times at level 1 without buffer pool 257 259 2 0.0 25704.2 1.0X -Compression 10000 
times at level 2 without buffer pool 674 676 2 0.0 67396.3 0.4X -Compression 10000 times at level 3 without buffer pool 775 787 11 0.0 77497.9 0.3X -Compression 10000 times at level 1 with buffer pool 573 574 0 0.0 57347.3 0.4X -Compression 10000 times at level 2 with buffer pool 602 603 2 0.0 60162.8 0.4X -Compression 10000 times at level 3 with buffer pool 722 725 3 0.0 72247.3 0.4X +Compression 10000 times at level 1 without buffer pool 263 405 194 0.0 26293.9 1.0X +Compression 10000 times at level 2 without buffer pool 693 694 1 0.0 69337.7 0.4X +Compression 10000 times at level 3 without buffer pool 805 809 4 0.0 80511.1 0.3X +Compression 10000 times at level 1 with buffer pool 576 577 2 0.0 57572.6 0.5X +Compression 10000 times at level 2 with buffer pool 611 612 1 0.0 61149.9 0.4X +Compression 10000 times at level 3 with buffer pool 730 731 1 0.0 73001.9 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.8.0-1014-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------ -Decompression 10000 times from level 1 without buffer pool 176 177 1 0.1 17641.2 1.0X -Decompression 10000 times from level 2 without buffer pool 176 178 1 0.1 17628.9 1.0X -Decompression 10000 times from level 3 without buffer pool 175 176 0 0.1 17506.1 1.0X -Decompression 10000 times from level 1 with buffer pool 151 152 1 0.1 15051.5 1.2X -Decompression 10000 times from level 2 with buffer pool 150 151 1 0.1 14998.0 1.2X -Decompression 10000 times from level 3 with buffer pool 150 151 0 0.1 15019.4 1.2X +Decompression 10000 times from level 1 without buffer pool 616 616 1 0.0 61555.7 1.0X +Decompression 10000 times from level 2 without buffer pool 617 618 1 0.0 61746.1 1.0X +Decompression 10000 
times from level 3 without buffer pool 614 615 1 0.0 61402.4 1.0X +Decompression 10000 times from level 1 with buffer pool 541 542 1 0.0 54078.9 1.1X +Decompression 10000 times from level 2 with buffer pool 541 542 1 0.0 54094.5 1.1X +Decompression 10000 times from level 3 with buffer pool 540 541 1 0.0 54049.5 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.8.0-1014-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parallel Compression at level 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parallel Compression with 0 workers 57 57 0 0.0 444425.2 1.0X -Parallel Compression with 1 workers 42 44 3 0.0 325107.6 1.4X -Parallel Compression with 2 workers 38 39 2 0.0 294840.0 1.5X -Parallel Compression with 4 workers 36 37 1 0.0 282143.1 1.6X -Parallel Compression with 8 workers 39 40 1 0.0 303793.6 1.5X -Parallel Compression with 16 workers 41 43 1 0.0 324165.5 1.4X +Parallel Compression with 0 workers 57 58 1 0.0 442501.6 1.0X +Parallel Compression with 1 workers 42 44 3 0.0 325787.4 1.4X +Parallel Compression with 2 workers 38 40 2 0.0 295047.1 1.5X +Parallel Compression with 4 workers 37 38 1 0.0 285755.4 1.5X +Parallel Compression with 8 workers 39 40 1 0.0 301689.5 1.5X +Parallel Compression with 16 workers 42 44 1 0.0 327951.9 1.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.8.0-1014-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parallel Compression at level 9: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parallel Compression with 0 workers 156 158 1 0.0 1220298.8 1.0X -Parallel Compression with 1 workers 188 189 1 0.0 1467911.4 0.8X -Parallel 
Compression with 2 workers 111 118 7 0.0 866985.2 1.4X -Parallel Compression with 4 workers 106 109 2 0.0 827592.1 1.5X -Parallel Compression with 8 workers 114 116 2 0.0 888419.5 1.4X -Parallel Compression with 16 workers 111 115 2 0.0 868463.5 1.4X +Parallel Compression with 0 workers 155 158 1 0.0 1213931.6 1.0X +Parallel Compression with 1 workers 189 191 2 0.0 1475730.7 0.8X +Parallel Compression with 2 workers 112 117 5 0.0 878455.7 1.4X +Parallel Compression with 4 workers 107 110 3 0.0 834762.2 1.5X +Parallel Compression with 8 workers 113 116 2 0.0 886435.5 1.4X +Parallel Compression with 16 workers 110 115 3 0.0 859182.0 1.4X diff --git a/core/pom.xml b/core/pom.xml index 7805a3f37ae53..79563c246ec4b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -426,7 +426,7 @@ net.sf.py4j py4j - 0.10.9.7 + 0.10.9.9 org.apache.spark diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java index f96513f1b1097..de3c41a4b526b 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java @@ -165,7 +165,7 @@ private void writeSortedFile(boolean isFinalFile) { MDC.of(LogKeys.TASK_ATTEMPT_ID$.MODULE$, taskContext.taskAttemptId()), MDC.of(LogKeys.THREAD_ID$.MODULE$, Thread.currentThread().getId()), MDC.of(LogKeys.MEMORY_SIZE$.MODULE$, Utils.bytesToString(getMemoryUsage())), - MDC.of(LogKeys.NUM_SPILL_INFOS$.MODULE$, spills.size()), + MDC.of(LogKeys.NUM_SPILLS$.MODULE$, spills.size()), MDC.of(LogKeys.SPILL_TIMES$.MODULE$, spills.size() != 1 ? 
"times" : "time")); } diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index ca7c1f8ba65e2..bf9b230446b26 100755 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -361,6 +361,10 @@ a.downloadbutton { width: 170px; } +.shuffle-write-time-checkbox-div { + width: 155px; +} + .result-serialization-time-checkbox-div { width: 185px; } diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index c8d6000cd6282..b5c6033bd9da4 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -17,6 +17,7 @@ package org.apache.spark +import java.io.Closeable import java.util.{Properties, TimerTask} import java.util.concurrent.{ScheduledThreadPoolExecutor, TimeUnit} @@ -62,7 +63,7 @@ class BarrierTaskContext private[spark] ( log"for ${MDC(TOTAL_TIME, System.currentTimeMillis() - st)} ms,") logInfo(log"Task ${MDC(TASK_ATTEMPT_ID, taskAttemptId())}" + log" from Stage ${MDC(STAGE_ID, stageId())}" + - log"(Attempt ${MDC(STAGE_ATTEMPT, stageAttemptNumber())}) " + + log"(Attempt ${MDC(STAGE_ATTEMPT_ID, stageAttemptNumber())}) " + msg + waitMsg + log" current barrier epoch is ${MDC(BARRIER_EPOCH, barrierEpoch)}.") } @@ -273,6 +274,18 @@ class BarrierTaskContext private[spark] ( } override private[spark] def getLocalProperties: Properties = taskContext.getLocalProperties + + override private[spark] def interruptible(): Boolean = taskContext.interruptible() + + override private[spark] def pendingInterrupt(threadToInterrupt: Option[Thread], reason: String) + : Unit = { + taskContext.pendingInterrupt(threadToInterrupt, reason) + } + + override private[spark] def createResourceUninterruptibly[T <: Closeable](resourceBuilder: => T) + : T = { + 
taskContext.createResourceUninterruptibly(resourceBuilder) + } } @Experimental diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 1fe02eec3a072..dd131e443135f 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -885,7 +885,7 @@ private[spark] class ExecutorAllocationManager( } else { logWarning(log"Should have exactly one resource profile for stage " + log"${MDC(STAGE_ATTEMPT, stageAttempt)}, but have " + - log"${MDC(RESOURCE_PROFILE_ID, rpForStage)}") + log"${MDC(RESOURCE_PROFILE_IDS, rpForStage)}") } } } diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index cfb514913694b..ae6ef1ee55608 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -35,6 +35,228 @@ import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils +trait ReadOnlySparkConf { + /** Get a parameter; throws a NoSuchElementException if it's not set */ + def get(key: String): String = { + getOption(key).getOrElse(throw new NoSuchElementException(key)) + } + + /** Get a parameter, falling back to a default if not set */ + def get(key: String, defaultValue: String): String = { + getOption(key).getOrElse(defaultValue) + } + + /** + * Retrieves the value of a pre-defined configuration entry. + * + * - This is an internal Spark API. + * - The return type if defined by the configuration entry. + * - This will throw an exception is the config is not optional and the value is not set. + */ + private[spark] def get[T](entry: ConfigEntry[T]): T + + /** + * Get a time parameter as seconds; throws a NoSuchElementException if it's not set. 
If no + * suffix is provided then seconds are assumed. + * + * @throws java.util.NoSuchElementException If the time parameter is not set + * @throws NumberFormatException If the value cannot be interpreted as seconds + */ + def getTimeAsSeconds(key: String): Long = catchIllegalValue(key) { + Utils.timeStringAsSeconds(get(key)) + } + + /** + * Get a time parameter as seconds, falling back to a default if not set. If no + * suffix is provided then seconds are assumed. + * + * @throws NumberFormatException If the value cannot be interpreted as seconds + */ + def getTimeAsSeconds(key: String, defaultValue: String): Long = catchIllegalValue(key) { + Utils.timeStringAsSeconds(get(key, defaultValue)) + } + + /** + * Get a time parameter as milliseconds; throws a NoSuchElementException if it's not set. If no + * suffix is provided then milliseconds are assumed. + * + * @throws java.util.NoSuchElementException If the time parameter is not set + * @throws NumberFormatException If the value cannot be interpreted as milliseconds + */ + def getTimeAsMs(key: String): Long = catchIllegalValue(key) { + Utils.timeStringAsMs(get(key)) + } + + /** + * Get a time parameter as milliseconds, falling back to a default if not set. If no + * suffix is provided then milliseconds are assumed. + * + * @throws NumberFormatException If the value cannot be interpreted as milliseconds + */ + def getTimeAsMs(key: String, defaultValue: String): Long = catchIllegalValue(key) { + Utils.timeStringAsMs(get(key, defaultValue)) + } + + /** + * Get a size parameter as bytes; throws a NoSuchElementException if it's not set. If no + * suffix is provided then bytes are assumed. 
+ * + * @throws java.util.NoSuchElementException If the size parameter is not set + * @throws NumberFormatException If the value cannot be interpreted as bytes + */ + def getSizeAsBytes(key: String): Long = catchIllegalValue(key) { + Utils.byteStringAsBytes(get(key)) + } + + /** + * Get a size parameter as bytes, falling back to a default if not set. If no + * suffix is provided then bytes are assumed. + * + * @throws NumberFormatException If the value cannot be interpreted as bytes + */ + def getSizeAsBytes(key: String, defaultValue: String): Long = catchIllegalValue(key) { + Utils.byteStringAsBytes(get(key, defaultValue)) + } + + /** + * Get a size parameter as bytes, falling back to a default if not set. + * + * @throws NumberFormatException If the value cannot be interpreted as bytes + */ + def getSizeAsBytes(key: String, defaultValue: Long): Long = catchIllegalValue(key) { + Utils.byteStringAsBytes(get(key, s"${defaultValue}B")) + } + + /** + * Get a size parameter as Kibibytes; throws a NoSuchElementException if it's not set. If no + * suffix is provided then Kibibytes are assumed. + * + * @throws java.util.NoSuchElementException If the size parameter is not set + * @throws NumberFormatException If the value cannot be interpreted as Kibibytes + */ + def getSizeAsKb(key: String): Long = catchIllegalValue(key) { + Utils.byteStringAsKb(get(key)) + } + + /** + * Get a size parameter as Kibibytes, falling back to a default if not set. If no + * suffix is provided then Kibibytes are assumed. + * + * @throws NumberFormatException If the value cannot be interpreted as Kibibytes + */ + def getSizeAsKb(key: String, defaultValue: String): Long = catchIllegalValue(key) { + Utils.byteStringAsKb(get(key, defaultValue)) + } + + /** + * Get a size parameter as Mebibytes; throws a NoSuchElementException if it's not set. If no + * suffix is provided then Mebibytes are assumed. 
+ * + * @throws java.util.NoSuchElementException If the size parameter is not set + * @throws NumberFormatException If the value cannot be interpreted as Mebibytes + */ + def getSizeAsMb(key: String): Long = catchIllegalValue(key) { + Utils.byteStringAsMb(get(key)) + } + + /** + * Get a size parameter as Mebibytes, falling back to a default if not set. If no + * suffix is provided then Mebibytes are assumed. + * + * @throws NumberFormatException If the value cannot be interpreted as Mebibytes + */ + def getSizeAsMb(key: String, defaultValue: String): Long = catchIllegalValue(key) { + Utils.byteStringAsMb(get(key, defaultValue)) + } + + /** + * Get a size parameter as Gibibytes; throws a NoSuchElementException if it's not set. If no + * suffix is provided then Gibibytes are assumed. + * + * @throws java.util.NoSuchElementException If the size parameter is not set + * @throws NumberFormatException If the value cannot be interpreted as Gibibytes + */ + def getSizeAsGb(key: String): Long = catchIllegalValue(key) { + Utils.byteStringAsGb(get(key)) + } + + /** + * Get a size parameter as Gibibytes, falling back to a default if not set. If no + * suffix is provided then Gibibytes are assumed. 
+ * + * @throws NumberFormatException If the value cannot be interpreted as Gibibytes + */ + def getSizeAsGb(key: String, defaultValue: String): Long = catchIllegalValue(key) { + Utils.byteStringAsGb(get(key, defaultValue)) + } + + /** Get a parameter as an Option */ + def getOption(key: String): Option[String] + + /** Get all parameters as a list of pairs */ + def getAll: Array[(String, String)] + + /** + * Get a parameter as an integer, falling back to a default if not set + * + * @throws NumberFormatException If the value cannot be interpreted as an integer + */ + def getInt(key: String, defaultValue: Int): Int = catchIllegalValue(key) { + getOption(key).map(_.toInt).getOrElse(defaultValue) + } + + /** + * Get a parameter as a long, falling back to a default if not set + * + * @throws NumberFormatException If the value cannot be interpreted as a long + */ + def getLong(key: String, defaultValue: Long): Long = catchIllegalValue(key) { + getOption(key).map(_.toLong).getOrElse(defaultValue) + } + + /** + * Get a parameter as a double, falling back to a default if not ste + * + * @throws NumberFormatException If the value cannot be interpreted as a double + */ + def getDouble(key: String, defaultValue: Double): Double = catchIllegalValue(key) { + getOption(key).map(_.toDouble).getOrElse(defaultValue) + } + + /** + * Get a parameter as a boolean, falling back to a default if not set + * + * @throws IllegalArgumentException If the value cannot be interpreted as a boolean + */ + def getBoolean(key: String, defaultValue: Boolean): Boolean = catchIllegalValue(key) { + getOption(key).map(_.toBoolean).getOrElse(defaultValue) + } + + /** Does the configuration contain a given parameter? */ + def contains(key: String): Boolean + + /** Does the configuration have the typed config entry? */ + def contains(entry: ConfigEntry[_]): Boolean = contains(entry.key) + + /** + * Wrapper method for get() methods which require some specific value format. 
This catches + * any [[NumberFormatException]] or [[IllegalArgumentException]] and re-raises it with the + * incorrectly configured key in the exception message. + */ + protected def catchIllegalValue[T](key: String)(getValue: => T): T = { + try { + getValue + } catch { + case e: NumberFormatException => + // NumberFormatException doesn't have a constructor that takes a cause for some reason. + throw new NumberFormatException(s"Illegal value for config key $key: ${e.getMessage}") + .initCause(e) + case e: IllegalArgumentException => + throw new IllegalArgumentException(s"Illegal value for config key $key: ${e.getMessage}", e) + } + } +} + /** * Configuration for a Spark application. Used to set various Spark parameters as key-value pairs. * @@ -53,7 +275,11 @@ import org.apache.spark.util.Utils * @note Once a SparkConf object is passed to Spark, it is cloned and can no longer be modified * by the user. Spark does not support modifying the configuration at runtime. */ -class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Serializable { +class SparkConf(loadDefaults: Boolean) + extends ReadOnlySparkConf + with Cloneable + with Logging + with Serializable { import SparkConf._ @@ -242,16 +468,6 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria remove(entry.key) } - /** Get a parameter; throws a NoSuchElementException if it's not set */ - def get(key: String): String = { - getOption(key).getOrElse(throw new NoSuchElementException(key)) - } - - /** Get a parameter, falling back to a default if not set */ - def get(key: String, defaultValue: String): String = { - getOption(key).getOrElse(defaultValue) - } - /** * Retrieves the value of a pre-defined configuration entry. * @@ -263,128 +479,6 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria entry.readFrom(reader) } - /** - * Get a time parameter as seconds; throws a NoSuchElementException if it's not set. 
If no - * suffix is provided then seconds are assumed. - * @throws java.util.NoSuchElementException If the time parameter is not set - * @throws NumberFormatException If the value cannot be interpreted as seconds - */ - def getTimeAsSeconds(key: String): Long = catchIllegalValue(key) { - Utils.timeStringAsSeconds(get(key)) - } - - /** - * Get a time parameter as seconds, falling back to a default if not set. If no - * suffix is provided then seconds are assumed. - * @throws NumberFormatException If the value cannot be interpreted as seconds - */ - def getTimeAsSeconds(key: String, defaultValue: String): Long = catchIllegalValue(key) { - Utils.timeStringAsSeconds(get(key, defaultValue)) - } - - /** - * Get a time parameter as milliseconds; throws a NoSuchElementException if it's not set. If no - * suffix is provided then milliseconds are assumed. - * @throws java.util.NoSuchElementException If the time parameter is not set - * @throws NumberFormatException If the value cannot be interpreted as milliseconds - */ - def getTimeAsMs(key: String): Long = catchIllegalValue(key) { - Utils.timeStringAsMs(get(key)) - } - - /** - * Get a time parameter as milliseconds, falling back to a default if not set. If no - * suffix is provided then milliseconds are assumed. - * @throws NumberFormatException If the value cannot be interpreted as milliseconds - */ - def getTimeAsMs(key: String, defaultValue: String): Long = catchIllegalValue(key) { - Utils.timeStringAsMs(get(key, defaultValue)) - } - - /** - * Get a size parameter as bytes; throws a NoSuchElementException if it's not set. If no - * suffix is provided then bytes are assumed. - * @throws java.util.NoSuchElementException If the size parameter is not set - * @throws NumberFormatException If the value cannot be interpreted as bytes - */ - def getSizeAsBytes(key: String): Long = catchIllegalValue(key) { - Utils.byteStringAsBytes(get(key)) - } - - /** - * Get a size parameter as bytes, falling back to a default if not set. 
If no - * suffix is provided then bytes are assumed. - * @throws NumberFormatException If the value cannot be interpreted as bytes - */ - def getSizeAsBytes(key: String, defaultValue: String): Long = catchIllegalValue(key) { - Utils.byteStringAsBytes(get(key, defaultValue)) - } - - /** - * Get a size parameter as bytes, falling back to a default if not set. - * @throws NumberFormatException If the value cannot be interpreted as bytes - */ - def getSizeAsBytes(key: String, defaultValue: Long): Long = catchIllegalValue(key) { - Utils.byteStringAsBytes(get(key, s"${defaultValue}B")) - } - - /** - * Get a size parameter as Kibibytes; throws a NoSuchElementException if it's not set. If no - * suffix is provided then Kibibytes are assumed. - * @throws java.util.NoSuchElementException If the size parameter is not set - * @throws NumberFormatException If the value cannot be interpreted as Kibibytes - */ - def getSizeAsKb(key: String): Long = catchIllegalValue(key) { - Utils.byteStringAsKb(get(key)) - } - - /** - * Get a size parameter as Kibibytes, falling back to a default if not set. If no - * suffix is provided then Kibibytes are assumed. - * @throws NumberFormatException If the value cannot be interpreted as Kibibytes - */ - def getSizeAsKb(key: String, defaultValue: String): Long = catchIllegalValue(key) { - Utils.byteStringAsKb(get(key, defaultValue)) - } - - /** - * Get a size parameter as Mebibytes; throws a NoSuchElementException if it's not set. If no - * suffix is provided then Mebibytes are assumed. - * @throws java.util.NoSuchElementException If the size parameter is not set - * @throws NumberFormatException If the value cannot be interpreted as Mebibytes - */ - def getSizeAsMb(key: String): Long = catchIllegalValue(key) { - Utils.byteStringAsMb(get(key)) - } - - /** - * Get a size parameter as Mebibytes, falling back to a default if not set. If no - * suffix is provided then Mebibytes are assumed. 
- * @throws NumberFormatException If the value cannot be interpreted as Mebibytes - */ - def getSizeAsMb(key: String, defaultValue: String): Long = catchIllegalValue(key) { - Utils.byteStringAsMb(get(key, defaultValue)) - } - - /** - * Get a size parameter as Gibibytes; throws a NoSuchElementException if it's not set. If no - * suffix is provided then Gibibytes are assumed. - * @throws java.util.NoSuchElementException If the size parameter is not set - * @throws NumberFormatException If the value cannot be interpreted as Gibibytes - */ - def getSizeAsGb(key: String): Long = catchIllegalValue(key) { - Utils.byteStringAsGb(get(key)) - } - - /** - * Get a size parameter as Gibibytes, falling back to a default if not set. If no - * suffix is provided then Gibibytes are assumed. - * @throws NumberFormatException If the value cannot be interpreted as Gibibytes - */ - def getSizeAsGb(key: String, defaultValue: String): Long = catchIllegalValue(key) { - Utils.byteStringAsGb(get(key, defaultValue)) - } - /** Get a parameter as an Option */ def getOption(key: String): Option[String] = { Option(settings.get(key)).orElse(getDeprecatedConfig(key, settings)) @@ -408,38 +502,6 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria .map { case (k, v) => (k.substring(prefix.length), v) } } - /** - * Get a parameter as an integer, falling back to a default if not set - * @throws NumberFormatException If the value cannot be interpreted as an integer - */ - def getInt(key: String, defaultValue: Int): Int = catchIllegalValue(key) { - getOption(key).map(_.toInt).getOrElse(defaultValue) - } - - /** - * Get a parameter as a long, falling back to a default if not set - * @throws NumberFormatException If the value cannot be interpreted as a long - */ - def getLong(key: String, defaultValue: Long): Long = catchIllegalValue(key) { - getOption(key).map(_.toLong).getOrElse(defaultValue) - } - - /** - * Get a parameter as a double, falling back to a default if not 
ste - * @throws NumberFormatException If the value cannot be interpreted as a double - */ - def getDouble(key: String, defaultValue: Double): Double = catchIllegalValue(key) { - getOption(key).map(_.toDouble).getOrElse(defaultValue) - } - - /** - * Get a parameter as a boolean, falling back to a default if not set - * @throws IllegalArgumentException If the value cannot be interpreted as a boolean - */ - def getBoolean(key: String, defaultValue: Boolean): Boolean = catchIllegalValue(key) { - getOption(key).map(_.toBoolean).getOrElse(defaultValue) - } - /** Get all executor environment variables set on this SparkConf */ def getExecutorEnv: Seq[(String, String)] = { getAllWithPrefix("spark.executorEnv.").toImmutableArraySeq @@ -457,8 +519,6 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria configsWithAlternatives.get(key).toSeq.flatten.exists { alt => contains(alt.key) } } - private[spark] def contains(entry: ConfigEntry[_]): Boolean = contains(entry.key) - /** Copy this object */ override def clone: SparkConf = { val cloned = new SparkConf(false) @@ -474,25 +534,6 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria */ private[spark] def getenv(name: String): String = System.getenv(name) - /** - * Wrapper method for get() methods which require some specific value format. This catches - * any [[NumberFormatException]] or [[IllegalArgumentException]] and re-raises it with the - * incorrectly configured key in the exception message. - */ - private def catchIllegalValue[T](key: String)(getValue: => T): T = { - try { - getValue - } catch { - case e: NumberFormatException => - // NumberFormatException doesn't have a constructor that takes a cause for some reason. 
- throw new NumberFormatException(s"Illegal value for config key $key: ${e.getMessage}") - .initCause(e) - case e: IllegalArgumentException => - throw new IllegalArgumentException(s"Illegal value for config key $key: ${e.getMessage}", e) - } - } - - /** * Checks for illegal or deprecated config settings. Throws an exception for the former. Not * idempotent - may mutate this conf object to convert deprecated settings to supported ones. @@ -608,6 +649,7 @@ private[spark] object SparkConf extends Logging { "Please use spark.kryoserializer.buffer instead. The default value for " + "spark.kryoserializer.buffer.mb was previously specified as '0.064'. Fractional values " + "are no longer accepted. To specify the equivalent now, one may use '64k'."), + DeprecatedConfig("spark.shuffle.spill", "1.6", "Not used anymore."), DeprecatedConfig("spark.rpc", "2.0", "Not used anymore."), DeprecatedConfig("spark.scheduler.executorTaskBlacklistTime", "2.1.0", "Please use the new excludedOnFailure options, spark.excludeOnFailure.*"), diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 042179d86c31a..30d772bd62d77 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -252,6 +252,9 @@ class SparkContext(config: SparkConf) extends Logging { private[spark] def conf: SparkConf = _conf + /** Get a read-only reference to the spark conf. This is preferred version over [[getConf]]. */ + def getReadOnlyConf: ReadOnlySparkConf = _conf + /** * Return a copy of this SparkContext's configuration. The configuration ''cannot'' be * changed at runtime. 
@@ -420,9 +423,6 @@ class SparkContext(config: SparkConf) extends Logging { if (!_conf.contains("spark.app.name")) { throw new SparkException("An application name must be set in your configuration") } - // HADOOP-19097 Set fs.s3a.connection.establish.timeout to 30s - // We can remove this after Apache Hadoop 3.4.1 releases - conf.setIfMissing("spark.hadoop.fs.s3a.connection.establish.timeout", "30000") // This should be set as early as possible. SparkContext.fillMissingMagicCommitterConfsIfNeeded(_conf) @@ -1878,6 +1878,7 @@ class SparkContext(config: SparkConf) extends Logging { if (uri.getFragment != null) uri.getFragment else source.getName) logInfo( log"Unpacking an archive ${MDC(LogKeys.PATH, path)}" + + log" (${MDC(LogKeys.BYTE_SIZE, source.length)} bytes)" + log" from ${MDC(LogKeys.SOURCE_PATH, source.getAbsolutePath)}" + log" to ${MDC(LogKeys.DESTINATION_PATH, dest.getAbsolutePath)}") Utils.deleteRecursively(dest) diff --git a/core/src/main/scala/org/apache/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala index 15ddd08fb4aef..5384fd86a8f19 100644 --- a/core/src/main/scala/org/apache/spark/TaskContext.scala +++ b/core/src/main/scala/org/apache/spark/TaskContext.scala @@ -17,7 +17,7 @@ package org.apache.spark -import java.io.Serializable +import java.io.Closeable import java.util.Properties import org.apache.spark.annotation.{DeveloperApi, Evolving, Since} @@ -305,4 +305,24 @@ abstract class TaskContext extends Serializable { /** Gets local properties set upstream in the driver. */ private[spark] def getLocalProperties: Properties + + /** Whether the current task is allowed to interrupt. */ + private[spark] def interruptible(): Boolean + + /** + * Pending the interruption request until the task is able to + * interrupt after creating the resource uninterruptibly. + */ + private[spark] def pendingInterrupt(threadToInterrupt: Option[Thread], reason: String): Unit + + /** + * Creating a closeable resource uninterruptibly. 
A task is not allowed to interrupt in this + * state until the resource creation finishes. E.g., + * {{{ + * val linesReader = TaskContext.get().createResourceUninterruptibly { + * new HadoopFileLinesReader(file, parser.options.lineSeparatorInRead, conf) + * } + * }}} + */ + private[spark] def createResourceUninterruptibly[T <: Closeable](resourceBuilder: => T): T } diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala index 8167952d6b87f..f0e844289b9db 100644 --- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala +++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala @@ -17,6 +17,7 @@ package org.apache.spark +import java.io.Closeable import java.util.{Properties, Stack} import javax.annotation.concurrent.GuardedBy @@ -82,6 +83,13 @@ private[spark] class TaskContextImpl( // If defined, the corresponding task has been killed and this option contains the reason. @volatile private var reasonIfKilled: Option[String] = None + // The pending interruption request, which is blocked by uninterruptible resource creation. + // Should be protected by `TaskContext.synchronized`. + private var pendingInterruptRequest: Option[(Option[Thread], String)] = None + + // Whether this task is able to be interrupted. Should be protected by `TaskContext.synchronized`. + private var _interruptible = true + // Whether the task has completed. 
private var completed: Boolean = false @@ -296,4 +304,39 @@ private[spark] class TaskContextImpl( private[spark] override def fetchFailed: Option[FetchFailedException] = _fetchFailedException private[spark] override def getLocalProperties: Properties = localProperties + + + override def interruptible(): Boolean = TaskContext.synchronized(_interruptible) + + override def pendingInterrupt(threadToInterrupt: Option[Thread], reason: String): Unit = { + TaskContext.synchronized { + pendingInterruptRequest = Some((threadToInterrupt, reason)) + } + } + + def createResourceUninterruptibly[T <: Closeable](resourceBuilder: => T): T = { + + @inline def interruptIfRequired(): Unit = { + pendingInterruptRequest.foreach { case (threadToInterrupt, reason) => + markInterrupted(reason) + threadToInterrupt.foreach(_.interrupt()) + } + killTaskIfInterrupted() + } + + TaskContext.synchronized { + interruptIfRequired() + _interruptible = false + } + try { + val resource = resourceBuilder + addTaskCompletionListener[Unit](_ => resource.close()) + resource + } finally { + TaskContext.synchronized { + _interruptible = true + interruptIfRequired() + } + } + } } diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index 9de350bc3130f..7311ef296363d 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -630,6 +630,9 @@ class JavaSparkContext(val sc: SparkContext) extends Closeable { */ def getConf: SparkConf = sc.getConf + /** Return a read-only version of the spark conf. */ + def getReadOnlyConf: ReadOnlySparkConf = sc.getReadOnlyConf + /** * Pass-through to SparkContext.setCallSite. For API support only. 
*/ diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index b7fb22bab844a..e3d10574419b3 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -109,7 +109,8 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( protected val funcs: Seq[ChainedPythonFunctions], protected val evalType: Int, protected val argOffsets: Array[Array[Int]], - protected val jobArtifactUUID: Option[String]) + protected val jobArtifactUUID: Option[String], + protected val metrics: Map[String, AccumulatorV2[Long, Long]]) extends Logging { require(funcs.length == argOffsets.length, "argOffsets should have the same length as funcs") @@ -128,6 +129,8 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( protected val pythonExec: String = funcs.head.funcs.head.pythonExec protected val pythonVer: String = funcs.head.funcs.head.pythonVer + protected val batchSizeForPythonUDF: Int = 100 + // WARN: Both configurations, 'spark.python.daemon.module' and 'spark.python.worker.module' are // for very advanced users and they are experimental. This should be considered // as expert-only option, and shouldn't be used before knowing what it means exactly. 
@@ -211,6 +214,8 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( if (faultHandlerEnabled) { envVars.put("PYTHON_FAULTHANDLER_DIR", BasePythonRunner.faultHandlerLogDir.toString) } + // allow the user to set the batch size for the BatchedSerializer on UDFs + envVars.put("PYTHON_UDF_BATCH_SIZE", batchSizeForPythonUDF.toString) envVars.put("SPARK_JOB_ARTIFACT_UUID", jobArtifactUUID.getOrElse("default")) @@ -522,6 +527,9 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( log"boot = ${MDC(LogKeys.BOOT_TIME, boot)}, " + log"init = ${MDC(LogKeys.INIT_TIME, init)}, " + log"finish = ${MDC(LogKeys.FINISH_TIME, finish)}") + metrics.get("pythonBootTime").foreach(_.add(boot)) + metrics.get("pythonInitTime").foreach(_.add(init)) + metrics.get("pythonTotalTime").foreach(_.add(total)) val memoryBytesSpilled = stream.readLong() val diskBytesSpilled = stream.readLong() context.taskMetrics().incMemoryBytesSpilled(memoryBytesSpilled) @@ -824,7 +832,7 @@ private[spark] object PythonRunner { private[spark] class PythonRunner( funcs: Seq[ChainedPythonFunctions], jobArtifactUUID: Option[String]) extends BasePythonRunner[Array[Byte], Array[Byte]]( - funcs, PythonEvalType.NON_UDF, Array(Array(0)), jobArtifactUUID) { + funcs, PythonEvalType.NON_UDF, Array(Array(0)), jobArtifactUUID, Map.empty) { protected override def newWriter( env: SparkEnv, diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 045ed0e4c01cb..816ceea327aae 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -34,7 +34,7 @@ import org.apache.spark.util.ArrayImplicits.SparkArrayOps import org.apache.spark.util.Utils private[spark] object PythonUtils extends Logging { - val PY4J_ZIP_NAME = "py4j-0.10.9.7-src.zip" + val PY4J_ZIP_NAME = "py4j-0.10.9.9-src.zip" /** Get the PYTHONPATH for PySpark, either from 
SPARK_HOME, if it is set, or from our JAR */ def sparkPythonPath: String = { diff --git a/core/src/main/scala/org/apache/spark/api/python/StreamingPythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/StreamingPythonRunner.scala index 0ff2b79ab6623..ce933337afc35 100644 --- a/core/src/main/scala/org/apache/spark/api/python/StreamingPythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/StreamingPythonRunner.scala @@ -48,10 +48,10 @@ private[spark] class StreamingPythonRunner( protected val bufferSize: Int = conf.get(BUFFER_SIZE) protected val authSocketTimeout = conf.get(PYTHON_AUTH_SOCKET_TIMEOUT) - private val envVars: java.util.Map[String, String] = func.envVars - private val pythonExec: String = func.pythonExec - private var pythonWorker: Option[PythonWorker] = None - private var pythonWorkerFactory: Option[PythonWorkerFactory] = None + protected val envVars: java.util.Map[String, String] = func.envVars + protected val pythonExec: String = func.pythonExec + protected var pythonWorker: Option[PythonWorker] = None + protected var pythonWorkerFactory: Option[PythonWorkerFactory] = None protected val pythonVer: String = func.pythonVer /** @@ -68,7 +68,9 @@ private[spark] class StreamingPythonRunner( envVars.put("SPARK_AUTH_SOCKET_TIMEOUT", authSocketTimeout.toString) envVars.put("SPARK_BUFFER_SIZE", bufferSize.toString) - envVars.put("SPARK_CONNECT_LOCAL_URL", connectUrl) + if (!connectUrl.isEmpty) { + envVars.put("SPARK_CONNECT_LOCAL_URL", connectUrl) + } val workerFactory = new PythonWorkerFactory(pythonExec, workerModule, envVars.asScala.toMap, false) @@ -83,7 +85,9 @@ private[spark] class StreamingPythonRunner( PythonWorkerUtils.writePythonVersion(pythonVer, dataOut) // Send sessionId - PythonRDD.writeUTF(sessionId, dataOut) + if (!sessionId.isEmpty) { + PythonRDD.writeUTF(sessionId, dataOut) + } // Send the user function to python process PythonWorkerUtils.writePythonFunction(func, dataOut) diff --git 
a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index cb5996a5097d2..12e031711aa2a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -205,7 +205,6 @@ private[deploy] object DeployMessages { case class RegisteredApplication(appId: String, master: RpcEndpointRef) extends DeployMessage - // TODO(matei): replace hostPort with host case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) { Utils.checkHostPort(hostPort) } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala index 6e0fe69f3bfb6..8caf67ff4680b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala @@ -24,7 +24,7 @@ import scala.jdk.CollectionConverters._ import com.codahale.metrics.{Counter, MetricRegistry, Timer} import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache, RemovalListener, RemovalNotification} import com.google.common.util.concurrent.UncheckedExecutionException -import jakarta.servlet.{DispatcherType, Filter, FilterChain, FilterConfig, ServletException, ServletRequest, ServletResponse} +import jakarta.servlet.{DispatcherType, Filter, FilterChain, ServletException, ServletRequest, ServletResponse} import jakarta.servlet.http.{HttpServletRequest, HttpServletResponse} import org.eclipse.jetty.servlet.FilterHolder @@ -428,9 +428,4 @@ private[history] class ApplicationCacheCheckFilter( httpResponse.sendRedirect(redirectUrl) } } - - override def init(config: FilterConfig): Unit = { } - - override def destroy(): Unit = { } - } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala 
b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala index f3bb6d5af3358..990ab680f3aaf 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala @@ -187,12 +187,7 @@ object EventLogFileWriter { } def nameForAppAndAttempt(appId: String, appAttemptId: Option[String]): String = { - val base = Utils.sanitizeDirName(appId) - if (appAttemptId.isDefined) { - base + "_" + Utils.sanitizeDirName(appAttemptId.get) - } else { - base - } + Utils.nameForAppAndAttempt(appId, appAttemptId) } def codecName(log: Path): Option[String] = { diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala index 6647b11874d72..0904581d72367 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala @@ -33,12 +33,6 @@ private[master] class MasterArguments(args: Array[String], conf: SparkConf) exte var webUiPort = 8080 var propertiesFile: String = null - // Check for settings in environment variables - if (System.getenv("SPARK_MASTER_IP") != null) { - logWarning("SPARK_MASTER_IP is deprecated, please use SPARK_MASTER_HOST") - host = System.getenv("SPARK_MASTER_IP") - } - if (System.getenv("SPARK_MASTER_HOST") != null) { host = System.getenv("SPARK_MASTER_HOST") } @@ -63,11 +57,6 @@ private[master] class MasterArguments(args: Array[String], conf: SparkConf) exte @tailrec private def parse(args: List[String]): Unit = args match { - case ("--ip" | "-i") :: value :: tail => - Utils.checkHost(value) - host = value - parse(tail) - case ("--host" | "-h") :: value :: tail => Utils.checkHost(value) host = value @@ -103,7 +92,6 @@ private[master] class MasterArguments(args: Array[String], conf: SparkConf) exte "Usage: Master [options]\n" + "\n" + "Options:\n" 
+ - " -i HOST, --ip HOST Hostname to listen on (deprecated, please use --host or -h) \n" + " -h HOST, --host HOST Hostname to listen on\n" + " -p PORT, --port PORT Port to listen on (default: 7077)\n" + " --webui-port PORT Port for web UI (default: 8080)\n" + diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala index f24cd59418300..87ca01fe82a97 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala @@ -70,11 +70,6 @@ private[worker] class WorkerArguments(args: Array[String], conf: SparkConf) { @tailrec private def parse(args: List[String]): Unit = args match { - case ("--ip" | "-i") :: value :: tail => - Utils.checkHost(value) - host = value - parse(tail) - case ("--host" | "-h") :: value :: tail => Utils.checkHost(value) host = value @@ -137,7 +132,6 @@ private[worker] class WorkerArguments(args: Array[String], conf: SparkConf) { " -c CORES, --cores CORES Number of cores to use\n" + " -m MEM, --memory MEM Amount of memory to use (e.g. 
1000M, 2G)\n" + " -d DIR, --work-dir DIR Directory to run apps in (default: SPARK_HOME/work)\n" + - " -i HOST, --ip IP Hostname to listen on (deprecated, please use --host or -h)\n" + " -h HOST, --host HOST Hostname to listen on\n" + " -p PORT, --port PORT Port to listen on (default: random)\n" + " --webui-port PORT Port for web UI (default: 8081)\n" + diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index e880cf8da9ec2..a73380cab690e 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -479,6 +479,29 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { driverConf.set(EXECUTOR_ID, arguments.executorId) cfg.logLevel.foreach(logLevel => Utils.setLogLevelIfNeeded(logLevel)) + // Set executor memory related config here according to resource profile + if (cfg.resourceProfile.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) { + cfg.resourceProfile + .executorResources + .foreach { + case (ResourceProfile.OFFHEAP_MEM, request) => + driverConf.set(MEMORY_OFFHEAP_SIZE.key, request.amount.toString + "m") + logInfo(log"Set executor off-heap memory to " + + log"${MDC(LogKeys.EXECUTOR_MEMORY_OFFHEAP, request)}") + case (ResourceProfile.MEMORY, request) => + driverConf.set(EXECUTOR_MEMORY.key, request.amount.toString + "m") + logInfo(log"Set executor memory to ${MDC(LogKeys.EXECUTOR_MEMORY_SIZE, request)}") + case (ResourceProfile.OVERHEAD_MEM, request) => + // Maybe don't need to set this since it's nearly used by tasks. 
+ driverConf.set(EXECUTOR_MEMORY_OVERHEAD.key, request.amount.toString + "m") + logInfo(log"Set executor memory_overhead to " + + log"${MDC(LogKeys.EXECUTOR_MEMORY_OVERHEAD_SIZE, request)}") + case (ResourceProfile.CORES, request) => + driverConf.set(EXECUTOR_CORES.key, request.amount.toString) + logInfo(log"Set executor cores to ${MDC(LogKeys.NUM_EXECUTOR_CORES, request)}") + case _ => + } + } val env = SparkEnv.createExecutorEnv(driverConf, arguments.executorId, arguments.bindAddress, arguments.hostname, arguments.cores, cfg.ioEncryptionKey, isLocal = false) // Set the application attemptId in the BlockStoreClient if available. diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index c299f38526aeb..f1087b695a7da 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -1210,6 +1210,7 @@ private[spark] class Executor( if (sourceURI.getFragment != null) sourceURI.getFragment else source.getName) logInfo( log"Unpacking an archive ${LogMDC(ARCHIVE_NAME, name)}" + + log" (${LogMDC(BYTE_SIZE, source.length)} bytes)" + log" from ${LogMDC(SOURCE_PATH, source.getAbsolutePath)}" + log" to ${LogMDC(DESTINATION_PATH, dest.getAbsolutePath)}") Utils.deleteRecursively(dest) diff --git a/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala b/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala index 0c2db21905d1f..1ed53868992ac 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala @@ -45,7 +45,7 @@ private[spark] object Deploy { val RECOVERY_TIMEOUT = ConfigBuilder("spark.deploy.recoveryTimeout") .doc("Configures the timeout for recovery process. 
The default value is the same " + - "with ${WORKER_TIMEOUT.key}.") + s"with ${Worker.WORKER_TIMEOUT.key}.") .version("4.0.0") .timeConf(TimeUnit.SECONDS) .checkValue(_ > 0, "spark.deploy.recoveryTimeout must be positive.") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/UtilSuite.scala b/core/src/main/scala/org/apache/spark/internal/config/SparkConfigProvider.scala similarity index 58% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/util/UtilSuite.scala rename to core/src/main/scala/org/apache/spark/internal/config/SparkConfigProvider.scala index d95de71e897a2..8739c87a65877 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/UtilSuite.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/SparkConfigProvider.scala @@ -14,18 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +package org.apache.spark.internal.config -package org.apache.spark.sql.util +import java.util.{Map => JMap} -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.SparkConf -class UtilSuite extends SparkFunSuite { - test("truncatedString") { - assert(truncatedString(Nil, "[", ", ", "]", 2) == "[]") - assert(truncatedString(Seq(1, 2), "[", ", ", "]", 2) == "[1, 2]") - assert(truncatedString(Seq(1, 2, 3), "[", ", ", "]", 2) == "[1, ... 2 more fields]") - assert(truncatedString(Seq(1, 2, 3), "[", ", ", "]", -5) == "[, ... 3 more fields]") - assert(truncatedString(Seq(1, 2, 3), ", ", 10) == "1, 2, 3") +/** + * A config provider that only reads Spark config keys. 
+ */ +private[spark] class SparkConfigProvider(conf: JMap[String, String]) extends ConfigProvider { + + override def get(key: String): Option[String] = { + if (key.startsWith("spark.")) { + Option(conf.get(key)).orElse(SparkConf.getDeprecatedConfig(key, conf)) + } else { + None + } } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 324ef701c4266..5dda7afc3ebcb 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -162,7 +162,7 @@ package object config { "PySpark shell.") .version("4.0.0") .booleanConf - .createWithDefault(true) + .createWithDefault(false) private[spark] val LEGACY_TASK_NAME_MDC_ENABLED = ConfigBuilder("spark.log.legacyTaskNameMdc.enabled") @@ -1023,8 +1023,7 @@ package object config { private[spark] val MAX_EXECUTOR_FAILURES = ConfigBuilder("spark.executor.maxNumFailures") .doc("The maximum number of executor failures before failing the application. " + - "This configuration only takes effect on YARN, or Kubernetes when " + - "`spark.kubernetes.allocation.pods.allocator` is set to 'direct'.") + "This configuration only takes effect on YARN and Kubernetes.") .version("3.5.0") .intConf .createOptional @@ -1032,8 +1031,8 @@ package object config { private[spark] val EXECUTOR_ATTEMPT_FAILURE_VALIDITY_INTERVAL_MS = ConfigBuilder("spark.executor.failuresValidityInterval") .doc("Interval after which executor failures will be considered independent and not " + - "accumulate towards the attempt count. This configuration only takes effect on YARN, " + - "or Kubernetes when `spark.kubernetes.allocation.pods.allocator` is set to 'direct'.") + "accumulate towards the attempt count. 
This configuration only takes effect on YARN " + + "and Kubernetes.") .version("3.5.0") .timeConf(TimeUnit.MILLISECONDS) .createOptional diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 545eafe7a4449..01bc46fc0b623 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -25,11 +25,13 @@ import java.util.{Date, Locale} import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred._ import org.apache.hadoop.mapred.lib.CombineFileSplit import org.apache.hadoop.mapreduce.TaskType import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.security.AccessControlException import org.apache.hadoop.util.ReflectionUtils import org.apache.spark._ @@ -319,6 +321,7 @@ class HadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(log"Skipped the rest content in the corrupted file: " + log"${MDC(PATH, split.inputSplit)}", e) @@ -345,6 +348,7 @@ class HadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(log"Skipped the rest content in the corrupted file: " + log"${MDC(PATH, split.inputSplit)}", e) diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 
2b6f322d1805d..d619602305890 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -26,12 +26,14 @@ import scala.jdk.CollectionConverters._ import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.Writable import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileInputFormat, FileSplit, InvalidInputException} import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl} +import org.apache.hadoop.security.AccessControlException import org.apache.spark._ import org.apache.spark.annotation.DeveloperApi @@ -255,6 +257,7 @@ class NewHadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( log"Skipped the rest content in the corrupted file: " + @@ -284,6 +287,7 @@ class NewHadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( log"Skipped the rest content in the corrupted file: " + diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 0db0133f632bf..80db818b77e42 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1834,8 +1834,9 @@ abstract class RDD[T: ClassTag]( * Please read the linked SPIP and 
design docs to understand the limitations and future plans. * @return an [[RDDBarrier]] instance that provides actions within a barrier stage * @see [[org.apache.spark.BarrierTaskContext]] - * @see SPIP: Barrier Execution Mode - * @see Design Doc + * @see + * SPIP: Barrier Execution Mode + * @see Design Doc */ @Experimental @Since("2.4.0") diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 4f7338f74e298..aee92ba928b4a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1382,9 +1382,9 @@ private[spark] class DAGScheduler( logInfo( log"Got job ${MDC(JOB_ID, job.jobId)} (${MDC(CALL_SITE_SHORT_FORM, callSite.shortForm)}) " + log"with ${MDC(NUM_PARTITIONS, partitions.length)} output partitions") - logInfo(log"Final stage: ${MDC(STAGE_ID, finalStage)} " + + logInfo(log"Final stage: ${MDC(STAGE, finalStage)} " + log"(${MDC(STAGE_NAME, finalStage.name)})") - logInfo(log"Parents of final stage: ${MDC(STAGE_ID, finalStage.parents)}") + logInfo(log"Parents of final stage: ${MDC(STAGES, finalStage.parents)}") logInfo(log"Missing parents: ${MDC(MISSING_PARENT_STAGES, getMissingParentStages(finalStage))}") val jobSubmissionTime = clock.getTimeMillis() @@ -1465,7 +1465,7 @@ private[spark] class DAGScheduler( val missing = getMissingParentStages(stage).sortBy(_.id) logDebug("missing: " + missing) if (missing.isEmpty) { - logInfo(log"Submitting ${MDC(STAGE_ID, stage)} (${MDC(RDD_ID, stage.rdd)}), " + + logInfo(log"Submitting ${MDC(STAGE, stage)} (${MDC(RDD_ID, stage.rdd)}), " + log"which has no missing parents") submitMissingTasks(stage, jobId.get) } else { @@ -1517,12 +1517,12 @@ private[spark] class DAGScheduler( val shuffleId = stage.shuffleDep.shuffleId val shuffleMergeId = stage.shuffleDep.shuffleMergeId if (stage.shuffleDep.shuffleMergeEnabled) { - logInfo(log"Shuffle merge 
enabled before starting the stage for ${MDC(STAGE_ID, stage)}" + + logInfo(log"Shuffle merge enabled before starting the stage for ${MDC(STAGE, stage)}" + log" with shuffle ${MDC(SHUFFLE_ID, shuffleId)} and shuffle merge" + log" ${MDC(SHUFFLE_MERGE_ID, shuffleMergeId)} with" + log" ${MDC(NUM_MERGER_LOCATIONS, stage.shuffleDep.getMergerLocs.size.toString)} merger locations") } else { - logInfo(log"Shuffle merge disabled for ${MDC(STAGE_ID, stage)} with " + + logInfo(log"Shuffle merge disabled for ${MDC(STAGE, stage)} with " + log"shuffle ${MDC(SHUFFLE_ID, shuffleId)} and " + log"shuffle merge ${MDC(SHUFFLE_MERGE_ID, shuffleMergeId)}, " + log"but can get enabled later adaptively once enough " + @@ -1583,7 +1583,7 @@ private[spark] class DAGScheduler( // merger locations but the corresponding shuffle map stage did not complete // successfully, we would still enable push for its retry. s.shuffleDep.setShuffleMergeAllowed(false) - logInfo(log"Push-based shuffle disabled for ${MDC(STAGE_ID, stage)} " + + logInfo(log"Push-based shuffle disabled for ${MDC(STAGE, stage)} " + log"(${MDC(STAGE_NAME, stage.name)}) since it is already shuffle merge finalized") } } @@ -1707,7 +1707,7 @@ private[spark] class DAGScheduler( if (tasks.nonEmpty) { logInfo(log"Submitting ${MDC(NUM_TASKS, tasks.size)} missing tasks from " + - log"${MDC(STAGE_ID, stage)} (${MDC(RDD_ID, stage.rdd)}) (first 15 tasks are " + + log"${MDC(STAGE, stage)} (${MDC(RDD_ID, stage.rdd)}) (first 15 tasks are " + log"for partitions ${MDC(PARTITION_IDS, tasks.take(15).map(_.partitionId))})") val shuffleId = stage match { case s: ShuffleMapStage => Some(s.shuffleDep.shuffleId) @@ -1964,7 +1964,7 @@ private[spark] class DAGScheduler( } catch { case e: UnsupportedOperationException => logWarning(log"Could not cancel tasks " + - log"for stage ${MDC(STAGE_ID, stageId)}", e) + log"for stage ${MDC(STAGE, stageId)}", e) } listenerBus.post( SparkListenerJobEnd(job.jobId, clock.getTimeMillis(), JobSucceeded)) @@ -1996,7 +1996,7 
@@ private[spark] class DAGScheduler( logDebug("ShuffleMapTask finished on " + execId) if (executorFailureEpoch.contains(execId) && smt.epoch <= executorFailureEpoch(execId)) { - logInfo(log"Ignoring possibly bogus ${MDC(STAGE_ID, smt)} completion from " + + logInfo(log"Ignoring possibly bogus ${MDC(STAGE, smt)} completion from " + log"executor ${MDC(EXECUTOR_ID, execId)}") } else { // The epoch of the task is acceptable (i.e., the task was launched after the most @@ -2026,8 +2026,8 @@ private[spark] class DAGScheduler( if (failedStage.latestInfo.attemptNumber() != task.stageAttemptId) { logInfo(log"Ignoring fetch failure from " + log"${MDC(TASK_ID, task)} as it's from " + - log"${MDC(STAGE_ID, failedStage)} attempt " + - log"${MDC(STAGE_ATTEMPT, task.stageAttemptId)} and there is a more recent attempt for " + + log"${MDC(FAILED_STAGE, failedStage)} attempt " + + log"${MDC(STAGE_ATTEMPT_ID, task.stageAttemptId)} and there is a more recent attempt for " + log"that stage (attempt " + log"${MDC(NUM_ATTEMPT, failedStage.latestInfo.attemptNumber())}) running") } else { @@ -2035,8 +2035,8 @@ private[spark] class DAGScheduler( isExecutorDecommissioningOrDecommissioned(taskScheduler, bmAddress) if (ignoreStageFailure) { logInfo(log"Ignoring fetch failure from ${MDC(TASK_NAME, task)} of " + - log"${MDC(STAGE, failedStage)} attempt " + - log"${MDC(STAGE_ATTEMPT, task.stageAttemptId)} when count " + + log"${MDC(FAILED_STAGE, failedStage)} attempt " + + log"${MDC(STAGE_ATTEMPT_ID, task.stageAttemptId)} when count " + log"${MDC(MAX_ATTEMPTS, config.STAGE_MAX_CONSECUTIVE_ATTEMPTS.key)} " + log"as executor ${MDC(EXECUTOR_ID, bmAddress.executorId)} is decommissioned and " + log"${MDC(CONFIG, config.STAGE_IGNORE_DECOMMISSION_FETCH_FAILURE.key)}=true") @@ -2937,7 +2937,8 @@ private[spark] class DAGScheduler( } else { // This stage is only used by the job, so finish the stage if it is running. 
val stage = stageIdToStage(stageId) - if (runningStages.contains(stage)) { + // Stages with failedAttemptIds may have tasks that are running + if (runningStages.contains(stage) || stage.failedAttemptIds.nonEmpty) { try { // killAllTaskAttempts will fail if a SchedulerBackend does not implement killTask taskScheduler.killAllTaskAttempts(stageId, shouldInterruptTaskThread(job), reason) if (legacyAbortStageAfterKillTasks) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala b/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala index df28a97a349ea..a769c3fa14b62 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala @@ -149,7 +149,7 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean) // The task output has been committed successfully case _: TaskCommitDenied => logInfo(log"Task was denied committing, stage: ${MDC(LogKeys.STAGE_ID, stage)}." + - log"${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)}, " + + log"${MDC(LogKeys.STAGE_ATTEMPT_ID, stageAttempt)}, " + log"partition: ${MDC(LogKeys.PARTITION_ID, partition)}, " + log"attempt: ${MDC(LogKeys.NUM_ATTEMPT, attemptNumber)}") case _ => @@ -181,7 +181,7 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean) stageStates.get(stage) match { case Some(state) if attemptFailed(state, stageAttempt, partition, attemptNumber) => logInfo(log"Commit denied for stage=${MDC(LogKeys.STAGE_ID, stage)}." 
+ - log"${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)}, partition=" + + log"${MDC(LogKeys.STAGE_ATTEMPT_ID, stageAttempt)}, partition=" + log"${MDC(LogKeys.PARTITION_ID, partition)}: task attempt " + log"${MDC(LogKeys.NUM_ATTEMPT, attemptNumber)} already marked as failed.") false diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index f511aed6d2166..e21ec77ce69ec 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -22,6 +22,7 @@ import java.util.Properties import org.apache.spark._ import org.apache.spark.executor.TaskMetrics +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.APP_CALLER_CONTEXT import org.apache.spark.internal.plugin.PluginContainer import org.apache.spark.memory.{MemoryMode, TaskMemoryManager} @@ -70,7 +71,7 @@ private[spark] abstract class Task[T]( val jobId: Option[Int] = None, val appId: Option[String] = None, val appAttemptId: Option[String] = None, - val isBarrier: Boolean = false) extends Serializable { + val isBarrier: Boolean = false) extends Serializable with Logging { @transient lazy val metrics: TaskMetrics = SparkEnv.get.closureSerializer.newInstance().deserialize(ByteBuffer.wrap(serializedTaskMetrics)) @@ -231,10 +232,19 @@ private[spark] abstract class Task[T]( require(reason != null) _reasonIfKilled = reason if (context != null) { - context.markInterrupted(reason) - } - if (interruptThread && taskThread != null) { - taskThread.interrupt() + TaskContext.synchronized { + if (context.interruptible()) { + context.markInterrupted(reason) + if (interruptThread && taskThread != null) { + taskThread.interrupt() + } + } else { + logInfo(log"Task ${MDC(LogKeys.TASK_ID, context.taskAttemptId())} " + + log"is currently not interruptible. 
") + val threadToInterrupt = if (interruptThread) Option(taskThread) else None + context.pendingInterrupt(threadToInterrupt, reason) + } + } } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 8e3cb1379339d..43193dc5366a4 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -325,7 +325,7 @@ private[spark] class TaskSchedulerImpl( } tsm.suspend() logInfo(log"Stage ${MDC(LogKeys.STAGE_ID, stageId)}." + - log"${MDC(LogKeys.STAGE_ATTEMPT, tsm.taskSet.stageAttemptId)} was cancelled") + log"${MDC(LogKeys.STAGE_ATTEMPT_ID, tsm.taskSet.stageAttemptId)} was cancelled") } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala index 2474a1342eb2e..3513cb1f93764 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import java.util.Properties -import org.apache.spark.internal.LogKeys.{STAGE_ATTEMPT, STAGE_ID} +import org.apache.spark.internal.LogKeys.{STAGE_ATTEMPT_ID, STAGE_ID} import org.apache.spark.internal.MessageWithContext /** @@ -42,7 +42,7 @@ private[spark] class TaskSet( lazy val logId: MessageWithContext = { val hashMap = new java.util.HashMap[String, String]() hashMap.put(STAGE_ID.name, stageId.toString) - hashMap.put(STAGE_ATTEMPT.name, stageAttemptId.toString) + hashMap.put(STAGE_ATTEMPT_ID.name, stageAttemptId.toString) MessageWithContext(id, hashMap) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index fdc82285b76bb..0eaf138d3eb8d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala 
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -1001,7 +1001,7 @@ private[spark] class TaskSetManager( logError( log"Task ${MDC(TASK_INDEX, info.index)}.${MDC(TASK_ATTEMPT_ID, info.attemptNumber)} " + log"in stage ${MDC(STAGE_ID, taskSet.stageId)}." + - log"${MDC(STAGE_ATTEMPT, taskSet.stageAttemptId)} (TID ${MDC(TASK_ID, tid)}) " + + log"${MDC(STAGE_ATTEMPT_ID, taskSet.stageAttemptId)} (TID ${MDC(TASK_ID, tid)}) " + log"can not write to output file: ${MDC(ERROR, ef.description)}; not retrying") emptyTaskInfoAccumulablesAndNotifyDagScheduler(tid, tasks(index), reason, null, accumUpdates, metricPeaks) diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index 30bc1382fb021..bf3117a9a9b12 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -85,6 +85,9 @@ private[spark] class IndexShuffleBlockResolver( private val remoteShuffleMaxDisk: Option[Long] = conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_MAX_DISK_SIZE) + private val checksumEnabled = conf.get(config.SHUFFLE_CHECKSUM_ENABLED) + private lazy val algorithm = conf.get(config.SHUFFLE_CHECKSUM_ALGORITHM) + def getDataFile(shuffleId: Int, mapId: Long): File = getDataFile(shuffleId, mapId, None) /** @@ -195,9 +198,11 @@ private[spark] class IndexShuffleBlockResolver( logWarning(log"Error deleting index ${MDC(PATH, file.getPath())}") } - file = getChecksumFile(shuffleId, mapId, conf.get(config.SHUFFLE_CHECKSUM_ALGORITHM)) - if (file.exists() && !file.delete()) { - logWarning(log"Error deleting checksum ${MDC(PATH, file.getPath())}") + if (checksumEnabled) { + file = getChecksumFile(shuffleId, mapId, algorithm) + if (file.exists() && !file.delete()) { + logWarning(log"Error deleting checksum ${MDC(PATH, file.getPath())}") + } } } @@ -396,8 +401,7 @@ 
private[spark] class IndexShuffleBlockResolver( val (checksumFileOpt, checksumTmpOpt) = if (checksumEnabled) { assert(lengths.length == checksums.length, "The size of partition lengths and checksums should be equal") - val checksumFile = - getChecksumFile(shuffleId, mapId, conf.get(config.SHUFFLE_CHECKSUM_ALGORITHM)) + val checksumFile = getChecksumFile(shuffleId, mapId, algorithm) (Some(checksumFile), Some(createTempFile(checksumFile))) } else { (None, None) diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index efffda43695cc..6902fb6d236de 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -74,12 +74,6 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager import SortShuffleManager._ - if (!conf.getBoolean("spark.shuffle.spill", true)) { - logWarning( - "spark.shuffle.spill was set to false, but this configuration is ignored as of Spark 1.6+." + - " Shuffle will continue to spill to disk when necessary.") - } - /** * A mapping from shuffle ids to the task ids of mappers producing output for those shuffles. 
*/ diff --git a/core/src/main/scala/org/apache/spark/ui/HttpSecurityFilter.scala b/core/src/main/scala/org/apache/spark/ui/HttpSecurityFilter.scala index 551f0eb98cb87..cf881b6ea9900 100644 --- a/core/src/main/scala/org/apache/spark/ui/HttpSecurityFilter.scala +++ b/core/src/main/scala/org/apache/spark/ui/HttpSecurityFilter.scala @@ -44,10 +44,6 @@ private class HttpSecurityFilter( conf: SparkConf, securityMgr: SecurityManager) extends Filter { - override def destroy(): Unit = { } - - override def init(config: FilterConfig): Unit = { } - override def doFilter(req: ServletRequest, res: ServletResponse, chain: FilterChain): Unit = { val hreq = req.asInstanceOf[HttpServletRequest] val hres = res.asInstanceOf[HttpServletResponse] diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/TaskThreadDumpPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/TaskThreadDumpPage.scala index 49b919ce0de97..5fe542d4fa21e 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/TaskThreadDumpPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/TaskThreadDumpPage.scala @@ -71,7 +71,7 @@ private[spark] class TaskThreadDumpPage(
-

Updated at{UIUtils.formatDate(time)}

+

Updated at {UIUtils.formatDate(time)}

diff --git a/core/src/main/scala/org/apache/spark/util/TransientLazy.scala b/core/src/main/scala/org/apache/spark/util/BestEffortLazyVal.scala similarity index 50% rename from core/src/main/scala/org/apache/spark/util/TransientLazy.scala rename to core/src/main/scala/org/apache/spark/util/BestEffortLazyVal.scala index 2833ef93669a6..83044055fe404 100644 --- a/core/src/main/scala/org/apache/spark/util/TransientLazy.scala +++ b/core/src/main/scala/org/apache/spark/util/BestEffortLazyVal.scala @@ -16,15 +16,21 @@ */ package org.apache.spark.util +import java.util.concurrent.atomic.AtomicReference + /** - * Construct to lazily initialize a variable. - * This may be helpful for avoiding deadlocks in certain scenarios. For example, - * a) Thread 1 entered a synchronized method, grabbing a coarse lock on the parent object. - * b) Thread 2 gets spawned off, and tries to initialize a lazy value on the same parent object - * (in our case, this was the logger). This causes scala to also try to grab a coarse lock on - * the parent object. - * c) If thread 1 waits for thread 2 to join, a deadlock occurs. - * The main difference between this and [[LazyTry]] is that this does not cache failures. + * A lock-free implementation of a lazily-initialized variable. + * If there are concurrent initializations then the `compute()` function may be invoked + * multiple times. However, only a single `compute()` result will be stored and all readers + * will receive the same result object instance. + * + * This may be helpful for avoiding deadlocks in certain scenarios where exactly-once + * value computation is not a hard requirement. + * + * @note + * This helper class has additional requirements on the compute function: + * 1) The compute function MUST not return null; + * 2) The computation failure is not cached. * * @note * Scala 3 uses a different implementation of lazy vals which doesn't have this problem. 
@@ -32,12 +38,24 @@ package org.apache.spark.util * href="https://docs.scala-lang.org/scala3/reference/changed-features/lazy-vals-init.html">Lazy * Vals Initialization for more details. */ -private[spark] class TransientLazy[T](initializer: => T) extends Serializable { +private[spark] class BestEffortLazyVal[T <: AnyRef]( + @volatile private[this] var compute: () => T) extends Serializable { - @transient - private[this] lazy val value: T = initializer + private[this] val cached: AtomicReference[T] = new AtomicReference(null.asInstanceOf[T]) def apply(): T = { - value + val value = cached.get() + if (value != null) { + value + } else { + val f = compute + if (f != null) { + val newValue = f() + assert(newValue != null, "compute function cannot return null.") + cached.compareAndSet(null.asInstanceOf[T], newValue) + compute = null // allow closure to be GC'd + } + cached.get() + } } } diff --git a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala index f8f5bb4f72a40..5e50361b278aa 100644 --- a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala @@ -241,6 +241,13 @@ private[spark] object HadoopFSUtils extends Logging { logWarning(log"The directory ${MDC(PATH, path)} " + log"was not found. 
Was it deleted very recently?") Array.empty[FileStatus] + case u: UnsupportedOperationException => + throw new SparkUnsupportedOperationException( + errorClass = "FAILED_READ_FILE.UNSUPPORTED_FILE_SYSTEM", + messageParameters = Map( + "path" -> path.toString, + "fileSystemClass" -> fs.getClass.getName, + "method" -> u.getStackTrace.head.getMethodName)) } val filteredStatuses = diff --git a/core/src/main/scala/org/apache/spark/util/NonFateSharingCache.scala b/core/src/main/scala/org/apache/spark/util/NonFateSharingCache.scala index 21184d70b386a..7d01facc1e421 100644 --- a/core/src/main/scala/org/apache/spark/util/NonFateSharingCache.scala +++ b/core/src/main/scala/org/apache/spark/util/NonFateSharingCache.scala @@ -17,7 +17,7 @@ package org.apache.spark.util -import java.util.concurrent.Callable +import java.util.concurrent.{Callable, TimeUnit} import com.google.common.cache.{Cache, CacheBuilder, CacheLoader, LoadingCache} @@ -68,6 +68,20 @@ private[spark] object NonFateSharingCache { override def load(k: K): V = loadingFunc.apply(k) })) } + + def apply[K, V]( + maximumSize: Long, + expireAfterAccessTime: Long, + expireAfterAccessTimeUnit: TimeUnit): NonFateSharingCache[K, V] = { + val builder = CacheBuilder.newBuilder().asInstanceOf[CacheBuilder[K, V]] + if (maximumSize > 0L) { + builder.maximumSize(maximumSize) + } + if(expireAfterAccessTime > 0) { + builder.expireAfterAccess(expireAfterAccessTime, expireAfterAccessTimeUnit) + } + new NonFateSharingCache(builder.build[K, V]()) + } } private[spark] class NonFateSharingCache[K, V](protected val cache: Cache[K, V]) { diff --git a/core/src/main/scala/org/apache/spark/util/TransientBestEffortLazyVal.scala b/core/src/main/scala/org/apache/spark/util/TransientBestEffortLazyVal.scala new file mode 100644 index 0000000000000..033b783ede40b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/TransientBestEffortLazyVal.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or 
more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util + +import java.io.{IOException, ObjectInputStream} +import java.util.concurrent.atomic.AtomicReference + +/** + * A lock-free implementation of a lazily-initialized variable. + * If there are concurrent initializations then the `compute()` function may be invoked + * multiple times. However, only a single `compute()` result will be stored and all readers + * will receive the same result object instance. + * + * This may be helpful for avoiding deadlocks in certain scenarios where exactly-once + * value computation is not a hard requirement. + * + * The main difference between this and [[BestEffortLazyVal]] is that: + * [[BestEffortLazyVal]] serializes the cached value after computation, while + * [[TransientBestEffortLazyVal]] always serializes the compute function. + * + * @note + * This helper class has additional requirements on the compute function: + * 1) The compute function MUST not return null; + * 2) The computation failure is not cached. + * + * @note + * Scala 3 uses a different implementation of lazy vals which doesn't have this problem. + * Please refer to Lazy + * Vals Initialization for more details. 
+ */ +private[spark] class TransientBestEffortLazyVal[T <: AnyRef]( + private[this] val compute: () => T) extends Serializable { + + @transient + private[this] var cached: AtomicReference[T] = new AtomicReference(null.asInstanceOf[T]) + + def apply(): T = { + val value = cached.get() + if (value != null) { + value + } else { + val newValue = compute() + assert(newValue != null, "compute function cannot return null.") + cached.compareAndSet(null.asInstanceOf[T], newValue) + cached.get() + } + } + + @throws(classOf[IOException]) + private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { + ois.defaultReadObject() + cached = new AtomicReference(null.asInstanceOf[T]) + } +} diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 5703128aacbb9..1efe181a8c38a 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -103,7 +103,8 @@ private[spark] object Utils with SparkErrorUtils with SparkFileUtils with SparkSerDeUtils - with SparkStreamUtils { + with SparkStreamUtils + with SparkStringUtils { private val sparkUncaughtExceptionHandler = new SparkUncaughtExceptionHandler @volatile private var cachedLocalDir: String = "" @@ -1354,8 +1355,10 @@ private[spark] object Utils val TRY_WITH_CALLER_STACKTRACE_FULL_STACKTRACE = "Full stacktrace of original doTryWithCallerStacktrace caller" - val TRY_WITH_CALLER_STACKTRACE_TRY_STACKTRACE = - "Stacktrace under doTryWithCallerStacktrace" + class OriginalTryStackTraceException() + extends Exception(TRY_WITH_CALLER_STACKTRACE_FULL_STACKTRACE) { + var doTryWithCallerStacktraceDepth: Int = 0 + } /** * Use Try with stacktrace substitution for the caller retrieving the error. 
@@ -1383,14 +1386,19 @@ private[spark] object Utils val commonSuffixLen = origStackTrace.reverse.zip(currentStackTrace.reverse).takeWhile { case (exElem, currentElem) => exElem == currentElem }.length - val belowEx = new Exception(TRY_WITH_CALLER_STACKTRACE_TRY_STACKTRACE) - belowEx.setStackTrace(origStackTrace.dropRight(commonSuffixLen)) - ex.addSuppressed(belowEx) - - // keep the full original stack trace in a suppressed exception. - val fullEx = new Exception(TRY_WITH_CALLER_STACKTRACE_FULL_STACKTRACE) - fullEx.setStackTrace(origStackTrace) - ex.addSuppressed(fullEx) + // Add the full stack trace of the original caller as the suppressed exception. + // It may already be there if it's a nested call to doTryWithCallerStacktrace. + val origEx = ex.getSuppressed.find { e => + e.isInstanceOf[OriginalTryStackTraceException] + }.getOrElse { + val fullEx = new OriginalTryStackTraceException() + fullEx.setStackTrace(origStackTrace) + ex.addSuppressed(fullEx) + fullEx + }.asInstanceOf[OriginalTryStackTraceException] + // Update the depth of the stack of the current doTryWithCallerStacktrace, for stitching + // it with the stack of getTryWithCallerStacktrace. 
+ origEx.doTryWithCallerStacktraceDepth = origStackTrace.size - commonSuffixLen case Success(_) => // nothing } t @@ -1406,7 +1414,7 @@ private[spark] object Utils * Full stack trace of the original doTryWithCallerStacktrace caller can be retrieved with * ``` * ex.getSuppressed.find { e => - * e.getMessage == Utils.TRY_WITH_CALLER_STACKTRACE_FULL_STACKTRACE + * e.isInstanceOf[Utils.OriginalTryStackTraceException] * } * ``` * @@ -1416,13 +1424,15 @@ private[spark] object Utils */ def getTryWithCallerStacktrace[T](t: Try[T]): T = t match { case Failure(ex) => - val belowStacktrace = ex.getSuppressed.find { e => + val originalStacktraceEx = ex.getSuppressed.find { e => // added in doTryWithCallerStacktrace - e.getMessage == TRY_WITH_CALLER_STACKTRACE_TRY_STACKTRACE + e.isInstanceOf[OriginalTryStackTraceException] }.getOrElse { // If we don't have the expected stacktrace information, just rethrow throw ex - }.getStackTrace + }.asInstanceOf[OriginalTryStackTraceException] + val belowStacktrace = originalStacktraceEx.getStackTrace + .take(originalStacktraceEx.doTryWithCallerStacktraceDepth) // We are modifying and throwing the original exception. It would be better if we could // return a copy, but we can't easily clone it and preserve. If this is accessed from // multiple threads that then look at the stack trace, this could break. @@ -2512,7 +2522,7 @@ private[spark] object Utils * * @return whether it is local mode */ - def isLocalMaster(conf: SparkConf): Boolean = { + def isLocalMaster(conf: ReadOnlySparkConf): Boolean = { val master = conf.get("spark.master", "") master == "local" || master.startsWith("local[") } @@ -2596,7 +2606,7 @@ private[spark] object Utils /** * Return whether dynamic allocation is enabled in the given conf. 
*/ - def isDynamicAllocationEnabled(conf: SparkConf): Boolean = { + def isDynamicAllocationEnabled(conf: ReadOnlySparkConf): Boolean = { val dynamicAllocationEnabled = conf.get(DYN_ALLOCATION_ENABLED) dynamicAllocationEnabled && (!isLocalMaster(conf) || conf.get(DYN_ALLOCATION_TESTING)) @@ -2679,7 +2689,7 @@ private[spark] object Utils * loading SparkConf. */ def resetStructuredLogging(sparkConf: SparkConf): Unit = { - if (sparkConf.getBoolean(STRUCTURED_LOGGING_ENABLED.key, defaultValue = true)) { + if (sparkConf.get(STRUCTURED_LOGGING_ENABLED)) { Logging.enableStructuredLogging() } else { Logging.disableStructuredLogging() @@ -2799,10 +2809,6 @@ private[spark] object Utils } } - def stringToSeq(str: String): Seq[String] = { - str.split(",").map(_.trim()).filter(_.nonEmpty).toImmutableArraySeq - } - /** * Create instances of extension classes. * @@ -2948,6 +2954,15 @@ private[spark] object Utils str.replaceAll("[ :/]", "-").replaceAll("[.${}'\"]", "_").toLowerCase(Locale.ROOT) } + def nameForAppAndAttempt(appId: String, appAttemptId: Option[String]): String = { + val base = sanitizeDirName(appId) + if (appAttemptId.isDefined) { + base + "_" + sanitizeDirName(appAttemptId.get) + } else { + base + } + } + def isClientMode(conf: SparkConf): Boolean = { "client".equals(conf.get(SparkLauncher.DEPLOY_MODE, "client")) } @@ -2983,7 +2998,7 @@ private[spark] object Utils return props } val resultProps = new Properties() - props.forEach((k, v) => resultProps.put(k, v)) + resultProps.putAll(props.clone().asInstanceOf[Properties]) resultProps } diff --git a/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala b/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala index c3d648dccea73..7f2a1a8419a71 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/Spillable.scala @@ -146,6 +146,6 @@ private[spark] abstract class Spillable[C](taskMemoryManager: 
TaskMemoryManager) logInfo(log"Thread ${MDC(LogKeys.THREAD_ID, threadId)} " + log"spilling in-memory map of ${MDC(LogKeys.BYTE_SIZE, org.apache.spark.util.Utils.bytesToString(size))} to disk " + - log"(${MDC(LogKeys.SPILL_TIMES, _spillCount)} times so far)") + log"(${MDC(LogKeys.NUM_SPILLS, _spillCount)} times so far)") } } diff --git a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala index ca51e61f5ed44..65ed2684a5b00 100644 --- a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala +++ b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark +import java.io.{File, FileOutputStream, InputStream, ObjectOutputStream} import java.util.concurrent.{Semaphore, TimeUnit} import java.util.concurrent.atomic.AtomicInteger @@ -35,7 +36,7 @@ import org.apache.spark.executor.ExecutorExitCode import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Deploy._ import org.apache.spark.scheduler.{JobFailed, SparkListener, SparkListenerExecutorRemoved, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart} -import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.{ThreadUtils, Utils} /** * Test suite for cancelling running jobs. 
We run the cancellation tasks for single job action @@ -712,6 +713,142 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft assert(executionOfInterruptibleCounter.get() < numElements) } + Seq(true, false).foreach { interruptible => + + val (hint1, hint2) = if (interruptible) { + (" not", "") + } else { + ("", " not") + } + + val testName = s"SPARK-50768:$hint1 use TaskContext.createResourceUninterruptibly " + + s"would$hint2 cause stream leak on task interruption" + + test(testName) { + import org.apache.spark.JobCancellationSuite._ + withTempDir { dir => + + // `InterruptionSensitiveInputStream` is designed to easily leak the underlying + // stream when task thread interruption happens during its initialization, as + // the reference to the underlying stream is intentionally not available to + // `InterruptionSensitiveInputStream` at that point. + class InterruptionSensitiveInputStream(fileHint: String) extends InputStream { + private var underlying: InputStream = _ + + def initialize(): InputStream = { + val in: InputStream = new InputStream { + + open() + + private def dumpFile(typeName: String): Unit = { + var fileOut: FileOutputStream = null + var objOut: ObjectOutputStream = null + try { + val file = new File(dir, s"$typeName.$fileHint") + fileOut = new FileOutputStream(file) + objOut = new ObjectOutputStream(fileOut) + objOut.writeBoolean(true) + objOut.flush() + } finally { + if (fileOut != null) { + fileOut.close() + } + if (objOut != null) { + objOut.close() + } + } + + } + + private def open(): Unit = { + dumpFile("open") + } + + override def close(): Unit = { + dumpFile("close") + } + + override def read(): Int = -1 + } + + // Leave some time for the task to be interrupted during the + // creation of `InterruptionSensitiveInputStream`. 
+ Thread.sleep(10000) + + underlying = in + underlying + } + + override def read(): Int = -1 + + override def close(): Unit = { + if (underlying != null) { + underlying.close() + } + } + } + + def createStream(fileHint: String): Unit = { + if (interruptible) { + Utils.tryInitializeResource { + new InterruptionSensitiveInputStream(fileHint) + } { + _.initialize() + } + } else { + TaskContext.get().createResourceUninterruptibly[java.io.InputStream] { + Utils.tryInitializeResource { + new InterruptionSensitiveInputStream(fileHint) + } { + _.initialize() + } + } + } + } + + sc = new SparkContext("local[2]", "test interrupt streams") + + sc.addSparkListener(new SparkListener { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { + // Sleep some time to ensure task has started + Thread.sleep(2000) + taskStartedSemaphore.release() + } + + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + if (taskEnd.reason.isInstanceOf[TaskKilled]) { + taskCancelledSemaphore.release() + } + } + }) + + sc.setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, "true") + + val fileHint = if (interruptible) "interruptible" else "uninterruptible" + val future = sc.parallelize(1 to 100, 1).mapPartitions { _ => + createStream(fileHint) + Iterator.single(1) + }.collectAsync() + + taskStartedSemaphore.acquire() + future.cancel() + taskCancelledSemaphore.acquire() + + val fileOpen = new File(dir, s"open.$fileHint") + val fileClose = new File(dir, s"close.$fileHint") + assert(fileOpen.exists()) + + if (interruptible) { + // The underlying stream leaks when the stream creation is interruptible. + assert(!fileClose.exists()) + } else { + // The underlying stream won't leak when the stream creation is uninterruptible. 
+ assert(fileClose.exists()) + } + } + } + } + def testCount(): Unit = { // Cancel before launching any tasks { diff --git a/core/src/test/scala/org/apache/spark/SerializerTestUtils.scala b/core/src/test/scala/org/apache/spark/SerializerTestUtils.scala new file mode 100644 index 0000000000000..bd81003777317 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/SerializerTestUtils.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} + +trait SerializerTestUtils { + + protected def roundtripSerialize[T](obj: T): T = { + deserializeFromBytes(serializeToBytes(obj)) + } + + protected def serializeToBytes[T](o: T): Array[Byte] = { + val baos = new ByteArrayOutputStream + val oos = new ObjectOutputStream(baos) + try { + oos.writeObject(o) + baos.toByteArray + } finally { + oos.close() + } + } + + protected def deserializeFromBytes[T](bytes: Array[Byte]): T = { + val bais = new ByteArrayInputStream(bytes) + val ois = new ObjectInputStream(bais) + ois.readObject().asInstanceOf[T] + } +} diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 6b2bd90cd4314..10092f416f9e1 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -794,11 +794,6 @@ object HistoryServerSuite { * A filter used for auth tests; sets the request's user to the value of the "HTTP_USER" header. 
*/ class FakeAuthFilter extends Filter { - - override def destroy(): Unit = { } - - override def init(config: FilterConfig): Unit = { } - override def doFilter(req: ServletRequest, res: ServletResponse, chain: FilterChain): Unit = { val hreq = req.asInstanceOf[HttpServletRequest] val wrapped = new HttpServletRequestWrapper(hreq) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 243d33fe55a79..3e507df706ba5 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -185,6 +185,8 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti private var firstInit: Boolean = _ /** Set of TaskSets the DAGScheduler has requested executed. */ val taskSets = scala.collection.mutable.Buffer[TaskSet]() + /** Track running tasks: the key is the stageId, the value is the set of running tasks' partitionIds */ + var runningTaskInfos = new HashMap[Int, HashSet[Int]]() /** Stages for which the DAGScheduler has called TaskScheduler.killAllTaskAttempts(). 
*/ val cancelledStages = new HashSet[Int]() @@ -206,12 +208,14 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // normally done by TaskSetManager taskSet.tasks.foreach(_.epoch = mapOutputTracker.getEpoch) taskSets += taskSet + runningTaskInfos.put(taskSet.stageId, new HashSet[Int]() ++ taskSet.tasks.map(_.partitionId)) } override def killTaskAttempt( taskId: Long, interruptThread: Boolean, reason: String): Boolean = false override def killAllTaskAttempts( stageId: Int, interruptThread: Boolean, reason: String): Unit = { cancelledStages += stageId + runningTaskInfos.remove(stageId) } override def notifyPartitionCompletion(stageId: Int, partitionId: Int): Unit = { taskSets.filter(_.stageId == stageId).lastOption.foreach { ts => @@ -393,6 +397,14 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti handleShuffleMergeFinalized(shuffleMapStage, shuffleMapStage.shuffleDep.shuffleMergeId) } } + + override private[scheduler] def handleTaskCompletion(event: CompletionEvent): Unit = { + super.handleTaskCompletion(event) + runningTaskInfos.get(event.task.stageId).foreach{ partitions => + partitions -= event.task.partitionId + if (partitions.isEmpty) runningTaskInfos.remove(event.task.stageId) + } + } } override def beforeEach(): Unit = { @@ -2252,6 +2264,46 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti assert(scheduler.activeJobs.isEmpty) } + test("SPARK-50648: when job is cancelled during shuffle retry in parent stage, " + + "should kill all running tasks") { + val shuffleMapRdd = new MyRDD(sc, 2, Nil) + val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(2)) + val reduceRdd = new MyRDD(sc, 2, List(shuffleDep)) + submit(reduceRdd, Array(0, 1)) + completeShuffleMapStageSuccessfully(0, 0, 2) + sc.listenerBus.waitUntilEmpty() + + val info = new TaskInfo( + 3, index = 1, attemptNumber = 1, + partitionId = taskSets(1).tasks(0).partitionId, 0L, "", "", 
TaskLocality.ANY, true) + // result task 0.0 fetch failed, but result task 1.0 is still running + runEvent(makeCompletionEvent(taskSets(1).tasks(0), + FetchFailed(makeBlockManagerId("hostA"), shuffleDep.shuffleId, 0L, 0, 1, "ignored"), + null, + Seq.empty, + Array.empty, + info)) + sc.listenerBus.waitUntilEmpty() + + Thread.sleep(DAGScheduler.RESUBMIT_TIMEOUT * 2) + // map stage is running by resubmitted, result stage is waiting + // map tasks and the origin result task 1.0 are running + assert(scheduler.runningStages.size == 1, "Map stage should be running") + val mapStage = scheduler.runningStages.head + assert(mapStage.id === 0) + assert(mapStage.latestInfo.failureReason.isEmpty) + assert(scheduler.waitingStages.size == 1, "Result stage should be waiting") + assert(runningTaskInfos.size == 2) + assert(runningTaskInfos(taskSets(1).stageId).size == 1, + "origin result task 1.0 should be running") + + scheduler.doCancelAllJobs() + // all tasks should be killed + assert(runningTaskInfos.isEmpty) + assert(scheduler.runningStages.isEmpty) + assert(scheduler.waitingStages.isEmpty) + } + test("misbehaved accumulator should not crash DAGScheduler and SparkContext") { val acc = new LongAccumulator { override def add(v: java.lang.Long): Unit = throw new DAGSchedulerSuiteDummyException diff --git a/core/src/test/scala/org/apache/spark/shuffle/ChecksumBenchmark.scala b/core/src/test/scala/org/apache/spark/shuffle/ChecksumBenchmark.scala index 16a50fabb7ffd..4eb167930b0dd 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/ChecksumBenchmark.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/ChecksumBenchmark.scala @@ -19,8 +19,6 @@ package org.apache.spark.shuffle import java.util.zip.{Adler32, CRC32, CRC32C} -import org.apache.hadoop.util.PureJavaCrc32C - import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} /** @@ -41,18 +39,15 @@ object ChecksumBenchmark extends BenchmarkBase { runBenchmark("Benchmark Checksum Algorithms") { val data: Array[Byte] = (1 
until 32 * 1024 * 1024).map(_.toByte).toArray val benchmark = new Benchmark("Checksum Algorithms", N, 3, output = output) + benchmark.addCase(s"Adler32") { _ => + (1 to N).foreach(_ => new Adler32().update(data)) + } benchmark.addCase("CRC32") { _ => (1 to N).foreach(_ => new CRC32().update(data)) } benchmark.addCase(s"CRC32C") { _ => (1 to N).foreach(_ => new CRC32C().update(data)) } - benchmark.addCase(s"Adler32") { _ => - (1 to N).foreach(_ => new Adler32().update(data)) - } - benchmark.addCase(s"hadoop PureJavaCrc32C") { _ => - (1 to N).foreach(_ => new PureJavaCrc32C().update(data)) - } benchmark.run() } } diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 1b68ed301fb92..6d12e88e8efac 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -504,8 +504,6 @@ private[spark] class TestFilter extends Filter { private var rc: Int = HttpServletResponse.SC_OK - override def destroy(): Unit = { } - override def init(config: FilterConfig): Unit = { if (config.getInitParameter("responseCode") != null) { rc = config.getInitParameter("responseCode").toInt diff --git a/core/src/test/scala/org/apache/spark/util/BestEffortLazyValSuite.scala b/core/src/test/scala/org/apache/spark/util/BestEffortLazyValSuite.scala new file mode 100644 index 0000000000000..a6555eca8b859 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/BestEffortLazyValSuite.scala @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util + +import java.io.NotSerializableException +import java.util.concurrent.CountDownLatch +import java.util.concurrent.atomic.AtomicInteger + +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration._ + +import org.apache.spark.{SerializerTestUtils, SparkFunSuite} + +class BestEffortLazyValSuite extends SparkFunSuite with SerializerTestUtils { + + test("BestEffortLazy works") { + val numInitializerCalls = new AtomicInteger(0) + // Simulate a race condition where two threads concurrently + // initialize the lazy value: + val latch = new CountDownLatch(2) + val lazyval = new BestEffortLazyVal(() => { + numInitializerCalls.incrementAndGet() + latch.countDown() + latch.await() + new Object() + }) + + // Ensure no initialization happened before the lazy value was invoked + assert(numInitializerCalls.get() === 0) + + // Two threads concurrently invoke the lazy value + implicit val ec: ExecutionContext = ExecutionContext.global + val future1 = Future { lazyval() } + val future2 = Future { lazyval() } + val value1 = ThreadUtils.awaitResult(future1, 10.seconds) + val value2 = ThreadUtils.awaitResult(future2, 10.seconds) + + // The initializer should have been invoked twice (due to how we set up the + // race condition via the latch): + assert(numInitializerCalls.get() === 2) + + // But the value should only have been computed once: + assert(value1 eq value2) + + // Ensure the subsequent invocation serves the same object + assert(lazyval() eq value1) + assert(numInitializerCalls.get() === 2) + } + + 
test("BestEffortLazyVal is serializable") { + val lazyval = new BestEffortLazyVal(() => "test") + + // serialize and deserialize before first invocation + val lazyval2 = roundtripSerialize(lazyval) + assert(lazyval2() === "test") + + // first invocation + assert(lazyval() === "test") + + // serialize and deserialize after first invocation + val lazyval3 = roundtripSerialize(lazyval) + assert(lazyval3() === "test") + } + + test("BestEffortLazyVal is serializable: unserializable value") { + val lazyval = new BestEffortLazyVal(() => new Object()) + + // serialize and deserialize before first invocation + val lazyval2 = roundtripSerialize(lazyval) + assert(lazyval2() != null) + + // first invocation + assert(lazyval() != null) + + // serialize and deserialize after first invocation + // try to serialize the cached value and cause NotSerializableException + val e = intercept[NotSerializableException] { + val lazyval3 = roundtripSerialize(lazyval) + } + assert(e.getMessage.contains("java.lang.Object")) + } + + test("BestEffortLazyVal is serializable: initialization failure") { + val lazyval = new BestEffortLazyVal[String](() => throw new RuntimeException("test")) + + // serialize and deserialize before first invocation + val lazyval2 = roundtripSerialize(lazyval) + val e2 = intercept[RuntimeException] { + val v = lazyval2() + } + assert(e2.getMessage.contains("test")) + + // initialization failure + val e = intercept[RuntimeException] { + val v = lazyval() + } + assert(e.getMessage.contains("test")) + + // serialize and deserialize after initialization failure + val lazyval3 = roundtripSerialize(lazyval) + val e3 = intercept[RuntimeException] { + val v = lazyval3() + } + assert(e3.getMessage.contains("test")) + } +} diff --git a/core/src/test/scala/org/apache/spark/util/TransientBestEffortLazyValSuite.scala b/core/src/test/scala/org/apache/spark/util/TransientBestEffortLazyValSuite.scala new file mode 100644 index 0000000000000..3ed9f2958fd9c --- /dev/null +++ 
b/core/src/test/scala/org/apache/spark/util/TransientBestEffortLazyValSuite.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util + +import java.util.concurrent.CountDownLatch +import java.util.concurrent.atomic.AtomicInteger + +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration._ + +import org.apache.spark.{SerializerTestUtils, SparkFunSuite} + +class TransientBestEffortLazyValSuite extends SparkFunSuite with SerializerTestUtils { + + test("TransientBestEffortLazyVal works") { + val numInitializerCalls = new AtomicInteger(0) + // Simulate a race condition where two threads concurrently + // initialize the lazy value: + val latch = new CountDownLatch(2) + val lazyval = new TransientBestEffortLazyVal(() => { + numInitializerCalls.incrementAndGet() + latch.countDown() + latch.await() + new Object() + }) + + // Ensure no initialization happened before the lazy value was invoked + assert(numInitializerCalls.get() === 0) + + // Two threads concurrently invoke the lazy value + implicit val ec: ExecutionContext = ExecutionContext.global + val future1 = Future { lazyval() } + val future2 = Future { lazyval() } + val value1 = 
ThreadUtils.awaitResult(future1, 10.seconds) + val value2 = ThreadUtils.awaitResult(future2, 10.seconds) + + // The initializer should have been invoked twice (due to how we set up the + // race condition via the latch): + assert(numInitializerCalls.get() === 2) + + // But the value should only have been computed once: + assert(value1 eq value2) + + // Ensure the subsequent invocation serves the same object + assert(lazyval() eq value1) + assert(numInitializerCalls.get() === 2) + } + + test("TransientBestEffortLazyVal is serializable") { + val lazyval = new TransientBestEffortLazyVal(() => "test") + + // serialize and deserialize before first invocation + val lazyval2 = roundtripSerialize(lazyval) + assert(lazyval2() === "test") + + // first invocation + assert(lazyval() === "test") + + // serialize and deserialize after first invocation + val lazyval3 = roundtripSerialize(lazyval) + assert(lazyval3() === "test") + } + + test("TransientBestEffortLazyVal is serializable: unserializable value") { + val lazyval = new TransientBestEffortLazyVal(() => new Object()) + + // serialize and deserialize before first invocation + val lazyval2 = roundtripSerialize(lazyval) + assert(lazyval2() != null) + + // first invocation + assert(lazyval() != null) + + // serialize and deserialize after first invocation + val lazyval3 = roundtripSerialize(lazyval) + assert(lazyval3() != null) + } + + test("TransientBestEffortLazyVal is serializable: failure in compute function") { + val lazyval = new TransientBestEffortLazyVal[String](() => throw new RuntimeException("test")) + + // serialize and deserialize before first invocation + val lazyval2 = roundtripSerialize(lazyval) + val e2 = intercept[RuntimeException] { + val v = lazyval2() + } + assert(e2.getMessage.contains("test")) + + // initialization failure + val e = intercept[RuntimeException] { + val v = lazyval() + } + assert(e.getMessage.contains("test")) + + // serialize and deserialize after initialization failure + val lazyval3 = 
roundtripSerialize(lazyval) + val e3 = intercept[RuntimeException] { + val v = lazyval3() + } + assert(e3.getMessage.contains("test")) + } +} diff --git a/core/src/test/scala/org/apache/spark/util/TransientLazySuite.scala b/core/src/test/scala/org/apache/spark/util/TransientLazySuite.scala deleted file mode 100644 index c0754ee063d67..0000000000000 --- a/core/src/test/scala/org/apache/spark/util/TransientLazySuite.scala +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.util - -import java.io.{ByteArrayOutputStream, ObjectOutputStream} - -import org.apache.spark.SparkFunSuite - -class TransientLazySuite extends SparkFunSuite { - - test("TransientLazy val works") { - var test: Option[Object] = None - - val lazyval = new TransientLazy({ - test = Some(new Object()) - test - }) - - // Ensure no initialization happened before the lazy value was dereferenced - assert(test.isEmpty) - - // Ensure the first invocation creates a new object - assert(lazyval() == test && test.isDefined) - - // Ensure the subsequent invocation serves the same object - assert(lazyval() == test && test.isDefined) - } - - test("TransientLazy val is serializable") { - val lazyval = new TransientLazy({ - new Object() - }) - - // Ensure serializable before the dereference - val oos = new ObjectOutputStream(new ByteArrayOutputStream()) - oos.writeObject(lazyval) - - val dereferenced = lazyval() - - // Ensure serializable after the dereference - val oos2 = new ObjectOutputStream(new ByteArrayOutputStream()) - oos2.writeObject(lazyval) - } -} diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index a6e3345fc600c..baa748573d75b 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -22,6 +22,7 @@ import java.lang.reflect.Field import java.net.{BindException, ServerSocket, URI} import java.nio.{ByteBuffer, ByteOrder} import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.{Files => JFiles} import java.text.DecimalFormatSymbols import java.util.Locale import java.util.concurrent.TimeUnit @@ -731,6 +732,43 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties { assert(!sourceFile2.exists()) } + test("SPARK-50716: deleteRecursively - SymbolicLink To File") { + val tempDir = Utils.createTempDir() + val sourceFile = new File(tempDir, "foo.txt") + 
JFiles.write(sourceFile.toPath, "Some content".getBytes) + assert(sourceFile.exists()) + + val symlinkFile = new File(tempDir, "bar.txt") + JFiles.createSymbolicLink(symlinkFile.toPath, sourceFile.toPath) + + // Check that the symlink was created successfully + assert(JFiles.isSymbolicLink(symlinkFile.toPath)) + Utils.deleteRecursively(tempDir) + + // Verify that everything is deleted + assert(!tempDir.exists) + } + + test("SPARK-50716: deleteRecursively - SymbolicLink To Dir") { + val tempDir = Utils.createTempDir() + val sourceDir = new File(tempDir, "sourceDir") + assert(sourceDir.mkdir()) + val sourceFile = new File(sourceDir, "file.txt") + JFiles.write(sourceFile.toPath, "Some content".getBytes) + + val symlinkDir = new File(tempDir, "targetDir") + JFiles.createSymbolicLink(symlinkDir.toPath, sourceDir.toPath) + + // Check that the symlink was created successfully + assert(JFiles.isSymbolicLink(symlinkDir.toPath)) + + // Now delete recursively + Utils.deleteRecursively(tempDir) + + // Verify that everything is deleted + assert(!tempDir.exists) + } + test("loading properties from file") { withTempDir { tmpDir => val outFile = File.createTempFile("test-load-spark-properties", "test", tmpDir) @@ -1581,26 +1619,14 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties { // at org.apache.spark.util.UtilsSuite.$anonfun$new$165(UtilsSuite.scala:1658) // ... 56 more // scalastyle:on line.size.limit - val origSt = e1.getSuppressed.find( - _.getMessage == Utils.TRY_WITH_CALLER_STACKTRACE_FULL_STACKTRACE) + val origSt = e1.getSuppressed.find(_.isInstanceOf[Utils.OriginalTryStackTraceException]) assert(origSt.isDefined) assert(origSt.get.getStackTrace.exists(_.getMethodName == "throwException")) assert(origSt.get.getStackTrace.exists(_.getMethodName == "callDoTry")) - // The stack trace under Try should be in the suppressed exceptions. 
- // Example: - // Suppressed: java.lang.Exception: Stacktrace under doTryWithCallerStacktrace - // at org.apache.spark.util.UtilsSuite.throwException(UtilsSuite.scala: 1640) - // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTry$1(UtilsSuite.scala: 1645) - // at scala.util.Try$.apply(Try.scala: 213) - // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala: 1586) - val trySt = e1.getSuppressed.find( - _.getMessage == Utils.TRY_WITH_CALLER_STACKTRACE_TRY_STACKTRACE) - assert(trySt.isDefined) - // calls under callDoTry should be present. - assert(trySt.get.getStackTrace.exists(_.getMethodName == "throwException")) - // callDoTry should be removed. - assert(!trySt.get.getStackTrace.exists(_.getMethodName == "callDoTry")) + // Should save the depth of the stack trace under doTryWithCallerStacktrace. + assert(origSt.get.asInstanceOf[Utils.OriginalTryStackTraceException] + .doTryWithCallerStacktraceDepth == 4) val e2 = intercept[Exception] { callGetTryAgain(t) @@ -1633,6 +1659,152 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties { assert(st1Again.exists(_.getMethodName == "callGetTryAgain")) assert(!st1Again.exists(_.getMethodName == "callGetTry")) } + + private def callGetTryFromNested(t: Try[String]): String = { + Utils.getTryWithCallerStacktrace(t) + } + + private def callDoTryNested(): Try[String] = { + Utils.doTryWithCallerStacktrace { + val t = callDoTry() + val e = intercept[Exception] { + callGetTryFromNested(t) + } + + // Uncomment for manual inspection + // + // println("\nIntercepted in callDoTryNested:") + // e.printStackTrace() + // + // scalastyle:off line.size.limit + // java.lang.Exception: test + // at org.apache.spark.util.UtilsSuite.throwException(UtilsSuite.scala:1529) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTry$1(UtilsSuite.scala:1534) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at 
org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1438) + // ----> at org.apache.spark.util.UtilsSuite.callGetTryFromNested(UtilsSuite.scala:1626) <---- STITCHED. + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNested$2(UtilsSuite.scala:1633) + // at org.scalatest.Assertions.intercept(Assertions.scala:749) + // at org.scalatest.Assertions.intercept$(Assertions.scala:746) + // at org.scalatest.funsuite.AnyFunSuite.intercept(AnyFunSuite.scala:1564) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNested$1(UtilsSuite.scala:1632) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at org.apache.spark.util.UtilsSuite.callDoTryNested(UtilsSuite.scala:1630) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNestedNested$1(UtilsSuite.scala:1655) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at org.apache.spark.util.UtilsSuite.callDoTryNestedNested(UtilsSuite.scala:1654) + // at org.apache.spark.util.UtilsSuite.$anonfun$new$172(UtilsSuite.scala:1674) + // ... + // Suppressed: org.apache.spark.util.Utils$OriginalTryStackTraceException: Full stacktrace of original doTryWithCallerStacktrace caller + // at org.apache.spark.util.UtilsSuite.throwException(UtilsSuite.scala:1529) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTry$1(UtilsSuite.scala:1534) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at org.apache.spark.util.UtilsSuite.callDoTry(UtilsSuite.scala:1534) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNested$1(UtilsSuite.scala:1631) + // ... 
+ // scalastyle:on line.size.limit + + assert(e.getStackTrace.exists(_.getMethodName == "callGetTryFromNested")) + assert(!e.getStackTrace.exists(_.getMethodName == "callGetTryFromNestedNested")) + assert(!e.getStackTrace.exists(_.getMethodName == "callGetTry")) + assert(e.getSuppressed.length == 1) + + Utils.getTryWithCallerStacktrace(t) + } + } + + private def callGetTryFromNestedNested(t: Try[String]): String = { + Utils.getTryWithCallerStacktrace(t) + } + + private def callDoTryNestedNested(): Try[String] = { + Utils.doTryWithCallerStacktrace { + val t = callDoTryNested() + val e = intercept[Exception] { + callGetTryFromNestedNested(t) + } + + // Uncomment for manual inspection + // + // println("\nIntercepted in callDoTryNestedNested:") + // e.printStackTrace() + // + // scalastyle:off line.size.limit + // java.lang.Exception: test + // at org.apache.spark.util.UtilsSuite.throwException(UtilsSuite.scala:1529) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTry$1(UtilsSuite.scala:1534) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at org.apache.spark.util.UtilsSuite.callDoTry(UtilsSuite.scala:1534) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNested$1(UtilsSuite.scala:1631) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1438) + // ----> at org.apache.spark.util.UtilsSuite.callGetTryFromNestedNested(UtilsSuite.scala:1650) <---- STITCHED. 
+ // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNestedNested$2(UtilsSuite.scala:1657) + // at org.scalatest.Assertions.intercept(Assertions.scala:749) + // at org.scalatest.Assertions.intercept$(Assertions.scala:746) + // at org.scalatest.funsuite.AnyFunSuite.intercept(AnyFunSuite.scala:1564) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNestedNested$1(UtilsSuite.scala:1656) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at org.apache.spark.util.UtilsSuite.callDoTryNestedNested(UtilsSuite.scala:1654) + // at org.apache.spark.util.UtilsSuite.$anonfun$new$172(UtilsSuite.scala:1674) + // scalastyle:on line.size.limit + + assert(e.getStackTrace.exists(_.getMethodName == "callGetTryFromNestedNested")) + assert(!e.getStackTrace.exists(_.getMethodName == "callGetTryFromNested")) + assert(!e.getStackTrace.exists(_.getMethodName == "callGetTry")) + assert(e.getSuppressed.length == 1) + + Utils.getTryWithCallerStacktrace(t) + } + } + + test("nested doTryWithCallerStacktrace and getTryWithCallerStacktrace") { + val t = callDoTryNestedNested() + + val e = intercept[Exception] { + callGetTry(t) + } + + // Uncomment for manual inspection + // + // println("\nIntercepted in test:") + // e.printStackTrace() + // + // scalastyle:off line.size.limit + // java.lang.Exception: test + // at org.apache.spark.util.UtilsSuite.throwException(UtilsSuite.scala:1529) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTry$1(UtilsSuite.scala:1534) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at org.apache.spark.util.UtilsSuite.callDoTry(UtilsSuite.scala:1534) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNested$1(UtilsSuite.scala:1631) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at 
org.apache.spark.util.UtilsSuite.callDoTryNested(UtilsSuite.scala:1630) + // at org.apache.spark.util.UtilsSuite.$anonfun$callDoTryNestedNested$1(UtilsSuite.scala:1655) + // at scala.util.Try$.apply(Try.scala:217) + // at org.apache.spark.util.Utils$.doTryWithCallerStacktrace(Utils.scala:1377) + // at org.apache.spark.util.Utils$.getTryWithCallerStacktrace(Utils.scala:1438) + // ----> at org.apache.spark.util.UtilsSuite.callGetTry(UtilsSuite.scala:1539) <---- STITCHED. + // at org.apache.spark.util.UtilsSuite.$anonfun$new$173(UtilsSuite.scala:1677) + // at org.scalatest.Assertions.intercept(Assertions.scala:749) + // at org.scalatest.Assertions.intercept$(Assertions.scala:746) + // at org.scalatest.funsuite.AnyFunSuite.intercept(AnyFunSuite.scala:1564) + // at org.apache.spark.util.UtilsSuite.$anonfun$new$172(UtilsSuite.scala:1676) + // scalastyle:on line.size.limit + + assert(e.getStackTrace.exists(_.getMethodName == "callGetTry")) + assert(!e.getStackTrace.exists(_.getMethodName == "callGetTryFromNested")) + assert(!e.getStackTrace.exists(_.getMethodName == "callGetTryFromNestedNested")) + assert(e.getSuppressed.length == 1) + } } private class SimpleExtension diff --git a/dev/check-protos.py b/dev/check-protos.py index bfca8b27be21c..4ddd1f1058820 100755 --- a/dev/check-protos.py +++ b/dev/check-protos.py @@ -44,8 +44,8 @@ def run_cmd(cmd): def check_protos(module_name, cmp_path, proto_path): - print(f"Start checking the generated codes in pyspark-${module_name}.") - with tempfile.TemporaryDirectory(prefix=f"check_${module_name}__protos") as tmp: + print(f"Start checking the generated codes in pyspark-{module_name}.") + with tempfile.TemporaryDirectory(prefix=f"check_{module_name}__protos") as tmp: run_cmd(f"{SPARK_HOME}/dev/gen-protos.sh {module_name} {tmp}") result = filecmp.dircmp( f"{SPARK_HOME}/{cmp_path}", @@ -71,12 +71,12 @@ def check_protos(module_name, cmp_path, proto_path): success = False if success: - print(f"Finish checking the generated codes in 
pyspark-${module_name}: SUCCESS") + print(f"Finish checking the generated codes in pyspark-{module_name}: SUCCESS") else: fail( - "Generated files for pyspark-connect are out of sync! " - f"If you have touched files under ${proto_path}, " - f"please run ./dev/${module_name}-gen-protos.sh. " + f"Generated files for pyspark-{module_name} are out of sync! " + f"If you have touched files under {proto_path}, " + f"please run ./dev/{module_name}-gen-protos.sh. " "If you haven't touched any file above, please rebase your PR against main branch." ) diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index 4285028109419..6c50718e27fe5 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -172,32 +172,10 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 8a9890bf68dde..7b97aba21205b 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -23,10 +23,10 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Release Manager Image" # Overwrite this label to avoid exposing the underlying Ubuntu OS version label LABEL org.opencontainers.image.version="" -ENV FULL_REFRESH_DATE 20240318 +ENV FULL_REFRESH_DATE=20240318 -ENV DEBIAN_FRONTEND noninteractive -ENV DEBCONF_NONINTERACTIVE_SEEN true +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true RUN apt-get update && apt-get install -y \ build-essential \ @@ -88,7 +88,7 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', \ Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" # See more in SPARK-39735 -ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" +ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" RUN add-apt-repository ppa:pypy/ppa @@ -102,7 +102,7 @@ RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.3' scipy coverage matp ARG 
BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1" # Python deps for Spark Connect -ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.28.3 googleapis-common-protos==1.65.0" +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0" # Install Python 3.10 packages RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 @@ -130,7 +130,7 @@ RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CON # See 'docutils<0.18.0' in SPARK-39421 RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \ -'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \ +'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \ 'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' RUN python3.9 -m pip list diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 4603ae2fc5548..59aabdf5fff19 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -16,11 +16,11 @@ antlr4-runtime/4.13.1//antlr4-runtime-4.13.1.jar aopalliance-repackaged/3.0.6//aopalliance-repackaged-3.0.6.jar arpack/3.0.3//arpack-3.0.3.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/18.0.0//arrow-format-18.0.0.jar -arrow-memory-core/18.0.0//arrow-memory-core-18.0.0.jar 
-arrow-memory-netty-buffer-patch/18.0.0//arrow-memory-netty-buffer-patch-18.0.0.jar -arrow-memory-netty/18.0.0//arrow-memory-netty-18.0.0.jar -arrow-vector/18.0.0//arrow-vector-18.0.0.jar +arrow-format/18.1.0//arrow-format-18.1.0.jar +arrow-memory-core/18.1.0//arrow-memory-core-18.1.0.jar +arrow-memory-netty-buffer-patch/18.1.0//arrow-memory-netty-buffer-patch-18.1.0.jar +arrow-memory-netty/18.1.0//arrow-memory-netty-18.1.0.jar +arrow-vector/18.1.0//arrow-vector-18.1.0.jar audience-annotations/0.12.0//audience-annotations-0.12.0.jar avro-ipc/1.12.0//avro-ipc-1.12.0.jar avro-mapred/1.12.0//avro-mapred-1.12.0.jar @@ -33,11 +33,11 @@ breeze-macros_2.13/2.1.0//breeze-macros_2.13-2.1.0.jar breeze_2.13/2.1.0//breeze_2.13-2.1.0.jar bundle/2.24.6//bundle-2.24.6.jar cats-kernel_2.13/2.8.0//cats-kernel_2.13-2.8.0.jar -checker-qual/3.42.0//checker-qual-3.42.0.jar +checker-qual/3.43.0//checker-qual-3.43.0.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.13/0.10.0//chill_2.13-0.10.0.jar commons-cli/1.9.0//commons-cli-1.9.0.jar -commons-codec/1.17.1//commons-codec-1.17.1.jar +commons-codec/1.17.2//commons-codec-1.17.2.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-collections4/4.4//commons-collections4-4.4.jar commons-compiler/3.1.9//commons-compiler-3.1.9.jar @@ -49,7 +49,7 @@ commons-lang/2.6//commons-lang-2.6.jar commons-lang3/3.17.0//commons-lang3-3.17.0.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar -commons-text/1.12.0//commons-text-1.12.0.jar +commons-text/1.13.0//commons-text-1.13.0.jar compress-lzf/1.1.2//compress-lzf-1.1.2.jar curator-client/5.7.1//curator-client-5.7.1.jar curator-framework/5.7.1//curator-framework-5.7.1.jar @@ -63,14 +63,14 @@ derby/10.16.1.1//derby-10.16.1.1.jar derbyshared/10.16.1.1//derbyshared-10.16.1.1.jar derbytools/10.16.1.1//derbytools-10.16.1.1.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar 
-error_prone_annotations/2.26.1//error_prone_annotations-2.26.1.jar +error_prone_annotations/2.28.0//error_prone_annotations-2.28.0.jar esdk-obs-java/3.20.4.2//esdk-obs-java-3.20.4.2.jar failureaccess/1.0.2//failureaccess-1.0.2.jar flatbuffers-java/24.3.25//flatbuffers-java-24.3.25.jar gcs-connector/hadoop3-2.2.25/shaded/gcs-connector-hadoop3-2.2.25-shaded.jar gmetric4j/1.0.10//gmetric4j-1.0.10.jar gson/2.11.0//gson-2.11.0.jar -guava/33.2.1-jre//guava-33.2.1-jre.jar +guava/33.3.1-jre//guava-33.3.1-jre.jar hadoop-aliyun/3.4.1//hadoop-aliyun-3.4.1.jar hadoop-annotations/3.4.1//hadoop-annotations-3.4.1.jar hadoop-aws/3.4.1//hadoop-aws-3.4.1.jar @@ -103,18 +103,17 @@ httpcore/4.4.16//httpcore-4.4.16.jar icu4j/76.1//icu4j-76.1.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar -ivy/2.5.2//ivy-2.5.2.jar +ivy/2.5.3//ivy-2.5.3.jar j2objc-annotations/3.0.0//j2objc-annotations-3.0.0.jar -jackson-annotations/2.18.1//jackson-annotations-2.18.1.jar +jackson-annotations/2.18.2//jackson-annotations-2.18.2.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.18.1//jackson-core-2.18.1.jar -jackson-databind/2.18.1//jackson-databind-2.18.1.jar -jackson-dataformat-cbor/2.18.1//jackson-dataformat-cbor-2.18.1.jar -jackson-dataformat-yaml/2.18.1//jackson-dataformat-yaml-2.18.1.jar -jackson-datatype-jdk8/2.17.0//jackson-datatype-jdk8-2.17.0.jar -jackson-datatype-jsr310/2.18.1//jackson-datatype-jsr310-2.18.1.jar +jackson-core/2.18.2//jackson-core-2.18.2.jar +jackson-databind/2.18.2//jackson-databind-2.18.2.jar +jackson-dataformat-cbor/2.18.2//jackson-dataformat-cbor-2.18.2.jar +jackson-dataformat-yaml/2.18.2//jackson-dataformat-yaml-2.18.2.jar +jackson-datatype-jsr310/2.18.2//jackson-datatype-jsr310-2.18.2.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.13/2.18.1//jackson-module-scala_2.13-2.18.1.jar +jackson-module-scala_2.13/2.18.2//jackson-module-scala_2.13-2.18.2.jar 
jakarta.annotation-api/2.1.1//jakarta.annotation-api-2.1.1.jar jakarta.inject-api/2.0.1//jakarta.inject-api-2.0.1.jar jakarta.servlet-api/5.0.0//jakarta.servlet-api-5.0.0.jar @@ -159,74 +158,75 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/2.0.16//jul-to-slf4j-2.0.16.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client-api/6.13.4//kubernetes-client-api-6.13.4.jar -kubernetes-client/6.13.4//kubernetes-client-6.13.4.jar -kubernetes-httpclient-okhttp/6.13.4//kubernetes-httpclient-okhttp-6.13.4.jar -kubernetes-model-admissionregistration/6.13.4//kubernetes-model-admissionregistration-6.13.4.jar -kubernetes-model-apiextensions/6.13.4//kubernetes-model-apiextensions-6.13.4.jar -kubernetes-model-apps/6.13.4//kubernetes-model-apps-6.13.4.jar -kubernetes-model-autoscaling/6.13.4//kubernetes-model-autoscaling-6.13.4.jar -kubernetes-model-batch/6.13.4//kubernetes-model-batch-6.13.4.jar -kubernetes-model-certificates/6.13.4//kubernetes-model-certificates-6.13.4.jar -kubernetes-model-common/6.13.4//kubernetes-model-common-6.13.4.jar -kubernetes-model-coordination/6.13.4//kubernetes-model-coordination-6.13.4.jar -kubernetes-model-core/6.13.4//kubernetes-model-core-6.13.4.jar -kubernetes-model-discovery/6.13.4//kubernetes-model-discovery-6.13.4.jar -kubernetes-model-events/6.13.4//kubernetes-model-events-6.13.4.jar -kubernetes-model-extensions/6.13.4//kubernetes-model-extensions-6.13.4.jar -kubernetes-model-flowcontrol/6.13.4//kubernetes-model-flowcontrol-6.13.4.jar -kubernetes-model-gatewayapi/6.13.4//kubernetes-model-gatewayapi-6.13.4.jar -kubernetes-model-metrics/6.13.4//kubernetes-model-metrics-6.13.4.jar -kubernetes-model-networking/6.13.4//kubernetes-model-networking-6.13.4.jar -kubernetes-model-node/6.13.4//kubernetes-model-node-6.13.4.jar -kubernetes-model-policy/6.13.4//kubernetes-model-policy-6.13.4.jar -kubernetes-model-rbac/6.13.4//kubernetes-model-rbac-6.13.4.jar -kubernetes-model-resource/6.13.4//kubernetes-model-resource-6.13.4.jar 
-kubernetes-model-scheduling/6.13.4//kubernetes-model-scheduling-6.13.4.jar -kubernetes-model-storageclass/6.13.4//kubernetes-model-storageclass-6.13.4.jar +kubernetes-client-api/7.0.1//kubernetes-client-api-7.0.1.jar +kubernetes-client/7.0.1//kubernetes-client-7.0.1.jar +kubernetes-httpclient-vertx/7.0.1//kubernetes-httpclient-vertx-7.0.1.jar +kubernetes-model-admissionregistration/7.0.1//kubernetes-model-admissionregistration-7.0.1.jar +kubernetes-model-apiextensions/7.0.1//kubernetes-model-apiextensions-7.0.1.jar +kubernetes-model-apps/7.0.1//kubernetes-model-apps-7.0.1.jar +kubernetes-model-autoscaling/7.0.1//kubernetes-model-autoscaling-7.0.1.jar +kubernetes-model-batch/7.0.1//kubernetes-model-batch-7.0.1.jar +kubernetes-model-certificates/7.0.1//kubernetes-model-certificates-7.0.1.jar +kubernetes-model-common/7.0.1//kubernetes-model-common-7.0.1.jar +kubernetes-model-coordination/7.0.1//kubernetes-model-coordination-7.0.1.jar +kubernetes-model-core/7.0.1//kubernetes-model-core-7.0.1.jar +kubernetes-model-discovery/7.0.1//kubernetes-model-discovery-7.0.1.jar +kubernetes-model-events/7.0.1//kubernetes-model-events-7.0.1.jar +kubernetes-model-extensions/7.0.1//kubernetes-model-extensions-7.0.1.jar +kubernetes-model-flowcontrol/7.0.1//kubernetes-model-flowcontrol-7.0.1.jar +kubernetes-model-gatewayapi/7.0.1//kubernetes-model-gatewayapi-7.0.1.jar +kubernetes-model-metrics/7.0.1//kubernetes-model-metrics-7.0.1.jar +kubernetes-model-networking/7.0.1//kubernetes-model-networking-7.0.1.jar +kubernetes-model-node/7.0.1//kubernetes-model-node-7.0.1.jar +kubernetes-model-policy/7.0.1//kubernetes-model-policy-7.0.1.jar +kubernetes-model-rbac/7.0.1//kubernetes-model-rbac-7.0.1.jar +kubernetes-model-resource/7.0.1//kubernetes-model-resource-7.0.1.jar +kubernetes-model-scheduling/7.0.1//kubernetes-model-scheduling-7.0.1.jar +kubernetes-model-storageclass/7.0.1//kubernetes-model-storageclass-7.0.1.jar lapack/3.0.3//lapack-3.0.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar 
libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.16.0//libthrift-0.16.0.jar listenablefuture/9999.0-empty-to-avoid-conflict-with-guava//listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar -log4j-1.2-api/2.24.1//log4j-1.2-api-2.24.1.jar -log4j-api/2.24.1//log4j-api-2.24.1.jar -log4j-core/2.24.1//log4j-core-2.24.1.jar -log4j-layout-template-json/2.24.1//log4j-layout-template-json-2.24.1.jar -log4j-slf4j2-impl/2.24.1//log4j-slf4j2-impl-2.24.1.jar -logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar +log4j-1.2-api/2.24.3//log4j-1.2-api-2.24.3.jar +log4j-api/2.24.3//log4j-api-2.24.3.jar +log4j-core/2.24.3//log4j-core-2.24.3.jar +log4j-layout-template-json/2.24.3//log4j-layout-template-json-2.24.3.jar +log4j-slf4j2-impl/2.24.3//log4j-slf4j2-impl-2.24.3.jar lz4-java/1.8.0//lz4-java-1.8.0.jar -metrics-core/4.2.28//metrics-core-4.2.28.jar -metrics-graphite/4.2.28//metrics-graphite-4.2.28.jar -metrics-jmx/4.2.28//metrics-jmx-4.2.28.jar -metrics-json/4.2.28//metrics-json-4.2.28.jar -metrics-jvm/4.2.28//metrics-jvm-4.2.28.jar +metrics-core/4.2.29//metrics-core-4.2.29.jar +metrics-graphite/4.2.29//metrics-graphite-4.2.29.jar +metrics-jmx/4.2.29//metrics-jmx-4.2.29.jar +metrics-json/4.2.29//metrics-json-4.2.29.jar +metrics-jvm/4.2.29//metrics-jvm-4.2.29.jar minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.114.Final//netty-all-4.1.114.Final.jar -netty-buffer/4.1.114.Final//netty-buffer-4.1.114.Final.jar -netty-codec-http/4.1.114.Final//netty-codec-http-4.1.114.Final.jar -netty-codec-http2/4.1.114.Final//netty-codec-http2-4.1.114.Final.jar -netty-codec-socks/4.1.114.Final//netty-codec-socks-4.1.114.Final.jar -netty-codec/4.1.114.Final//netty-codec-4.1.114.Final.jar -netty-common/4.1.114.Final//netty-common-4.1.114.Final.jar -netty-handler-proxy/4.1.114.Final//netty-handler-proxy-4.1.114.Final.jar -netty-handler/4.1.114.Final//netty-handler-4.1.114.Final.jar -netty-resolver/4.1.114.Final//netty-resolver-4.1.114.Final.jar 
+netty-all/4.1.115.Final//netty-all-4.1.115.Final.jar +netty-buffer/4.1.115.Final//netty-buffer-4.1.115.Final.jar +netty-codec-dns/4.1.115.Final//netty-codec-dns-4.1.115.Final.jar +netty-codec-http/4.1.115.Final//netty-codec-http-4.1.115.Final.jar +netty-codec-http2/4.1.115.Final//netty-codec-http2-4.1.115.Final.jar +netty-codec-socks/4.1.115.Final//netty-codec-socks-4.1.115.Final.jar +netty-codec/4.1.115.Final//netty-codec-4.1.115.Final.jar +netty-common/4.1.115.Final//netty-common-4.1.115.Final.jar +netty-handler-proxy/4.1.115.Final//netty-handler-proxy-4.1.115.Final.jar +netty-handler/4.1.115.Final//netty-handler-4.1.115.Final.jar +netty-resolver-dns/4.1.115.Final//netty-resolver-dns-4.1.115.Final.jar +netty-resolver/4.1.115.Final//netty-resolver-4.1.115.Final.jar netty-tcnative-boringssl-static/2.0.69.Final/linux-aarch_64/netty-tcnative-boringssl-static-2.0.69.Final-linux-aarch_64.jar netty-tcnative-boringssl-static/2.0.69.Final/linux-x86_64/netty-tcnative-boringssl-static-2.0.69.Final-linux-x86_64.jar netty-tcnative-boringssl-static/2.0.69.Final/osx-aarch_64/netty-tcnative-boringssl-static-2.0.69.Final-osx-aarch_64.jar netty-tcnative-boringssl-static/2.0.69.Final/osx-x86_64/netty-tcnative-boringssl-static-2.0.69.Final-osx-x86_64.jar netty-tcnative-boringssl-static/2.0.69.Final/windows-x86_64/netty-tcnative-boringssl-static-2.0.69.Final-windows-x86_64.jar netty-tcnative-classes/2.0.69.Final//netty-tcnative-classes-2.0.69.Final.jar -netty-transport-classes-epoll/4.1.114.Final//netty-transport-classes-epoll-4.1.114.Final.jar -netty-transport-classes-kqueue/4.1.114.Final//netty-transport-classes-kqueue-4.1.114.Final.jar -netty-transport-native-epoll/4.1.114.Final/linux-aarch_64/netty-transport-native-epoll-4.1.114.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.114.Final/linux-riscv64/netty-transport-native-epoll-4.1.114.Final-linux-riscv64.jar 
-netty-transport-native-epoll/4.1.114.Final/linux-x86_64/netty-transport-native-epoll-4.1.114.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.114.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.114.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.114.Final/osx-x86_64/netty-transport-native-kqueue-4.1.114.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.114.Final//netty-transport-native-unix-common-4.1.114.Final.jar -netty-transport/4.1.114.Final//netty-transport-4.1.114.Final.jar +netty-transport-classes-epoll/4.1.115.Final//netty-transport-classes-epoll-4.1.115.Final.jar +netty-transport-classes-kqueue/4.1.115.Final//netty-transport-classes-kqueue-4.1.115.Final.jar +netty-transport-native-epoll/4.1.115.Final/linux-aarch_64/netty-transport-native-epoll-4.1.115.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.115.Final/linux-riscv64/netty-transport-native-epoll-4.1.115.Final-linux-riscv64.jar +netty-transport-native-epoll/4.1.115.Final/linux-x86_64/netty-transport-native-epoll-4.1.115.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.1.115.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.115.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.115.Final/osx-x86_64/netty-transport-native-kqueue-4.1.115.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.115.Final//netty-transport-native-unix-common-4.1.115.Final.jar +netty-transport/4.1.115.Final//netty-transport-4.1.115.Final.jar objenesis/3.3//objenesis-3.3.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.17.6//okio-1.17.6.jar @@ -234,21 +234,21 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/2.0.3/shaded-protobuf/orc-core-2.0.3-shaded-protobuf.jar +orc-core/2.1.0/shaded-protobuf/orc-core-2.1.0-shaded-protobuf.jar orc-format/1.0.0/shaded-protobuf/orc-format-1.0.0-shaded-protobuf.jar 
-orc-mapreduce/2.0.3/shaded-protobuf/orc-mapreduce-2.0.3-shaded-protobuf.jar -orc-shims/2.0.3//orc-shims-2.0.3.jar +orc-mapreduce/2.1.0/shaded-protobuf/orc-mapreduce-2.1.0-shaded-protobuf.jar +orc-shims/2.1.0//orc-shims-2.1.0.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.14.4//parquet-column-1.14.4.jar -parquet-common/1.14.4//parquet-common-1.14.4.jar -parquet-encoding/1.14.4//parquet-encoding-1.14.4.jar -parquet-format-structures/1.14.4//parquet-format-structures-1.14.4.jar -parquet-hadoop/1.14.4//parquet-hadoop-1.14.4.jar -parquet-jackson/1.14.4//parquet-jackson-1.14.4.jar +parquet-column/1.15.0//parquet-column-1.15.0.jar +parquet-common/1.15.0//parquet-common-1.15.0.jar +parquet-encoding/1.15.0//parquet-encoding-1.15.0.jar +parquet-format-structures/1.15.0//parquet-format-structures-1.15.0.jar +parquet-hadoop/1.15.0//parquet-hadoop-1.15.0.jar +parquet-jackson/1.15.0//parquet-jackson-1.15.0.jar pickle/1.5//pickle-1.5.jar -py4j/0.10.9.7//py4j-0.10.9.7.jar +py4j/0.10.9.9//py4j-0.10.9.9.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/9.7.3//rocksdbjni-9.7.3.jar scala-collection-compat_2.13/2.7.0//scala-collection-compat_2.13-2.7.0.jar @@ -259,7 +259,7 @@ scala-parser-combinators_2.13/2.4.0//scala-parser-combinators_2.13-2.4.0.jar scala-reflect/2.13.15//scala-reflect-2.13.15.jar scala-xml_2.13/2.3.0//scala-xml_2.13-2.3.0.jar slf4j-api/2.0.16//slf4j-api-2.0.16.jar -snakeyaml-engine/2.7//snakeyaml-engine-2.7.jar +snakeyaml-engine/2.8//snakeyaml-engine-2.8.jar snakeyaml/2.3//snakeyaml-2.3.jar snappy-java/1.1.10.7//snappy-java-1.1.10.7.jar spire-macros_2.13/0.18.0//spire-macros_2.13-0.18.0.jar @@ -269,15 +269,19 @@ spire_2.13/0.18.0//spire_2.13-0.18.0.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.8//stream-2.9.8.jar super-csv/2.2.0//super-csv-2.2.0.jar -threeten-extra/1.7.1//threeten-extra-1.7.1.jar -tink/1.15.0//tink-1.15.0.jar 
+threeten-extra/1.8.0//threeten-extra-1.8.0.jar +tink/1.16.0//tink-1.16.0.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar +vertx-auth-common/4.5.11//vertx-auth-common-4.5.11.jar +vertx-core/4.5.11//vertx-core-4.5.11.jar +vertx-web-client/4.5.11//vertx-web-client-4.5.11.jar +vertx-web-common/4.5.11//vertx-web-common-4.5.11.jar wildfly-openssl/1.1.3.Final//wildfly-openssl-1.1.3.Final.jar xbean-asm9-shaded/4.26//xbean-asm9-shaded-4.26.jar xmlschema-core/2.3.1//xmlschema-core-2.3.1.jar xz/1.10//xz-1.10.jar -zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar +zjsonpatch/7.0.1//zjsonpatch-7.0.1.jar zookeeper-jute/3.9.3//zookeeper-jute-3.9.3.jar zookeeper/3.9.3//zookeeper-3.9.3.jar -zstd-jni/1.5.6-7//zstd-jni-1.5.6-7.jar +zstd-jni/1.5.6-9//zstd-jni-1.5.6-9.jar diff --git a/dev/eslint.js b/dev/eslint.js new file mode 100644 index 0000000000000..24b5170b436a9 --- /dev/null +++ b/dev/eslint.js @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +module.exports = { + "env": { + "browser": true, + "es6": true, + "jest": true + }, + "extends": "eslint:recommended", + "rules": { + "indent": [ + "error", + 2, + { + "SwitchCase": 1, + "MemberExpression": "off" + } + ], + "no-unused-vars": ["error", {"argsIgnorePattern": "^_ignored_.*"}] + }, + "ignorePatterns": [ + "*.min.js", + "sorttable.js", + "jquery.mustache.js", + "dataTables.rowsGroup.js" + ], + "parserOptions": { + "sourceType": "module" + } +} diff --git a/dev/eslint.json b/dev/eslint.json deleted file mode 100644 index 29692696a6df2..0000000000000 --- a/dev/eslint.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "env": { - "browser": true, - "es6": true, - "jest": true - }, - "extends": "eslint:recommended", - "rules": { - "indent": [ - "error", - 2, - { - "SwitchCase": 1, - "MemberExpression": "off" - } - ], - "no-unused-vars": ["error", {"argsIgnorePattern": "^_ignored_.*"}] - }, - "ignorePatterns": [ - "*.min.js", - "sorttable.js", - "jquery.mustache.js", - "dataTables.rowsGroup.js" - ], - "parserOptions": { - "sourceType": "module" - } -} diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 2817818cbc4e3..9cd6031023ca5 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -24,10 +24,10 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image" # Overwrite this label to avoid exposing the underlying Ubuntu OS version label LABEL org.opencontainers.image.version="" -ENV FULL_REFRESH_DATE 20241119 +ENV FULL_REFRESH_DATE=20241119 -ENV DEBIAN_FRONTEND noninteractive -ENV DEBCONF_NONINTERACTIVE_SEEN true +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true RUN apt-get update && apt-get install -y \ build-essential \ @@ -82,7 +82,7 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', \ Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" # See more in SPARK-39735 -ENV R_LIBS_SITE 
"/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" +ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" RUN add-apt-repository ppa:pypy/ppa @@ -96,7 +96,7 @@ RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.3' scipy coverage matp ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect -ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.28.3 googleapis-common-protos==1.65.0 graphviz==0.20.3" +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" # Install Python 3.10 packages RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 diff --git a/dev/lint-js b/dev/lint-js index f3f7eac4f6b8e..1a94348b7430a 100755 --- a/dev/lint-js +++ b/dev/lint-js @@ -45,7 +45,7 @@ if ! npm ls eslint > /dev/null; then npm ci eslint fi -npx eslint -c "$SPARK_ROOT_DIR/dev/eslint.json" ${LINT_TARGET_FILES[@]} | tee "$LINT_JS_REPORT_FILE_NAME" +npx eslint -c "$SPARK_ROOT_DIR/dev/eslint.js" ${LINT_TARGET_FILES[@]} | tee "$LINT_JS_REPORT_FILE_NAME" lint_status=$? if [ "$lint_status" = "0" ] ; then diff --git a/dev/lint-scala b/dev/lint-scala index 23df146a8d1b4..30642a550401e 100755 --- a/dev/lint-scala +++ b/dev/lint-scala @@ -20,8 +20,10 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" SPARK_ROOT_DIR="$(dirname $SCRIPT_DIR)" +set -e "$SCRIPT_DIR/scalastyle" "$1" +set +e # For Spark Connect, we actively enforce scalafmt and check that the produced diff is empty. 
ERRORS=$(./build/mvn \ -Pscala-2.13 \ diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 4ebd3e4b951f5..415f468a11577 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -419,7 +419,7 @@ def choose_jira_assignee(issue): annotations.append("Commentator") print("[%d] %s (%s)" % (idx, author.displayName, ",".join(annotations))) raw_assignee = bold_input( - "Enter number of user, or userid, to assign to (blank to leave unassigned):" + "Enter number of user, or userid, to assign to (blank to leave unassigned): " ) if raw_assignee == "": return None diff --git a/dev/package-lock.json b/dev/package-lock.json index f676b9cec0762..e6ec1406a7620 100644 --- a/dev/package-lock.json +++ b/dev/package-lock.json @@ -4,6 +4,7 @@ "requires": true, "packages": { "": { + "name": "dev", "devDependencies": { "ansi-regex": "^5.0.1", "eslint": "^7.25.0", @@ -316,10 +317,11 @@ "dev": true }, "node_modules/cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", "dev": true, + "license": "MIT", "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", @@ -1469,9 +1471,9 @@ "dev": true }, "cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", "dev": true, "requires": { "path-key": 
"^3.1.0", diff --git a/dev/pyproject.toml b/dev/pyproject.toml index f19107b3782a6..8b91943009555 100644 --- a/dev/pyproject.toml +++ b/dev/pyproject.toml @@ -27,7 +27,7 @@ testpaths = [ [tool.black] # When changing the version, we have to update # GitHub workflow version and dev/reformat-python -required-version = "23.9.1" +required-version = "23.12.1" line-length = 100 target-version = ['py39'] include = '\.pyi?$' diff --git a/dev/reformat-python b/dev/reformat-python index 46b7efc931aae..9a1199faa938e 100755 --- a/dev/reformat-python +++ b/dev/reformat-python @@ -22,7 +22,7 @@ FWDIR="$( cd "$DIR"/.. && pwd )" cd "$FWDIR" BLACK_BUILD="${PYTHON_EXECUTABLE} -m black" -BLACK_VERSION="23.9.1" +BLACK_VERSION="23.12.1" $PYTHON_EXECUTABLE -c 'import black' 2> /dev/null if [ $? -ne 0 ]; then echo "The Python library providing the 'black' module was not found. Please install Black, for example, via 'pip install black==$BLACK_VERSION'." diff --git a/dev/requirements.txt b/dev/requirements.txt index a9874f77113ab..36548c2eae408 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -1,11 +1,11 @@ # PySpark dependencies (required) -py4j>=0.10.9.7 +py4j>=0.10.9.9 # PySpark dependencies (optional) numpy>=1.21 -pyarrow>=10.0.0 +pyarrow>=11.0.0 six==1.16.0 -pandas>=2.0.0 +pandas>=2.2.0 scipy plotly>=4.8 mlflow>=2.3.1 @@ -54,14 +54,14 @@ jira>=3.5.2 PyGithub # pandas API on Spark Code formatter. 
-black==23.9.1 +black==23.12.1 py # Spark Connect (required) grpcio>=1.67.0 grpcio-status>=1.67.0 googleapis-common-protos>=1.65.0 -protobuf==5.28.3 +protobuf==5.29.1 # Spark Connect python proto generation plugin (optional) mypy-protobuf==3.3.0 diff --git a/dev/run-pip-tests b/dev/run-pip-tests index 91399ff1e25ea..f8a547b0c917c 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -87,10 +87,6 @@ for python in "${PYTHON_EXECS[@]}"; do VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python rm -rf "$VIRTUALENV_PATH" if [ -n "$USE_CONDA" ]; then - if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then - # See also https://github.com/conda/conda/issues/7980 - source "$CONDA_PREFIX/etc/profile.d/conda.sh" - fi conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools source activate "$VIRTUALENV_PATH" || conda activate "$VIRTUALENV_PATH" else diff --git a/dev/spark-test-image-util/docs/build-docs b/dev/spark-test-image-util/docs/build-docs new file mode 100755 index 0000000000000..ca59769f24231 --- /dev/null +++ b/dev/spark-test-image-util/docs/build-docs @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +if ! 
[ -x "$(command -v docker)" ]; then + echo "Error: Docker is not installed." >&2 + exit 1 +fi + +DOCKER_CACHE_IMG="ghcr.io/apache/spark/apache-spark-github-action-image-docs-cache:master" +REPO_OWNER="apache/spark" +REPOSITORY="apache-spark-ci-image-docs" +IMG_TAG=$(date +%s) +IMG_NAME="${REPOSITORY}:${IMG_TAG}" +IMG_URL="$REPO_OWNER/$IMG_NAME" +DOCKER_MOUNT_SPARK_HOME="/__w/spark/spark" +BUILD_DOCS_SCRIPT_PATH="${DOCKER_MOUNT_SPARK_HOME}/dev/spark-test-image-util/docs/run-in-container" + +FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)" +SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/../../..; pwd)" + +# 1.Compile spark outside the container to prepare for generating documents inside the container. +build/sbt -Phive -Pkinesis-asl clean unidoc package + +# 2.Build container image. +docker buildx build \ + --cache-from type=registry,ref="${DOCKER_CACHE_IMG}" \ + --tag "${IMG_URL}" "${FWDIR}" \ + --file "${SPARK_HOME}/dev/spark-test-image/docs/Dockerfile" + +# 3.Build docs on container: `error docs`, `scala doc`, `python doc`, `sql doc`. +docker run \ + --mount type=bind,source="${SPARK_HOME}",target="${DOCKER_MOUNT_SPARK_HOME}" \ + --interactive --tty "${IMG_URL}" \ + /bin/bash -c "sh ${BUILD_DOCS_SCRIPT_PATH}" + +if [[ "$SKIP_RDOC" != "1" ]]; then + # 4.Build docs on host: `r doc`. + # + # Why does `r` document need to be compiled outside the container? + # Because when compiling inside the container, the permission of the directory + # `/__w/spark/spark/R/pkg/docs` automatically generated by `RScript` is `dr-xr--r-x`, + # and when writing to subsequent files, will throw an error as: + # `! [EACCES] Failed to copy '/usr/local/lib/R/site-library/pkgdown/BS5/assets/katex-auto.js' + # to '/__w/spark/spark/R/pkg/docs/katex-auto.js': permission denied` + export SKIP_ERRORDOC=1 + export SKIP_SCALADOC=1 + export SKIP_PYTHONDOC=1 + export SKIP_SQLDOC=1 + cd docs + bundle exec jekyll build +fi + +# 5.Remove container image. 
+IMG_ID=$(docker images | grep "${IMG_TAG}" | awk '{print $3}') +docker image rm --force "${IMG_ID}" + +echo "Build doc done." diff --git a/dev/spark-test-image-util/docs/run-in-container b/dev/spark-test-image-util/docs/run-in-container new file mode 100644 index 0000000000000..1d43c602f7c72 --- /dev/null +++ b/dev/spark-test-image-util/docs/run-in-container @@ -0,0 +1,37 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# 1.Set env variable. +export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-arm64 +export PATH=$JAVA_HOME/bin:$PATH +export SPARK_DOCS_IS_BUILT_ON_HOST=1 +# We expect to compile the R document on the host. +export SKIP_RDOC=1 + +# 2.Install bundler. +gem install bundler -v 2.4.22 +cd /__w/spark/spark/docs +bundle install + +# 3.Build docs, includes: `error docs`, `scala doc`, `python doc`, `sql doc`, excludes: `r doc`. +# We need this link to make sure `python3` points to `python3.9` which contains the prerequisite packages. +ln -s "$(which python3.9)" "/usr/local/bin/python3" + +# Build docs first with SKIP_API to ensure they are buildable without requiring any +# language docs to be built beforehand. 
+cd /__w/spark/spark/docs +bundle exec jekyll build diff --git a/dev/spark-test-image/docs/Dockerfile b/dev/spark-test-image/docs/Dockerfile index 2db7e0717cdfd..f1e33763df468 100644 --- a/dev/spark-test-image/docs/Dockerfile +++ b/dev/spark-test-image/docs/Dockerfile @@ -24,10 +24,10 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image for Documentat # Overwrite this label to avoid exposing the underlying Ubuntu OS version label LABEL org.opencontainers.image.version="" -ENV FULL_REFRESH_DATE 20241029 +ENV FULL_REFRESH_DATE=20241029 -ENV DEBIAN_FRONTEND noninteractive -ENV DEBCONF_NONINTERACTIVE_SEEN true +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true RUN apt-get update && apt-get install -y \ build-essential \ @@ -72,7 +72,7 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', 'rmarkdown', Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" # See more in SPARK-39735 -ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" +ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" # Install Python 3.9 RUN add-apt-repository ppa:deadsnakes/ppa @@ -85,7 +85,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 # See 'docutils<0.18.0' in SPARK-39421 RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \ - 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \ - 'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ + 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \ + 'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 
'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' \ && python3.9 -m pip cache purge diff --git a/dev/spark-test-image/lint/Dockerfile b/dev/spark-test-image/lint/Dockerfile index f9ea3124291b1..c3ffd7ba4e4b2 100644 --- a/dev/spark-test-image/lint/Dockerfile +++ b/dev/spark-test-image/lint/Dockerfile @@ -24,10 +24,10 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image for Linter" # Overwrite this label to avoid exposing the underlying Ubuntu OS version label LABEL org.opencontainers.image.version="" -ENV FULL_REFRESH_DATE 20241112 +ENV FULL_REFRESH_DATE=20241112 -ENV DEBIAN_FRONTEND noninteractive -ENV DEBCONF_NONINTERACTIVE_SEEN true +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true RUN apt-get update && apt-get install -y \ build-essential \ @@ -63,7 +63,7 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', 'rmarkdown', && Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" \ # See more in SPARK-39735 -ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" +ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" # Install Python 3.9 RUN add-apt-repository ppa:deadsnakes/ppa @@ -72,7 +72,7 @@ RUN apt-get update && apt-get install -y python3.9 python3.9-distutils \ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 RUN python3.9 -m pip install \ - 'black==23.9.1' \ + 'black==23.12.1' \ 'flake8==3.9.0' \ 'googleapis-common-protos-stubs==2.2.0' \ 'grpc-stubs==1.24.11' \ diff --git a/dev/spark-test-image/pypy-310/Dockerfile b/dev/spark-test-image/pypy-310/Dockerfile new file mode 100644 index 0000000000000..6a309d38f1d55 --- /dev/null +++ b/dev/spark-test-image/pypy-310/Dockerfile @@ -0,0 +1,71 @@ +# +# 
Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. +# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with PyPy 3.10" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20241212 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + qpdf \ + tzdata \ + software-properties-common \ + wget \ + zlib1g-dev \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +RUN 
add-apt-repository ppa:pypy/ppa +RUN mkdir -p /usr/local/pypy/pypy3.10 && \ + curl -sqL https://downloads.python.org/pypy/pypy3.10-v7.3.17-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.10 --strip-components=1 && \ + ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3.10 && \ + ln -sf /usr/local/pypy/pypy3.10/bin/pypy /usr/local/bin/pypy3 +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3 +RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.3' scipy coverage matplotlib lxml diff --git a/dev/spark-test-image/python-309/Dockerfile b/dev/spark-test-image/python-309/Dockerfile new file mode 100644 index 0000000000000..bfe23bf572add --- /dev/null +++ b/dev/spark-test-image/python-309/Dockerfile @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. 
+# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with Python 3.09" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20241205 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + qpdf \ + tzdata \ + software-properties-common \ + wget \ + zlib1g-dev + +# Install Python 3.9 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.9 \ + python3.9-distutils \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +# Python deps for Spark Connect +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" + +# Install Python 3.9 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 +RUN python3.9 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this +RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ + python3.9 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.9 -m 
pip install torcheval && \ + python3.9 -m pip cache purge diff --git a/dev/spark-test-image/python-310/Dockerfile b/dev/spark-test-image/python-310/Dockerfile new file mode 100644 index 0000000000000..b9875ba969f8d --- /dev/null +++ b/dev/spark-test-image/python-310/Dockerfile @@ -0,0 +1,77 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. 
+# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with Python 3.10" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20241205 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + python3.10 \ + python3-psutil \ + qpdf \ + tzdata \ + wget \ + zlib1g-dev \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +# Python deps for Spark Connect +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" + +# Install Python 3.10 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 +RUN python3.10 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this +RUN python3.10 -m pip install --ignore-installed 'six==1.16.0' # Avoid `python3-six` installation +RUN python3.10 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ + python3.10 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.10 -m pip install deepspeed torcheval && \ + 
python3.10 -m pip cache purge diff --git a/dev/spark-test-image/python-311/Dockerfile b/dev/spark-test-image/python-311/Dockerfile new file mode 100644 index 0000000000000..48f1fede03c05 --- /dev/null +++ b/dev/spark-test-image/python-311/Dockerfile @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. 
+# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with Python 3.11" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20241212 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + qpdf \ + tzdata \ + software-properties-common \ + wget \ + zlib1g-dev + +# Install Python 3.11 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.11 \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +# Python deps for Spark Connect +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" + +# Install Python 3.11 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 +RUN python3.11 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this +RUN python3.11 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ + python3.11 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.11 -m pip install deepspeed 
torcheval && \ + python3.11 -m pip cache purge diff --git a/dev/spark-test-image/python-312/Dockerfile b/dev/spark-test-image/python-312/Dockerfile new file mode 100644 index 0000000000000..090c20742e652 --- /dev/null +++ b/dev/spark-test-image/python-312/Dockerfile @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. 
+# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with Python 3.12" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20241206 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + qpdf \ + tzdata \ + software-properties-common \ + wget \ + zlib1g-dev + +# Install Python 3.12 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.12 \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +# Python deps for Spark Connect +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" + +# Install Python 3.12 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.12 +RUN python3.12 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this +RUN python3.12 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS lxml && \ + python3.12 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.12 -m pip install 
torcheval && \ + python3.12 -m pip cache purge diff --git a/dev/spark-test-image/python-313/Dockerfile b/dev/spark-test-image/python-313/Dockerfile new file mode 100644 index 0000000000000..473f3df8fdb7c --- /dev/null +++ b/dev/spark-test-image/python-313/Dockerfile @@ -0,0 +1,79 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. 
+# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with Python 3.13" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20241210 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + qpdf \ + tzdata \ + software-properties-common \ + wget \ + zlib1g-dev + +# Install Python 3.13 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.13 \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +ARG BASIC_PIP_PKGS="numpy pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 protobuf==5.29.1 googleapis-common-protos==1.65.0 graphviz==0.20.3" + + +# Install Python 3.13 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.13 +# TODO(SPARK-49862) Add BASIC_PIP_PKGS and CONNECT_PIP_PKGS to Python 3.13 image when it supports Python 3.13 +RUN python3.13 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this +RUN python3.13 -m pip install numpy>=2.1 pyarrow>=18.0.0 six==1.16.0 pandas==2.2.3 scipy coverage matplotlib openpyxl grpcio==1.67.0 
grpcio-status==1.67.0 lxml jinja2 && \ + python3.13 -m pip cache purge diff --git a/dev/spark-test-image/python-minimum/Dockerfile b/dev/spark-test-image/python-minimum/Dockerfile new file mode 100644 index 0000000000000..82e2508ec6e32 --- /dev/null +++ b/dev/spark-test-image/python-minimum/Dockerfile @@ -0,0 +1,81 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. 
+# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with old dependencies" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20241223 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + qpdf \ + tzdata \ + software-properties-common \ + wget \ + zlib1g-dev + + +# Should keep the installation consistent with https://apache.github.io/spark/api/python/getting_started/install.html + +# Install Python 3.9 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.9 \ + python3.9-distutils \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +ARG BASIC_PIP_PKGS="numpy==1.21 pyarrow==11.0.0 pandas==2.0.0 six==1.16.0 scipy scikit-learn coverage unittest-xml-reporting" +# Python deps for Spark Connect +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 googleapis-common-protos==1.65.0 graphviz==0.20 protobuf" + +# Install Python 3.9 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 +RUN python3.9 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \ + python3.9 -m pip cache purge diff --git a/dev/spark-test-image/python-ps-minimum/Dockerfile b/dev/spark-test-image/python-ps-minimum/Dockerfile 
new file mode 100644 index 0000000000000..913da06c551ca --- /dev/null +++ b/dev/spark-test-image/python-ps-minimum/Dockerfile @@ -0,0 +1,81 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Image for building and testing Spark branches. Based on Ubuntu 22.04. 
+# See also in https://hub.docker.com/_/ubuntu +FROM ubuntu:jammy-20240911.1 +LABEL org.opencontainers.image.authors="Apache Spark project " +LABEL org.opencontainers.image.licenses="Apache-2.0" +LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For Pandas API on Spark with old dependencies" +# Overwrite this label to avoid exposing the underlying Ubuntu OS version label +LABEL org.opencontainers.image.version="" + +ENV FULL_REFRESH_DATE=20250102 + +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true + +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + openjdk-17-jdk-headless \ + pkg-config \ + qpdf \ + tzdata \ + software-properties-common \ + wget \ + zlib1g-dev + + +# Should keep the installation consistent with https://apache.github.io/spark/api/python/getting_started/install.html + +# Install Python 3.9 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.9 \ + python3.9-distutils \ + && apt-get autoremove --purge -y \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + + +ARG BASIC_PIP_PKGS="pyarrow==11.0.0 pandas==2.2.0 six==1.16.0 numpy scipy coverage unittest-xml-reporting" +# Python deps for Spark Connect +ARG CONNECT_PIP_PKGS="grpcio==1.67.0 grpcio-status==1.67.0 googleapis-common-protos==1.65.0 graphviz==0.20 protobuf" + +# Install Python 3.9 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 +RUN python3.9 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \ + python3.9 -m pip cache purge diff --git a/dev/spark-test-image/sparkr/Dockerfile b/dev/spark-test-image/sparkr/Dockerfile index 
43260c714a550..3312c0852bd77 100644 --- a/dev/spark-test-image/sparkr/Dockerfile +++ b/dev/spark-test-image/sparkr/Dockerfile @@ -24,10 +24,10 @@ LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image for SparkR" # Overwrite this label to avoid exposing the underlying Ubuntu OS version label LABEL org.opencontainers.image.version="" -ENV FULL_REFRESH_DATE 20241114 +ENV FULL_REFRESH_DATE=20241114 -ENV DEBIAN_FRONTEND noninteractive -ENV DEBCONF_NONINTERACTIVE_SEEN true +ENV DEBIAN_FRONTEND=noninteractive +ENV DEBCONF_NONINTERACTIVE_SEEN=true RUN apt-get update && apt-get install -y \ build-essential \ @@ -74,4 +74,4 @@ RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', \ Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" # See more in SPARK-39735 -ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" +ENV R_LIBS_SITE="/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index b8702113a26c7..f785a72e6a1fe 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -309,6 +309,15 @@ def __hash__(self): ], ) +profiler = Module( + name="profiler", + dependencies=[], + build_profile_flags=["-Pjvm-profiler"], + source_file_regexes=[ + "connector/profiler", + ], +) + protobuf = Module( name="protobuf", dependencies=[sql], @@ -502,10 +511,6 @@ def __hash__(self): "pyspark.sql.observation", "pyspark.sql.tvf", # unittests - "pyspark.sql.tests.test_arrow", - "pyspark.sql.tests.test_arrow_cogrouped_map", - "pyspark.sql.tests.test_arrow_grouped_map", - "pyspark.sql.tests.test_arrow_python_udf", "pyspark.sql.tests.test_catalog", "pyspark.sql.tests.test_column", "pyspark.sql.tests.test_conf", @@ -522,20 +527,24 @@ def __hash__(self): "pyspark.sql.tests.test_functions", "pyspark.sql.tests.test_group", "pyspark.sql.tests.test_sql", + 
"pyspark.sql.tests.arrow.test_arrow", + "pyspark.sql.tests.arrow.test_arrow_map", + "pyspark.sql.tests.arrow.test_arrow_cogrouped_map", + "pyspark.sql.tests.arrow.test_arrow_grouped_map", + "pyspark.sql.tests.arrow.test_arrow_python_udf", "pyspark.sql.tests.pandas.test_pandas_cogrouped_map", "pyspark.sql.tests.pandas.test_pandas_grouped_map", "pyspark.sql.tests.pandas.test_pandas_grouped_map_with_state", "pyspark.sql.tests.pandas.test_pandas_map", "pyspark.sql.tests.pandas.test_pandas_transform_with_state", - "pyspark.sql.tests.test_arrow_map", "pyspark.sql.tests.pandas.test_pandas_udf", "pyspark.sql.tests.pandas.test_pandas_udf_grouped_agg", "pyspark.sql.tests.pandas.test_pandas_udf_scalar", "pyspark.sql.tests.pandas.test_pandas_udf_typehints", "pyspark.sql.tests.pandas.test_pandas_udf_typehints_with_future_annotations", "pyspark.sql.tests.pandas.test_pandas_udf_window", + "pyspark.sql.tests.pandas.test_pandas_sqlmetrics", "pyspark.sql.tests.pandas.test_converter", - "pyspark.sql.tests.test_pandas_sqlmetrics", "pyspark.sql.tests.test_python_datasource", "pyspark.sql.tests.test_python_streaming_datasource", "pyspark.sql.tests.test_readwriter", @@ -686,6 +695,7 @@ def __hash__(self): "pyspark.ml.tests.connect.test_legacy_mode_classification", "pyspark.ml.tests.connect.test_legacy_mode_pipeline", "pyspark.ml.tests.connect.test_legacy_mode_tuning", + "pyspark.ml.tests.test_classification", ], excluded_python_implementations=[ "PyPy" # Skip these tests under PyPy since they require numpy and it isn't available there @@ -1029,8 +1039,6 @@ def __hash__(self): "pyspark.sql.tests.connect.test_connect_readwriter", "pyspark.sql.tests.connect.test_connect_session", "pyspark.sql.tests.connect.test_connect_stat", - "pyspark.sql.tests.connect.test_parity_arrow", - "pyspark.sql.tests.connect.test_parity_arrow_python_udf", "pyspark.sql.tests.connect.test_parity_datasources", "pyspark.sql.tests.connect.test_parity_errors", "pyspark.sql.tests.connect.test_parity_catalog", @@ 
-1054,13 +1062,6 @@ def __hash__(self): "pyspark.sql.tests.connect.test_parity_memory_profiler", "pyspark.sql.tests.connect.test_parity_udtf", "pyspark.sql.tests.connect.test_parity_tvf", - "pyspark.sql.tests.connect.test_parity_pandas_udf", - "pyspark.sql.tests.connect.test_parity_pandas_map", - "pyspark.sql.tests.connect.test_parity_arrow_map", - "pyspark.sql.tests.connect.test_parity_pandas_grouped_map", - "pyspark.sql.tests.connect.test_parity_pandas_cogrouped_map", - "pyspark.sql.tests.connect.test_parity_arrow_grouped_map", - "pyspark.sql.tests.connect.test_parity_arrow_cogrouped_map", "pyspark.sql.tests.connect.test_parity_python_datasource", "pyspark.sql.tests.connect.test_parity_python_streaming_datasource", "pyspark.sql.tests.connect.test_parity_frame_plot", @@ -1074,13 +1075,22 @@ def __hash__(self): "pyspark.sql.tests.connect.streaming.test_parity_listener", "pyspark.sql.tests.connect.streaming.test_parity_foreach", "pyspark.sql.tests.connect.streaming.test_parity_foreach_batch", - "pyspark.sql.tests.connect.test_parity_pandas_grouped_map_with_state", - "pyspark.sql.tests.connect.test_parity_pandas_udf_scalar", - "pyspark.sql.tests.connect.test_parity_pandas_udf_grouped_agg", - "pyspark.sql.tests.connect.test_parity_pandas_udf_window", "pyspark.sql.tests.connect.test_resources", "pyspark.sql.tests.connect.shell.test_progress", "pyspark.sql.tests.connect.test_df_debug", + "pyspark.sql.tests.connect.arrow.test_parity_arrow", + "pyspark.sql.tests.connect.arrow.test_parity_arrow_map", + "pyspark.sql.tests.connect.arrow.test_parity_arrow_grouped_map", + "pyspark.sql.tests.connect.arrow.test_parity_arrow_cogrouped_map", + "pyspark.sql.tests.connect.arrow.test_parity_arrow_python_udf", + "pyspark.sql.tests.connect.pandas.test_parity_pandas_map", + "pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map", + "pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map_with_state", + 
"pyspark.sql.tests.connect.pandas.test_parity_pandas_cogrouped_map", + "pyspark.sql.tests.connect.pandas.test_parity_pandas_udf", + "pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_scalar", + "pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_grouped_agg", + "pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_window", ], excluded_python_implementations=[ "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and @@ -1106,6 +1116,7 @@ def __hash__(self): "pyspark.ml.tests.connect.test_connect_classification", "pyspark.ml.tests.connect.test_connect_pipeline", "pyspark.ml.tests.connect.test_connect_tuning", + "pyspark.ml.tests.connect.test_connect_spark_ml_classification", ], excluded_python_implementations=[ "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and @@ -1438,7 +1449,7 @@ def __hash__(self): ], ) -pyspark_logging = Module( +pyspark_logger = Module( name="pyspark-logger", dependencies=[], source_file_regexes=["python/pyspark/logger"], diff --git a/docs/Gemfile b/docs/Gemfile index 8177425cfb681..68727dee9e1fb 100644 --- a/docs/Gemfile +++ b/docs/Gemfile @@ -24,9 +24,7 @@ source "https://rubygems.org" gem "jekyll", "~> 4.3" gem "jekyll-redirect-from", "~> 0.16" -# Rouge 4.0 drops support for Ruby < 2.7, which is EOL. -# See: https://github.com/rouge-ruby/rouge/blob/61bdda18f204a661413daa93d9624bc65ad219a5/CHANGELOG.md#version-400-2022-09-04 -gem "rouge", "~> 3.26" # This resolves a build issue on Apple Silicon. 
# See: https://issues.apache.org/jira/browse/SPARK-38488 gem "ffi", "~> 1.15" +gem "rexml", "~> 3.3.9" diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index e137f0f039b97..7709f07a1ceea 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -53,8 +53,8 @@ GEM rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) - rexml (3.2.6) - rouge (3.30.0) + rexml (3.3.9) + rouge (4.5.1) safe_yaml (1.0.5) sass-embedded (1.63.6) google-protobuf (~> 3.23) @@ -71,7 +71,7 @@ DEPENDENCIES ffi (~> 1.15) jekyll (~> 4.3) jekyll-redirect-from (~> 0.16) - rouge (~> 3.26) + rexml (~> 3.3.9) BUNDLED WITH 2.4.22 diff --git a/docs/README.md b/docs/README.md index 363f1c2076363..1235efe91812b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -126,3 +126,16 @@ To control what API docs get built, you can set any combination of the following * `SKIP_RDOC=1`: Skip the R API docs. * `SKIP_SQLDOC=1`: Skip the SQL API docs. +## Build docs with docker image (Optional) + +As a Spark developer, you can generate all documents locally as follows: + +Note: Before running it, you need to have `docker` installed. + +```sh +$ dev/spark-test-image-util/docs/build-docs +``` + +It will generate all documents on the `container` and `host`. +Especially when there are conflicts between the libraries required by Python development environment +and the libraries required by generating Python docs environment, this is a good choice. 
diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 5fc1f3bcf9b5a..b1688aec57f01 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -93,6 +93,8 @@ url: sql-ref-functions.html - text: Identifiers url: sql-ref-identifier.html + - text: IDENTIFIER clause + url: sql-ref-identifier-clause.html - text: Literals url: sql-ref-literals.html - text: Null Semantics diff --git a/docs/_plugins/build_api_docs.rb b/docs/_plugins/build_api_docs.rb index 79aad9695a3c7..e2ddcca6cdde5 100644 --- a/docs/_plugins/build_api_docs.rb +++ b/docs/_plugins/build_api_docs.rb @@ -34,6 +34,11 @@ def print_header(text) end def build_spark_if_necessary + # If spark has already been compiled on the host, skip here. + if ENV['SPARK_DOCS_IS_BUILT_ON_HOST'] == '1' + return + end + if $spark_package_is_built return end @@ -116,6 +121,16 @@ def copy_and_update_java_docs(source, dest, scala_source) File.open(css_file, 'a') { |f| f.write("\n" + css.join()) } end +def build_spark_scala_and_java_docs_if_necessary + # If spark's docs has already been compiled on the host, skip here. + if ENV['SPARK_DOCS_IS_BUILT_ON_HOST'] == '1' + return + end + + command = "build/sbt -Pkinesis-asl unidoc" + puts "Running '#{command}'..." + system(command) || raise("Unidoc generation failed") +end def build_scala_and_java_docs build_spark_if_necessary @@ -123,9 +138,7 @@ def build_scala_and_java_docs print_header "Building Scala and Java API docs." cd(SPARK_PROJECT_ROOT) - command = "build/sbt -Pkinesis-asl unidoc" - puts "Running '#{command}'..." - system(command) || raise("Unidoc generation failed") + build_spark_scala_and_java_docs_if_necessary puts "Moving back into docs dir." 
cd("docs") diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb index 7d0e78738095e..6fd14ce31a68c 100644 --- a/docs/_plugins/include_example.rb +++ b/docs/_plugins/include_example.rb @@ -114,8 +114,8 @@ def select_lines(code) range = Range.new(start + 1, endline - 1) trimmed = trim_codeblock(lines[range]) # Filter out possible example tags of overlapped labels. - taggs_filtered = trimmed.select { |l| !l.include? '$example ' } - result += taggs_filtered.join + tags_filtered = trimmed.select { |l| !l.include? '$example ' } + result += tags_filtered.join result += "\n" end result diff --git a/docs/app-dev-spark-connect.md b/docs/app-dev-spark-connect.md new file mode 100644 index 0000000000000..218edd331aa94 --- /dev/null +++ b/docs/app-dev-spark-connect.md @@ -0,0 +1,243 @@ +--- +layout: global +title: Application Development with Spark Connect +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- +**Spark Connect Overview** + +In Apache Spark 3.4, Spark Connect introduced a decoupled client-server +architecture that allows remote connectivity to Spark clusters using the +DataFrame API and unresolved logical plans as the protocol. 
The separation +between client and server allows Spark and its open ecosystem to be +leveraged from everywhere. It can be embedded in modern data applications, +in IDEs, Notebooks and programming languages. + +To learn more about Spark Connect, see [Spark Connect Overview](spark-connect-overview.html). + +# Redefining Spark Applications using Spark Connect + +With its decoupled client-server architecture, Spark Connect simplifies how Spark Applications are +developed. +The notion of Spark Client Applications and Spark Server Libraries are introduced as follows: +* _Spark Client Applications_ are regular Spark applications that use Spark and its rich ecosystem for +distributed data processing. Examples include ETL pipelines, data preparation, and model training +and inference. +* _Spark Server Libraries_ build on, extend, and complement Spark's functionality, e.g. +[MLlib](ml-guide.html) (distributed ML libraries that use Spark's powerful distributed processing). Spark Connect +can be extended to expose client-side interfaces for Spark Server Libraries. + +With Spark 3.4 and Spark Connect, the development of Spark Client Applications is simplified, and +clear extension points and guidelines are provided on how to build Spark Server Libraries, making +it easy for both types of applications to evolve alongside Spark. As illustrated in Fig.1, Spark +Client applications connect to Spark using the Spark Connect API, which is essentially the +DataFrame API and fully declarative. + +

+ Extending Spark
+Connect Diagram +

+Spark Server Libraries extend Spark. They typically provide additional server-side logic integrated +with Spark, which is exposed to client applications as part of the Spark Connect API, using Spark +Connect extension points. For example, the _Spark Server Library_ consists of custom +service-side logic (as indicated by the blue box labeled _Custom Library Plugin_), which is exposed +to the client via the blue box as part of the Spark Connect API. The client uses this API, e.g., +alongside PySpark or the Spark Scala client, making it easy for Spark client applications to work +with the custom logic/library. + +## Spark Client Applications + +Spark Client Applications are the _regular Spark applications_ that Spark users develop today, e.g., +ETL pipelines, data preparation, or model training or inference. These are typically built using +Spark's declarative DataFrame and DataSet APIs. With Spark Connect, the core behaviour remains the +same, but there are a few differences: +* Lower-level, non-declarative APIs (RDDs) can no longer be directly used from Spark Client +applications. Alternatives for missing RDD functionality are provided as part of the higher-level +DataFrame API. +* Client applications no longer have direct access to the Spark driver JVM; they are fully +separated from the server. + +Client applications based on Spark Connect can be submitted in the same way as any previous job. +In addition, Spark Client Applications based on Spark Connect have several benefits compared to +classic Spark applications using earlier Spark versions (3.4 and below): +* _Upgradability_: Upgrading to new Spark Server versions is seamless, as the Spark Connect API +abstracts any changes/improvements on the server side. Client- and server APIs are cleanly +separated. +* _Simplicity_: The number of APIs exposed to the user is reduced from 3 to 2. The Spark Connect API +is fully declarative and consequently easy to learn for new users familiar with SQL. 
+* _Stability_: When using Spark Connect, the client applications no longer run on the Spark driver +and, therefore don’t cause and are not affected by any instability on the server. +* _Remote connectivity_: The decoupled architecture allows remote connectivity to Spark beyond SQL +and JDBC: any application can now interactively use Spark “as a service”. +* _Backwards compatibility_: The Spark Connect API is code-compatible with earlier Spark versions, +except for the usage of RDDs, for which a list of alternative APIs is provided in Spark Connect. + +## Spark Server Libraries + +Until Spark 3.4, extensions to Spark (e.g., [Spark ML](ml-guide#:~:text=What%20is%20%E2%80%9CSpark%20ML%E2%80%9D%3F,to%20emphasize%20the%20pipeline%20concept.) +or [Spark-NLP](https://github.com/JohnSnowLabs/spark-nlp)) were built and deployed like Spark +Client Applications. With Spark 3.4 and Spark Connect, explicit extension points are offered to +extend Spark via Spark Server Libraries. These extension points provide functionality that can be +exposed to a client, which differs from existing extension points in Spark such as +[SparkSession extensions](api/java/org/apache/spark/sql/SparkSessionExtensions.html) or +[Spark Plugins](api/java/org/apache/spark/api/plugin/SparkPlugin.html). + +### Getting Started: Extending Spark with Spark Server Libraries + +Spark Connect is available and supports PySpark and Scala +applications. We will walk through how to run an Apache Spark server with Spark +Connect and connect to it from a client application using the Spark Connect client +library. + +A Spark Server Library consists of the following components, illustrated in Fig. 2: + +1. The Spark Connect protocol extension (blue box _Proto_ API) +2. A Spark Connect Plugin. +3. The application logic that extends Spark. +4. The client package that exposes the Spark Server Library application logic to the Spark Client +Application, alongside PySpark or the Scala Spark Client. +

+ Extending Spark
+Connect Diagram - Labelled Steps +

+ +#### (1) Spark Connect Protocol Extension + +To extend Spark with a new Spark Server Library, developers can extend the three main operation +types in the Spark Connect protocol: _Relation_, _Expression_, and _Command_. + +{% highlight protobuf %} +message Relation { + oneof rel_type { + Read read = 1; + // ... + google.protobuf.Any extension = 998; + } +} + +message Expression { + oneof expr_type { + Literal literal = 1; + // ... + google.protobuf.Any extension = 999; + } +} + +message Command { + oneof command_type { + WriteCommand write_command = 1; + // ... + google.protobuf.Any extension = 999; + } +} +{% endhighlight %} +Their extension fields allow serializing arbitrary protobuf messages as part of the Spark Connect +protocol. These messages represent the parameters or state of the extension implementation. +To build a custom expression type, the developer first defines the custom protobuf definition +of the expression. + +{% highlight protobuf %} +message ExamplePluginExpression { + Expression child = 1; + string custom_field = 2; +} +{% endhighlight %} + +#### (2) Spark Connect Plugin implementation with (3) custom application logic + +As a next step, the developer implements the _ExpressionPlugin_ class of Spark Connect with custom +application logic based on the input parameters of the protobuf message. +{% highlight protobuf %} +class ExampleExpressionPlugin extends ExpressionPlugin { + override def transform( + relation: protobuf.Any, + planner: SparkConnectPlanner): Option[Expression] = { + // Check if the serialized value of protobuf.Any matches the type + // of our example expression. 
+ if (!relation.is(classOf[proto.ExamplePluginExpression])) { + return None + } + val exp = relation.unpack(classOf[proto.ExamplePluginExpression]) + Some(Alias(planner.transformExpression( + exp.getChild), exp.getCustomField)(explicitMetadata = None)) + } +} +{% endhighlight %} + +Once the application logic is developed, the code must be packaged as a jar and Spark must be +configured to pick up the additional logic. The relevant Spark configuration options are: +* _spark.jars_ which define the location of the Jar file containing the application logic built for +the custom expression. +* _spark.connect.extensions.expression.classes_ specifying the full class name +of each expression extension loaded by Spark. Based on these configuration options, Spark will +load the values at startup and make them available for processing. + +#### (4) Spark Server Library Client Package + +Once the server component is deployed, any client can use it with the right protobuf messages. +In the example above, the following message payload sent to the Spark Connect endpoint would be +enough to trigger the extension mechanism. +{% highlight json %} +{ + "project": { + "input": { + "sql": { + "query": "select * from samples.nyctaxi.trips" + } + }, + "expressions": [ + { + "extension": { + "typeUrl": "type.googleapis.com/spark.connect.ExamplePluginExpression", + "value": "\n\006\022\004\n\002id\022\006testval" + } + } + ] + } +} +{% endhighlight %} +To make the example available in Python, the application developer provides a Python library that +wraps the new expression and embeds it into PySpark. The easiest way to provide a function for any +expression is to take a PySpark column instance as an argument and return a new Column instance +with the expression applied. 
+ +{% highlight python %} +from pyspark.sql.connect.column import Expression +import pyspark.sql.connect.proto as proto + +from myxample.proto import ExamplePluginExpression + +# Internal class that satisfies the interface by the Python client +# of Spark Connect to generate the protobuf representation from +# an instance of the expression. +class ExampleExpression(Expression): + def to_plan(self, session) -> proto.Expression: + fun = proto.Expression() + plugin = ExamplePluginExpression() + plugin.child.literal.long = 10 + plugin.custom_field = "example" + fun.extension.Pack(plugin) + return fun + +# Defining the function to be used from the consumers. +def example_expression(col: Column) -> Column: + return Column(ExampleExpression()) + + +# Using the expression in the Spark Connect client code. +df = spark.read.table("samples.nyctaxi.trips") +df.select(example_expression(df["fare_amount"])).collect() +{% endhighlight %} \ No newline at end of file diff --git a/docs/configuration.md b/docs/configuration.md index e095ae7a61b22..162165ffe68dd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -565,8 +565,7 @@ of the most common options to set are:
@@ -576,8 +575,7 @@ of the most common options to set are: @@ -2116,7 +2114,7 @@ Apart from these, the following properties are also available, and may be useful - + @@ -3753,15 +3751,20 @@ Note: When running Spark on YARN in `cluster` mode, environment variables need t # Configuring Logging -Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a -`log4j2.properties` file in the `conf` directory. One way to start is to copy the existing templates `log4j2.properties.template` or `log4j2.properties.pattern-layout-template` located there. +Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a `log4j2.properties` file in the `conf` directory. To get started, copy one of the provided templates: `log4j2.properties.template` (for plain text logging) or `log4j2-json-layout.properties.template` (for structured logging). + +## Plain Text Logging +The default logging format is plain text, using Log4j's [Pattern Layout](https://logging.apache.org/log4j/2.x/manual/pattern-layout.html). + +MDC (Mapped Diagnostic Context) information is not included by default in plain text logs. To include it, update the `PatternLayout` configuration in the `log4j2.properties` file. For example, add `%X{task_name}` to include the task name in logs. Additionally, use `spark.sparkContext.setLocalProperty("key", "value")` to add custom data to the MDC. ## Structured Logging -Starting from version 4.0.0, `spark-submit` has adopted the [JSON Template Layout](https://logging.apache.org/log4j/2.x/manual/json-template-layout.html) for logging, which outputs logs in JSON format. This format facilitates querying logs using Spark SQL with the JSON data source. Additionally, the logs include all Mapped Diagnostic Context (MDC) information for search and debugging purposes. 
+Starting with version 4.0.0, `spark-submit` supports optional structured logging using the [JSON Template Layout](https://logging.apache.org/log4j/2.x/manual/json-template-layout.html). This format enables efficient querying of logs with Spark SQL using the JSON data source and includes all MDC information for improved searchability and debugging. -To configure the layout of structured logging, start with the `log4j2.properties.template` file. +To enable structured logging and include MDC information, set the configuration `spark.log.structuredLogging.enabled` to `true` (default is `false`). For additional customization, copy `log4j2-json-layout.properties.template` to `conf/log4j2.properties` and adjust as needed. -To query Spark logs using Spark SQL, you can use the following code snippets: +### Querying Structured Logs with Spark SQL +To query structured logs in JSON format, use the following code snippet: **Python:** ```python @@ -3777,14 +3780,6 @@ import org.apache.spark.util.LogUtils.SPARK_LOG_SCHEMA val logDf = spark.read.schema(SPARK_LOG_SCHEMA).json("path/to/logs") ``` **Note**: If you're using the interactive shell (pyspark shell or spark-shell), you can omit the import statement in the code because SPARK_LOG_SCHEMA is already available in the shell's context. -## Plain Text Logging -If you prefer plain text logging, you have two options: -- Disable structured JSON logging by setting the Spark configuration `spark.log.structuredLogging.enabled` to `false`. -- Use a custom log4j configuration file. Rename `conf/log4j2.properties.pattern-layout-template` to `conf/log4j2.properties`. This reverts to the default configuration prior to Spark 4.0, which utilizes [PatternLayout](https://logging.apache.org/log4j/2.x/manual/layouts.html#PatternLayout) for logging all messages in plain text. - -MDC information is not included by default when with plain text logging. In order to print it in the logs, you can update the patternLayout in the file. 
For example, you can add `%X{task_name}` to print the task name in the logs. -Moreover, you can use `spark.sparkContext.setLocalProperty(s"mdc.$name", "value")` to add user specific data into MDC. -The key in MDC will be the string of `mdc.$name`. # Overriding configuration directory diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 88bad6c5d1b9f..9dcf4ad8a2984 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -44,16 +44,18 @@ license: | - Since Spark 4.0, Spark uses the external shuffle service for deleting shuffle blocks for deallocated executors when the shuffle is no longer needed. To restore the legacy behavior, you can set `spark.shuffle.service.removeShuffle` to `false`. -- Starting with Spark 4.0, the default logging format for `spark-submit` has changed from plain text to JSON lines to improve log analysis. If you prefer plain text logs, you have two options: - - Set the Spark configuration `spark.log.structuredLogging.enabled` to `false`. For example, you can use `JDK_JAVA_OPTIONS=-Dspark.log.structuredLogging.enabled=false`. - - Use a custom log4j configuration file, such as renaming the template file `conf/log4j2.properties.pattern-layout-template` to `conf/log4j2.properties`. - - Since Spark 4.0, the MDC (Mapped Diagnostic Context) key for Spark task names in Spark logs has been changed from `mdc.taskName` to `task_name`. To use the key `mdc.taskName`, you can set `spark.log.legacyTaskNameMdc.enabled` to `true`. - Since Spark 4.0, Spark performs speculative executions less aggressively with `spark.speculation.multiplier=3` and `spark.speculation.quantile=0.9`. To restore the legacy behavior, you can set `spark.speculation.multiplier=1.5` and `spark.speculation.quantile=0.75`. - Since Spark 4.0, `spark.shuffle.unsafe.file.output.buffer` is deprecated though still works. Use `spark.shuffle.localDisk.file.output.buffer` instead. 
+- Since Spark 4.0, when reading files hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.files.ignoreCorruptFiles` is set to `true`. + +## Upgrading from Core 3.5.3 to 3.5.4 + +- Since Spark 3.5.4, when reading files hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.files.ignoreCorruptFiles` is set to `true`. + ## Upgrading from Core 3.4 to 3.5 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead. @@ -62,7 +64,7 @@ license: | ## Upgrading from Core 3.3 to 3.4 -- Since Spark 3.4, Spark driver will own `PersistentVolumnClaim`s and try to reuse if they are not assigned to live executors. To restore the behavior before Spark 3.4, you can set `spark.kubernetes.driver.ownPersistentVolumeClaim` to `false` and `spark.kubernetes.driver.reusePersistentVolumeClaim` to `false`. +- Since Spark 3.4, Spark driver will own `PersistentVolumeClaim`s and try to reuse if they are not assigned to live executors. To restore the behavior before Spark 3.4, you can set `spark.kubernetes.driver.ownPersistentVolumeClaim` to `false` and `spark.kubernetes.driver.reusePersistentVolumeClaim` to `false`. - Since Spark 3.4, Spark driver will track shuffle data when dynamic allocation is enabled without shuffle service. To restore the behavior before Spark 3.4, you can set `spark.dynamicAllocation.shuffleTracking.enabled` to `false`. 
diff --git a/docs/img/extending-spark-connect-labelled.png b/docs/img/extending-spark-connect-labelled.png new file mode 100644 index 0000000000000..94b8cfdc024cb Binary files /dev/null and b/docs/img/extending-spark-connect-labelled.png differ diff --git a/docs/img/extending-spark-connect.png b/docs/img/extending-spark-connect.png new file mode 100644 index 0000000000000..381d99bdda865 Binary files /dev/null and b/docs/img/extending-spark-connect.png differ diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index a1adcc2f6eb03..400f8a512e7a7 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -39,7 +39,7 @@ along with if you launch Spark's interactive shell -- either `bin/spark-shell` f
-Spark {{site.SPARK_VERSION}} works with Python 3.8+. It can use the standard CPython interpreter, +Spark {{site.SPARK_VERSION}} works with Python 3.9+. It can use the standard CPython interpreter, so C libraries like NumPy can be used. It also works with PyPy 7.3.6+. Spark applications in Python can either be run with the `bin/spark-submit` script which includes Spark at runtime, or by including it in your setup.py as: diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index a0c73813612d0..c7f5d67a6cd85 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -394,7 +394,7 @@ spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount. spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.readOnly=false ``` -To enable shuffle data recovery feature via the built-in `KubernetesLocalDiskShuffleDataIO` plugin, we need to have the followings. You may want to enable `spark.kubernetes.driver.waitToReusePersistentVolumeClaim` additionally. +To enable shuffle data recovery feature via the built-in `KubernetesLocalDiskShuffleDataIO` plugin, we need to have the following. You may want to enable `spark.kubernetes.driver.waitToReusePersistentVolumeClaim` additionally. ``` spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.path=/data/spark-x/executor-x diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index d149f9196b345..465f3a9d075a2 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -124,15 +124,15 @@ all environment variables used for launching each container. This process is use classpath problems in particular. (Note that enabling this requires admin privileges on cluster settings and a restart of all node managers. Thus, this is not applicable to hosted clusters). 
-To use a custom log4j configuration for the application master or executors, here are the options: +To use a custom log4j2 configuration for the application master or executors, here are the options: -- upload a custom `log4j.properties` using `spark-submit`, by adding it to the `--files` list of files +- upload a custom `log4j2.properties` using `spark-submit`, by adding it to the `--files` list of files to be uploaded with the application. -- add `-Dlog4j.configuration=` to `spark.driver.extraJavaOptions` +- add `-Dlog4j.configurationFile=` to `spark.driver.extraJavaOptions` (for the driver) or `spark.executor.extraJavaOptions` (for executors). Note that if using a file, the `file:` protocol should be explicitly provided, and the file needs to exist locally on all the nodes. -- update the `$SPARK_CONF_DIR/log4j.properties` file and it will be automatically uploaded along +- update the `$SPARK_CONF_DIR/log4j2.properties` file and it will be automatically uploaded along with the other configurations. Note that other 2 options has higher priority than this option if multiple options are specified. @@ -673,7 +673,7 @@ To use a custom metrics.properties for the application master and executors, upd
@@ -853,7 +853,7 @@ will include a list of all tokens obtained, and their expiry details To start the Spark Shuffle Service on each `NodeManager` in your YARN cluster, follow these instructions: -1. Build Spark with the [YARN profile](building-spark.html). Skip this step if you are using a +1. Build Spark with the [YARN profile](building-spark.html#specifying-the-hadoop-version-and-enabling-yarn). Skip this step if you are using a pre-packaged distribution. 1. Locate the `spark--yarn-shuffle.jar`. This should be under `$SPARK_HOME/common/network-yarn/target/scala-` if you are building Spark yourself, and under diff --git a/docs/security.md b/docs/security.md index c7d3fd5f8c36f..81173d5f01ce7 100644 --- a/docs/security.md +++ b/docs/security.md @@ -72,7 +72,7 @@ secrets to be secure. diff --git a/docs/spark-connect-overview.md b/docs/spark-connect-overview.md index 1cc409bfbc007..723bae9fd9be5 100644 --- a/docs/spark-connect-overview.md +++ b/docs/spark-connect-overview.md @@ -370,6 +370,8 @@ one may implement their own class extending `ClassFinder` for customized search +For more information on application development with Spark Connect as well as extending Spark Connect +with custom functionality, see [Application Development with Spark Connect](app-dev-spark-connect.html). 
# Client application authentication While Spark Connect does not have built-in authentication, it is designed to diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 8bc7445d17c71..51f6ca977c991 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -59,10 +59,6 @@ Finally, the following configuration options can be passed to the master and wor - - - - @@ -355,28 +351,28 @@ SPARK_MASTER_OPTS supports the following system properties: - + - + @@ -457,7 +453,7 @@ SPARK_WORKER_OPTS supports the following system properties: @@ -497,8 +493,8 @@ SPARK_WORKER_OPTS supports the following system properties: @@ -508,8 +504,8 @@ SPARK_WORKER_OPTS supports the following system properties: @@ -527,11 +523,11 @@ SPARK_WORKER_OPTS supports the following system properties: - + @@ -615,37 +611,37 @@ via http://[host:port]/[version]/submissions/[action] where action is one of the following supported actions.
Thread IDnumExecutors * 2, with minimum of 3 The maximum number of executor failures before failing the application. - This configuration only takes effect on YARN, or Kubernetes when - spark.kubernetes.allocation.pods.allocator is set to 'direct'. + This configuration only takes effect on YARN and Kubernetes. 3.5.0
Interval after which executor failures will be considered independent and not accumulate towards the attempt count. - This configuration only takes effect on YARN, or Kubernetes when - spark.kubernetes.allocation.pods.allocator is set to 'direct'. + This configuration only takes effect on YARN and Kubernetes. 3.5.0
spark.storage.replication.proactivefalsetrue Enables proactive block replication for RDD blocks. Cached RDD block replicas lost due to executor failures are replenished if there are any existing available replicas. This tries @@ -2852,7 +2850,7 @@ Apart from these, the following properties are also available, and may be useful If set to "true", prevent Spark from scheduling tasks on executors that have been excluded due to too many task failures. The algorithm used to exclude executors and nodes can be further controlled by the other "spark.excludeOnFailure" configuration options. - This config will be overriden by "spark.excludeOnFailure.application.enabled" and + This config will be overridden by "spark.excludeOnFailure.application.enabled" and "spark.excludeOnFailure.taskAndStage.enabled" to specify exclusion enablement on individual levels. false Set to true for applications that have higher security requirements and prefer that their - secret is not saved in the db. The shuffle data of such applications wll not be recovered after + secret is not saved in the db. The shuffle data of such applications will not be recovered after the External Shuffle Service restarts. 3.5.0false Set to true for applications that have higher security requirements and prefer that their - secret is not saved in the db. The shuffle data of such applications wll not be recovered after + secret is not saved in the db. The shuffle data of such applications will not be recovered after the External Shuffle Service restarts. 3.5.0 -h HOST, --host HOST Hostname to listen on
-i HOST, --ip HOSTHostname to listen on (deprecated, use -h or --host)
-p PORT, --port PORT Port for service to listen on (default: 7077 for master, random for worker)spark.deploy.appNumberModulo (None)
- The modulo for app number. By default, the next of `app-yyyyMMddHHmmss-9999` is
- `app-yyyyMMddHHmmss-10000`. If we have 10000 as modulo, it will be `app-yyyyMMddHHmmss-0000`.
- In most cases, the prefix `app-yyyyMMddHHmmss` is increased already during creating 10000 applications.
+ The modulo for app number. By default, the next of app-yyyyMMddHHmmss-9999 is
+ app-yyyyMMddHHmmss-10000. If we have 10000 as modulo, it will be app-yyyyMMddHHmmss-0000.
+ In most cases, the prefix app-yyyyMMddHHmmss is already increased while creating 10000 applications.
4.0.0
spark.deploy.driverIdPatterndriver-%s-%04ddriver-%s-%04d - The pattern for driver ID generation based on Java `String.format` method. - The default value is `driver-%s-%04d` which represents the existing driver id string, e.g., `driver-20231031224459-0019`. Please be careful to generate unique IDs. + The pattern for driver ID generation based on Java String.format method. + The default value is driver-%s-%04d which represents the existing driver id string, e.g., driver-20231031224459-0019. Please be careful to generate unique IDs. 4.0.0
spark.deploy.appIdPatternapp-%s-%04dapp-%s-%04d - The pattern for app ID generation based on Java `String.format` method. - The default value is `app-%s-%04d` which represents the existing app id string, e.g., - `app-20231031224509-0008`. Plesae be careful to generate unique IDs. + The pattern for app ID generation based on Java String.format method. + The default value is app-%s-%04d which represents the existing app id string, e.g., + app-20231031224509-0008. Please be careful to generate unique IDs. 4.0.0
Enable periodic cleanup of worker / application directories. Note that this only affects standalone mode, as YARN works differently. Only the directories of stopped applications are cleaned up. - This should be enabled if spark.shuffle.service.db.enabled is "true" + This should be enabled if spark.shuffle.service.db.enabled is "true" 1.0.0
ROCKSDB When spark.shuffle.service.db.enabled is true, user can use this to specify the kind of disk-based
- store used in shuffle service state store. This supports `ROCKSDB` and `LEVELDB` (deprecated) now and `ROCKSDB` as default value.
- The original data store in `RocksDB/LevelDB` will not be automatically convert to another kind of storage now.
+ store used in shuffle service state store. This supports ROCKSDB and LEVELDB (deprecated) now and ROCKSDB as default value.
+ The original data store in RocksDB/LevelDB will not be automatically converted to another kind of storage now.
3.4.0
Enable cleanup non-shuffle files(such as temp. shuffle blocks, cached RDD/broadcast blocks, spill files, etc) of worker directories following executor exits. Note that this doesn't - overlap with `spark.worker.cleanup.enabled`, as this enables cleanup of non-shuffle files in - local directories of a dead executor, while `spark.worker.cleanup.enabled` enables cleanup of + overlap with spark.worker.cleanup.enabled, as this enables cleanup of non-shuffle files in + local directories of a dead executor, while spark.worker.cleanup.enabled enables cleanup of all files/subdirectories of a stopped and timeout application. This only affects Standalone mode, support of other cluster managers can be added in the future.
spark.worker.idPatternworker-%s-%s-%dworker-%s-%s-%d - The pattern for worker ID generation based on Java `String.format` method. - The default value is `worker-%s-%s-%d` which represents the existing worker id string, e.g., - `worker-20231109183042-[fe80::1%lo0]-39729`. Please be careful to generate unique IDs + The pattern for worker ID generation based on Java String.format method. + The default value is worker-%s-%s-%d which represents the existing worker id string, e.g., + worker-20231109183042-[fe80::1%lo0]-39729. Please be careful to generate unique IDs 4.0.0
- + + - - + - + - + - +
CommandDescriptionHTTP METHODSince Version
CommandHTTP METHODDescriptionSince Version
createPOST Create a Spark driver via cluster mode. Since 4.0.0, Spark master supports server-side variable replacements for the values of Spark properties and environment variables. POST 1.3.0
killKill a single Spark driver. POSTKill a single Spark driver. 1.3.0
killallKill all running Spark drivers. POSTKill all running Spark drivers. 4.0.0
statusCheck the status of a Spark job. GETCheck the status of a Spark job. 1.3.0
clearClear the completed drivers and applications. POSTClear the completed drivers and applications. 4.0.0
@@ -868,13 +864,13 @@ In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spa spark.deploy.zookeeper.url None - When `spark.deploy.recoveryMode` is set to ZOOKEEPER, this configuration is used to set the zookeeper URL to connect to. + When spark.deploy.recoveryMode is set to ZOOKEEPER, this configuration is used to set the zookeeper URL to connect to. 0.8.1 spark.deploy.zookeeper.dir None - When `spark.deploy.recoveryMode` is set to ZOOKEEPER, this configuration is used to set the zookeeper directory to store recovery state. + When spark.deploy.recoveryMode is set to ZOOKEEPER, this configuration is used to set the zookeeper directory to store recovery state. 0.8.1 diff --git a/docs/sql-data-sources-csv.md b/docs/sql-data-sources-csv.md index 97a7065e0598f..8008bc562082c 100644 --- a/docs/sql-data-sources-csv.md +++ b/docs/sql-data-sources-csv.md @@ -60,6 +60,12 @@ Data source options of CSV can be set via: Sets a separator for each field and value. This separator can be one or more characters. read/write + + extension + csv + Sets the file extension for the output files. Limited to letters. Length must equal 3. + write + encoding
charset UTF-8 diff --git a/docs/sql-data-sources-xml.md index 6168f570a81a9..949e6239e52ba 100644 --- a/docs/sql-data-sources-xml.md +++ b/docs/sql-data-sources-xml.md @@ -19,7 +19,7 @@ license: | limitations under the License. --- -Spark SQL provides `spark.read().xml("file_1_path","file_2_path")` to read a file or directory of files in XML format into a Spark DataFrame, and `dataframe.write().xml("path")` to write to a xml file. When reading a XML file, the `rowTag` option must be specified to indicate the XML element that maps to a `DataFrame row`. The option() function can be used to customize the behavior of reading or writing, such as controlling behavior of the XML attributes, XSD validation, compression, and so on. +Spark SQL provides `spark.read().xml("file_1_path","file_2_path")` to read a file or directory of files in XML format into a Spark DataFrame, and `dataframe.write().xml("path")` to write to an XML file. The `rowTag` option must be specified to indicate the XML element that maps to a `DataFrame row`. The option() function can be used to customize the behavior of reading or writing, such as controlling behavior of the XML attributes, XSD validation, compression, and so on.
@@ -61,7 +61,7 @@ Data source options of XML can be set via: <books><book></book>...</books> the appropriate value would be book. This is a required option for both read and write. - read + read/write diff --git a/docs/sql-error-conditions-codec-not-available-error-class.md b/docs/sql-error-conditions-codec-not-available-error-class.md deleted file mode 100644 index bb93f56206ba1..0000000000000 --- a/docs/sql-error-conditions-codec-not-available-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: CODEC_NOT_AVAILABLE error class -displayTitle: CODEC_NOT_AVAILABLE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -SQLSTATE: 56038 - -The codec `` is not available. - -This error class has the following derived error classes: - -## WITH_AVAILABLE_CODECS_SUGGESTION - -Available codecs are ``. - -## WITH_CONF_SUGGESTION - -Consider to set the config `` to ``. 
- - diff --git a/docs/sql-error-conditions-collation-mismatch-error-class.md b/docs/sql-error-conditions-collation-mismatch-error-class.md deleted file mode 100644 index 79aaaf00ee47c..0000000000000 --- a/docs/sql-error-conditions-collation-mismatch-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: COLLATION_MISMATCH error class -displayTitle: COLLATION_MISMATCH error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42P21](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Could not determine which collation to use for string functions and operators. - -This error class has the following derived error classes: - -## EXPLICIT - -Error occurred due to the mismatch between explicit collations: ``. Decide on a single explicit collation and remove others. - -## IMPLICIT - -Error occurred due to the mismatch between implicit collations: ``. Use COLLATE function to set the collation explicitly. 
- - diff --git a/docs/sql-error-conditions-failed-read-file-error-class.md b/docs/sql-error-conditions-failed-read-file-error-class.md deleted file mode 100644 index a4344666c59c6..0000000000000 --- a/docs/sql-error-conditions-failed-read-file-error-class.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -layout: global -title: FAILED_READ_FILE error class -displayTitle: FAILED_READ_FILE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -SQLSTATE: KD001 - -Encountered error while reading file ``. - -This error class has the following derived error classes: - -## CANNOT_READ_FILE_FOOTER - -Could not read footer. Please ensure that the file is in either ORC or Parquet format. -If not, please convert it to a valid format. If the file is in the valid format, please check if it is corrupt. -If it is, you can choose to either ignore it or fix the corruption. - -## FILE_NOT_EXIST - -File does not exist. It is possible the underlying files have been updated. -You can explicitly invalidate the cache in Spark by running 'REFRESH TABLE tableName' command in SQL or by recreating the Dataset/DataFrame involved. - -## NO_HINT - - - -## PARQUET_COLUMN_DATA_TYPE_MISMATCH - -Data type mismatches when reading Parquet column ``. 
Expected Spark type ``, actual Parquet type ``. - - diff --git a/docs/sql-error-conditions-illegal-state-store-value-error-class.md b/docs/sql-error-conditions-illegal-state-store-value-error-class.md deleted file mode 100644 index e6457e58b7b4d..0000000000000 --- a/docs/sql-error-conditions-illegal-state-store-value-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: ILLEGAL_STATE_STORE_VALUE error class -displayTitle: ILLEGAL_STATE_STORE_VALUE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Illegal value provided to the State Store - -This error class has the following derived error classes: - -## EMPTY_LIST_VALUE - -Cannot write empty list values to State Store for StateName ``. - -## NULL_VALUE - -Cannot write null values to State Store for StateName ``. 
- - diff --git a/docs/sql-error-conditions-invalid-aggregate-filter-error-class.md b/docs/sql-error-conditions-invalid-aggregate-filter-error-class.md deleted file mode 100644 index 8a3441ca133d4..0000000000000 --- a/docs/sql-error-conditions-invalid-aggregate-filter-error-class.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -layout: global -title: INVALID_AGGREGATE_FILTER error class -displayTitle: INVALID_AGGREGATE_FILTER error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42903](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The FILTER expression `` in an aggregate function is invalid. - -This error class has the following derived error classes: - -## CONTAINS_AGGREGATE - -Expected a FILTER expression without an aggregation, but found ``. - -## CONTAINS_WINDOW_FUNCTION - -Expected a FILTER expression without a window function, but found ``. - -## NON_DETERMINISTIC - -Expected a deterministic FILTER expression. - -## NOT_BOOLEAN - -Expected a FILTER expression of the BOOLEAN type. 
- - diff --git a/docs/sql-error-conditions-invalid-conf-value-error-class.md b/docs/sql-error-conditions-invalid-conf-value-error-class.md deleted file mode 100644 index ac430956340f8..0000000000000 --- a/docs/sql-error-conditions-invalid-conf-value-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: INVALID_CONF_VALUE error class -displayTitle: INVALID_CONF_VALUE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 22022](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The value '``' in the config "``" is invalid. - -This error class has the following derived error classes: - -## DEFAULT_COLLATION - -Cannot resolve the given default collation. Did you mean '``'? - -## TIME_ZONE - -Cannot resolve the given timezone. 
- - diff --git a/docs/sql-error-conditions-invalid-datetime-pattern-error-class.md b/docs/sql-error-conditions-invalid-datetime-pattern-error-class.md deleted file mode 100644 index 10e9fc97027c0..0000000000000 --- a/docs/sql-error-conditions-invalid-datetime-pattern-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: INVALID_DATETIME_PATTERN error class -displayTitle: INVALID_DATETIME_PATTERN error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 22007](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Unrecognized datetime pattern: ``. - -This error class has the following derived error classes: - -## ILLEGAL_CHARACTER - -Illegal pattern character found in datetime pattern: ``. Please provide legal character. - -## LENGTH - -Too many letters in datetime pattern: ``. Please reduce pattern length. 
- - diff --git a/docs/sql-error-conditions-invalid-delimiter-value-error-class.md b/docs/sql-error-conditions-invalid-delimiter-value-error-class.md deleted file mode 100644 index 815fe78bce945..0000000000000 --- a/docs/sql-error-conditions-invalid-delimiter-value-error-class.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -layout: global -title: INVALID_DELIMITER_VALUE error class -displayTitle: INVALID_DELIMITER_VALUE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid value for delimiter. - -This error class has the following derived error classes: - -## DELIMITER_LONGER_THAN_EXPECTED - -Delimiter cannot be more than one character: ``. - -## EMPTY_STRING - -Delimiter cannot be empty string. - -## SINGLE_BACKSLASH - -Single backslash is prohibited. It has special meaning as beginning of an escape sequence. To get the backslash character, pass a string with two backslashes as the delimiter. - -## UNSUPPORTED_SPECIAL_CHARACTER - -Unsupported special character for delimiter: ``. 
- - diff --git a/docs/sql-error-conditions-invalid-interval-format-error-class.md b/docs/sql-error-conditions-invalid-interval-format-error-class.md deleted file mode 100644 index 28cccd5e12887..0000000000000 --- a/docs/sql-error-conditions-invalid-interval-format-error-class.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -layout: global -title: INVALID_INTERVAL_FORMAT error class -displayTitle: INVALID_INTERVAL_FORMAT error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 22006](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Error parsing '``' to interval. Please ensure that the value provided is in a valid format for defining an interval. You can reference the documentation for the correct format. - -This error class has the following derived error classes: - -## ARITHMETIC_EXCEPTION - -Uncaught arithmetic exception while parsing '``'. - -## INPUT_IS_EMPTY - -Interval string cannot be empty. - -## INPUT_IS_NULL - -Interval string cannot be null. - -## INVALID_FRACTION - -`` cannot have fractional part. - -## INVALID_PRECISION - -Interval can only support nanosecond precision, `` is out of range. - -## INVALID_PREFIX - -Invalid interval prefix ``. - -## INVALID_UNIT - -Invalid unit ``. 
- -## INVALID_VALUE - -Invalid value ``. - -## MISSING_NUMBER - -Expect a number after `` but hit EOL. - -## MISSING_UNIT - -Expect a unit name after `` but hit EOL. - -## UNKNOWN_PARSING_ERROR - -Unknown error when parsing ``. - -## UNRECOGNIZED_NUMBER - -Unrecognized number ``. - - diff --git a/docs/sql-error-conditions-numeric-value-out-of-range-error-class.md b/docs/sql-error-conditions-numeric-value-out-of-range-error-class.md deleted file mode 100644 index 690bbeec07473..0000000000000 --- a/docs/sql-error-conditions-numeric-value-out-of-range-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: NUMERIC_VALUE_OUT_OF_RANGE error class -displayTitle: NUMERIC_VALUE_OUT_OF_RANGE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - - - -This error class has the following derived error classes: - -## WITHOUT_SUGGESTION - -The `` rounded half up from `` cannot be represented as Decimal(``, ``). - -## WITH_SUGGESTION - -`` cannot be represented as Decimal(``, ``). If necessary set `` to "false" to bypass this error, and return NULL instead. 
- - diff --git a/docs/sql-error-conditions-syntax-discontinued-error-class.md b/docs/sql-error-conditions-syntax-discontinued-error-class.md deleted file mode 100644 index 966e11004364e..0000000000000 --- a/docs/sql-error-conditions-syntax-discontinued-error-class.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -layout: global -title: SYNTAX_DISCONTINUED error class -displayTitle: SYNTAX_DISCONTINUED error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Support of the clause or keyword: `` has been discontinued in this context. - -This error class has the following derived error classes: - -## BANG_EQUALS_NOT - -The '!' keyword is supported as a prefix operator in a logical operation only. -Use the 'NOT' keyword instead for clauses such as `NOT LIKE`, `NOT IN`, `NOT BETWEEN`, etc. -To re-enable the '!' keyword, set "spark.sql.legacy.bangEqualsNot" to "true". 
- - diff --git a/docs/sql-error-conditions-unsupported-call-error-class.md b/docs/sql-error-conditions-unsupported-call-error-class.md deleted file mode 100644 index 38c7859e88fe6..0000000000000 --- a/docs/sql-error-conditions-unsupported-call-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_CALL error class -displayTitle: UNSUPPORTED_CALL error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Cannot call the method "``" of the class "``". - -This error class has the following derived error classes: - -## FIELD_INDEX - -The row shall have a schema to get an index of the field ``. 
- -## WITHOUT_SUGGESTION - - - - diff --git a/docs/sql-error-conditions-unsupported-collation-error-class.md b/docs/sql-error-conditions-unsupported-collation-error-class.md deleted file mode 100644 index ae410a30317a1..0000000000000 --- a/docs/sql-error-conditions-unsupported-collation-error-class.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_COLLATION error class -displayTitle: UNSUPPORTED_COLLATION error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Collation `` is not supported for: - -This error class has the following derived error classes: - -## FOR_FUNCTION - -function ``. Please try to use a different collation. - - diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index ea4dbe926d146..254c54a414a7e 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -29,10 +29,12 @@ license: | - Since Spark 4.0, the default behaviour when inserting elements in a map is changed to first normalize keys -0.0 to 0.0. The affected SQL functions are `create_map`, `map_from_arrays`, `map_from_entries`, and `map_concat`. 
To restore the previous behaviour, set `spark.sql.legacy.disableMapKeyNormalization` to `true`. - Since Spark 4.0, the default value of `spark.sql.maxSinglePartitionBytes` is changed from `Long.MaxValue` to `128m`. To restore the previous behavior, set `spark.sql.maxSinglePartitionBytes` to `9223372036854775807`(`Long.MaxValue`). - Since Spark 4.0, any read of SQL tables takes into consideration the SQL configs `spark.sql.files.ignoreCorruptFiles`/`spark.sql.files.ignoreMissingFiles` instead of the core config `spark.files.ignoreCorruptFiles`/`spark.files.ignoreMissingFiles`. +- Since Spark 4.0, when reading SQL tables hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.sql.files.ignoreCorruptFiles` is set to `true`. - Since Spark 4.0, `spark.sql.hive.metastore` drops the support of Hive prior to 2.0.0 as they require JDK 8 that Spark does not support anymore. Users should migrate to higher versions. - Since Spark 4.0, `spark.sql.parquet.compression.codec` drops the support of codec name `lz4raw`, please use `lz4_raw` instead. - Since Spark 4.0, when overflowing during casting timestamp to byte/short/int under non-ansi mode, Spark will return null instead a wrapping value. - Since Spark 4.0, the `encode()` and `decode()` functions support only the following charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', 'UTF-32'. To restore the previous behavior when the function accepts charsets of the current JDK used by Spark, set `spark.sql.legacy.javaCharsets` to `true`. +- Since Spark 4.0, the `encode()` and `decode()` functions raise `MALFORMED_CHARACTER_CODING` error when handling unmappable characters, while in Spark 3.5 and earlier versions, these characters will be replaced with mojibakes. To restore the previous behavior, set `spark.sql.legacy.codingErrorAction` to `true`. 
For example, if you try to `decode` a string value `tést` / [116, -23, 115, 116] (encoded in latin1) with 'UTF-8', you get `t�st`. - Since Spark 4.0, the legacy datetime rebasing SQL configs with the prefix `spark.sql.legacy` are removed. To restore the previous behavior, use the following configs: - `spark.sql.parquet.int96RebaseModeInWrite` instead of `spark.sql.legacy.parquet.int96RebaseModeInWrite` - `spark.sql.parquet.datetimeRebaseModeInWrite` instead of `spark.sql.legacy.parquet.datetimeRebaseModeInWrite` @@ -62,6 +64,10 @@ license: | - Since Spark 4.0, The Storage-Partitioned Join feature flag `spark.sql.sources.v2.bucketing.pushPartValues.enabled` is set to `true`. To restore the previous behavior, set `spark.sql.sources.v2.bucketing.pushPartValues.enabled` to `false`. - Since Spark 4.0, the `sentences` function uses `Locale(language)` instead of `Locale.US` when `language` parameter is not `NULL` and `country` parameter is `NULL`. +## Upgrading from Spark SQL 3.5.3 to 3.5.4 + +- Since Spark 3.5.4, when reading SQL tables hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.sql.files.ignoreCorruptFiles` is set to `true`. + ## Upgrading from Spark SQL 3.5.1 to 3.5.2 - Since 3.5.2, MySQL JDBC datasource will read TINYINT UNSIGNED as ShortType, while in 3.5.1, it was wrongly read as ByteType. diff --git a/docs/sql-pipe-syntax.md b/docs/sql-pipe-syntax.md index 2c7db9f456475..3d757db966239 100644 --- a/docs/sql-pipe-syntax.md +++ b/docs/sql-pipe-syntax.md @@ -179,7 +179,7 @@ Returns all the output rows from the source table unmodified. For example: ```sql -CREATE TABLE t(a INT, b INT) AS VALUES (1, 2), (3, 4); +CREATE TABLE t AS VALUES (1, 2), (3, 4) AS t(a, b); TABLE t; +---+---+ @@ -198,16 +198,26 @@ TABLE t; Evaluates the provided expressions over each of the rows of the input table. 
+In general, this operator is not always required with SQL pipe syntax. It is possible to use it at +or near the end of a query to evaluate expressions or specify a list of output columns. + +Since the final query result always comprises the columns returned from the last pipe operator, +when this `SELECT` operator does not appear, the output includes all columns from the full row. +This behavior is similar to `SELECT *` in standard SQL syntax. + It is possible to use `DISTINCT` and `*` as needed.
This works like the outermost `SELECT` in a table subquery in regular Spark SQL. Window functions are supported in the `SELECT` list as well. To use them, the `OVER` clause must be provided. You may provide the window specification in the `WINDOW` clause. +Aggregate functions are not supported in this operator. To perform aggregation, use the `AGGREGATE` +operator instead. + For example: ```sql -CREATE TABLE t(col INT) AS VALUES (0), (1); +CREATE TABLE t AS VALUES (0), (1) AS t(col); FROM t |> SELECT col * 2 AS result; @@ -226,7 +236,12 @@ FROM t |> EXTEND [[AS] alias], ... ``` -Appends new columns to the input table by evaluating the specified expressions over each of the input rows. +Appends new columns to the input table by evaluating the specified expressions over each of the +input rows. + +After an `EXTEND` operation, top-level column names are updated but table aliases still refer to the +original row values (such as an inner join between two tables `lhs` and `rhs` with a subsequent +`EXTEND` and then `SELECT lhs.col, rhs.col`). For example: @@ -248,7 +263,17 @@ VALUES (0), (1) tab(col) |> SET = , ... ``` -Updates columns of the input table by replacing them with the result of evaluating the provided expressions. +Updates columns of the input table by replacing them with the result of evaluating the provided +expressions. Each such column reference must appear in the input table exactly once. + +This is similar to `SELECT * EXCEPT (column), AS column` in regular Spark SQL. + +It is possible to perform multiple assignments in a single `SET` clause. Each assignment may refer +to the result of previous assignments. + +After an assignment, top-level column names are updated but table aliases still refer to the +original row values (such as an inner join between two tables `lhs` and `rhs` with a subsequent +`SET` and then `SELECT lhs.col, rhs.col`). 
For example: @@ -256,6 +281,16 @@ For example: VALUES (0), (1) tab(col) |> SET col = col * 2; ++---+ +|col| ++---+ +| 0| +| 2| ++---+ + +VALUES (0), (1) tab(col) +|> SET col = col * 2; + +---+ |col| +---+ @@ -270,7 +305,14 @@ VALUES (0), (1) tab(col) |> DROP , ... ``` -Drops columns of the input table by name. +Drops columns of the input table by name. Each such column reference must appear in the input table +exactly once. + +This is similar to `SELECT * EXCEPT (column)` in regular Spark SQL. + +After a `DROP` operation, top-level column names are updated but table aliases still refer to the +original row values (such as an inner join between two tables `lhs` and `rhs` with a subsequent +`DROP` and then `SELECT lhs.col, rhs.col`). For example: @@ -293,18 +335,25 @@ VALUES (0, 1) tab(col1, col2) Retains the same rows and column names of the input table but with a new table alias. +This operator is useful for introducing a new alias for the input table, which can then be referred +to in subsequent operators. Any existing alias for the table is replaced by the new alias. + +It is useful to use this operator after adding new columns with `SELECT` or `EXTEND` or after +performing aggregation with `AGGREGATE`. This simplifies the process of referring to the columns +from subsequent `JOIN` operators and allows for more readable queries. + For example: ```sql VALUES (0, 1) tab(col1, col2) -|> AS new_tab; -|> SELECT * FROM new_tab; +|> AS new_tab +|> SELECT col1 + col2 FROM new_tab; -+----+----+ -|col1|col2| -+----+----+ -| 0| 1| -+----+----+ ++-----------+ +|col1 + col2| ++-----------+ +| 1| ++-----------+ ``` #### WHERE @@ -357,22 +406,48 @@ VALUES (0), (0) tab(col) #### AGGREGATE ```sql +-- Full-table aggregation |> AGGREGATE [[AS] alias], ... -``` - -Performs full-table aggregation, returning one result row with a column for each aggregate expression. -```sql +-- Aggregation with grouping |> AGGREGATE [ [[AS] alias], ...] GROUP BY [AS alias], ... 
``` -Performs aggregation with grouping, returning one row per group. The column list includes the -grouping columns first and then the aggregate columns afterward. Aliases can be assigned directly -on grouping expressions. +Performs aggregation across grouped rows or across the entire input table. + +If no `GROUP BY` clause is present, this performs full-table aggregation, returning one result row +with a column for each aggregate expression. Otherwise, this performs aggregation with grouping, +returning one row per group. Aliases can be assigned directly on grouping expressions. + +The output column list of this operator includes the grouping columns first (if any), and then the +aggregate columns afterward. + +Each `` expression can include standard aggregate function(s) like `COUNT`, `SUM`, `AVG`, +`MIN`, or any other aggregate function(s) that Spark SQL supports. Additional expressions may appear +below or above the aggregate function(s), such as `MIN(FLOOR(col)) + 1`. Each `` +expression must contain at least one aggregate function (or otherwise the query returns an error). +Each `` expression may include a column alias with `AS `, and may also +include a `DISTINCT` keyword to remove duplicate values before applying the aggregate function (for +example, `COUNT(DISTINCT col)`). + +If present, the `GROUP BY` clause can include any number of grouping expressions, and each +`` expression will evaluate over each unique combination of values of the grouping +expressions. The output table contains the evaluated grouping expressions followed by the evaluated +aggregate functions. The `GROUP BY` expressions may include one-based ordinals. Unlike regular SQL +in which such ordinals refer to the expressions in the accompanying `SELECT` clause, in SQL pipe +syntax, they refer to the columns of the relation produced by the preceding operator instead. For +example, in `TABLE t |> AGGREGATE COUNT(*) GROUP BY 2`, we refer to the second column of the input +table `t`. 
+ +There is no need to repeat entire expressions between `GROUP BY` and `SELECT`, since the `AGGREGATE` +operator automatically includes the evaluated grouping expressions in its output. By the same token, +after an `AGGREGATE` operator, it is often unnecessary to issue a following `SELECT` operator, since +`AGGREGATE` returns both the grouping columns and the aggregate columns in a single step. For example: ```sql +-- Full-table aggregation VALUES (0), (1) tab(col) |> AGGREGATE COUNT(col) AS count; @@ -382,6 +457,7 @@ VALUES (0), (1) tab(col) | 2| +-----+ +-- Aggregation with grouping VALUES (0, 1), (0, 2) tab(col1, col2) |> AGGREGATE COUNT(col2) AS count GROUP BY col1; @@ -398,19 +474,45 @@ VALUES (0, 1), (0, 2) tab(col1, col2) |> [LEFT | RIGHT | FULL | CROSS | SEMI | ANTI | NATURAL | LATERAL] JOIN [ON | USING(col, ...)] ``` -Joins rows from both inputs, returning a filtered cross-product of the pipe input table and the table expression following the JOIN keyword. +Joins rows from both inputs, returning a filtered cross-product of the pipe input table and the +table expression following the JOIN keyword. This behaves in a similar manner to the `JOIN` clause +in regular SQL where the pipe operator input table becomes the left side of the join and the table +argument becomes the right side of the join. + +Standard join modifiers like `LEFT`, `RIGHT`, and `FULL` are supported before the `JOIN` keyword. + +The join predicate may need to refer to columns from both inputs to the join. In this case, it may +be necessary to use table aliases to differentiate between columns in the event that both inputs +have columns with the same names. The `AS` operator can be useful here to introduce a new alias for +the pipe input table that becomes the left side of the join. Use standard syntax to assign an alias +to the table argument that becomes the right side of the join, if needed. 
For example: ```sql -VALUES (0, 1) tab(a, b) -|> JOIN VALUES (0, 2) tab(c, d) ON a = c; +SELECT 0 AS a, 1 AS b +|> AS lhs +|> JOIN VALUES (0, 2) rhs(a, b) ON (lhs.a = rhs.a); +---+---+---+---+ | a| b| c| d| +---+---+---+---+ | 0| 1| 0| 2| +---+---+---+---+ + +VALUES ('apples', 3), ('bananas', 4) t(item, sales) +|> AS produce_sales +|> LEFT JOIN + (SELECT "apples" AS item, 123 AS id) AS produce_data + USING (item) +|> SELECT produce_sales.item, sales, id; + +/*---------+-------+------+ + | item | sales | id | + +---------+-------+------+ + | apples | 3 | 123 | + | bananas | 4 | NULL | + +---------+-------+------*/ ``` #### ORDER BY @@ -419,7 +521,8 @@ VALUES (0, 1) tab(a, b) |> ORDER BY [ASC | DESC], ... ``` -Returns the input rows after sorting as indicated. Standard modifiers are supported including NULLS FIRST/LAST. +Returns the input rows after sorting as indicated. Standard modifiers are supported including NULLS +FIRST/LAST. For example: @@ -438,10 +541,10 @@ VALUES (0), (1) tab(col) #### UNION, INTERSECT, EXCEPT ```sql -|> {UNION | INTERSECT | EXCEPT} {ALL | DISTINCT} (), (), ... +|> {UNION | INTERSECT | EXCEPT} {ALL | DISTINCT} () ``` -Performs the union or other set operation over the combined rows from the input table plus one or more tables provided as input arguments. +Performs the union or other set operation over the combined rows from the input table or subquery. For example: @@ -469,12 +572,22 @@ For example: ```sql VALUES (0), (0), (0), (0) tab(col) -|> TABLESAMPLE BERNOULLI(1 ROWS); +|> TABLESAMPLE (1 ROWS); + ++---+ +|col| ++---+ +| 0| ++---+ + +VALUES (0), (0) tab(col) +|> TABLESAMPLE (100 PERCENT); +---+ |col| +---+ | 0| +| 0| +---+ ``` diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 7af54850f5da7..3b1138b9ee0e5 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -568,6 +568,7 @@ Below is a list of all the keywords in Spark SQL. 
|ITEMS|non-reserved|non-reserved|non-reserved| |ITERATE|non-reserved|non-reserved|non-reserved| |JOIN|reserved|strict-non-reserved|reserved| +|JSON|non-reserved|non-reserved|non-reserved| |KEYS|non-reserved|non-reserved|non-reserved| |LANGUAGE|non-reserved|non-reserved|reserved| |LAST|non-reserved|non-reserved|non-reserved| @@ -651,6 +652,7 @@ Below is a list of all the keywords in Spark SQL. |RECORDREADER|non-reserved|non-reserved|non-reserved| |RECORDWRITER|non-reserved|non-reserved|non-reserved| |RECOVER|non-reserved|non-reserved|non-reserved| +|RECURSIVE|reserved|non-reserved|reserved| |REDUCE|non-reserved|non-reserved|non-reserved| |REFERENCES|reserved|non-reserved|reserved| |REFRESH|non-reserved|non-reserved|non-reserved| diff --git a/docs/sql-ref-literals.md b/docs/sql-ref-literals.md index 141f985b0beac..7a10676cce237 100644 --- a/docs/sql-ref-literals.md +++ b/docs/sql-ref-literals.md @@ -46,6 +46,7 @@ A string literal is used to specify a character string value. One character from the character set. Use `\` to escape special characters (e.g., `'` or `\`). To represent unicode characters, use 16-bit or 32-bit unicode escape of the form `\uxxxx` or `\Uxxxxxxxx`, where xxxx and xxxxxxxx are 16-bit and 32-bit code points in hexadecimal respectively (e.g., `\u3042` for `あ` and `\U0001F44D` for `👍`). + An ASCII character can also be represented as an octal number preceded by `\` like `\101`, which represents `A`. * **r** @@ -78,14 +79,14 @@ SELECT "SPARK SQL" AS col; +---------+ | col| +---------+ -|Spark SQL| +|SPARK SQL| +---------+ SELECT 'it\'s $10.' AS col; +---------+ | col| +---------+ -|It's $10.| +|it's $10.| +---------+ SELECT r"'\n' represents newline character." 
AS col; diff --git a/docs/sql-ref-syntax-aux-describe-function.md b/docs/sql-ref-syntax-aux-describe-function.md index a871fb5bfd406..0c5a3d751a564 100644 --- a/docs/sql-ref-syntax-aux-describe-function.md +++ b/docs/sql-ref-syntax-aux-describe-function.md @@ -85,7 +85,7 @@ DESC FUNCTION max; -- Describe a builtin user defined aggregate function -- Returns function name, implementing class and usage and examples. -DESC FUNCTION EXTENDED explode +DESC FUNCTION EXTENDED explode; +---------------------------------------------------------------+ |function_desc | +---------------------------------------------------------------+ diff --git a/docs/sql-ref-syntax-aux-describe-table.md b/docs/sql-ref-syntax-aux-describe-table.md index 4b6e1e8c3461e..5f5fd27c865e6 100644 --- a/docs/sql-ref-syntax-aux-describe-table.md +++ b/docs/sql-ref-syntax-aux-describe-table.md @@ -29,16 +29,17 @@ to return the metadata pertaining to a partition or column respectively. ### Syntax ```sql -{ DESC | DESCRIBE } [ TABLE ] [ format ] table_identifier [ partition_spec ] [ col_name ] +{ DESC | DESCRIBE } [ TABLE ] [ format ] table_identifier [ partition_spec ] [ col_name ] [ AS JSON ] ``` ### Parameters * **format** - Specifies the optional format of describe output. If `EXTENDED` is specified + Specifies the optional format of describe output. If `EXTENDED` or `FORMATTED` is specified then additional metadata information (such as parent database, owner, and access time) - is returned. + is returned. Also if `EXTENDED` or `FORMATTED` is specified, then the metadata can be returned + in JSON format by specifying `AS JSON` at the end of the statement. * **table_identifier** @@ -60,8 +61,96 @@ to return the metadata pertaining to a partition or column respectively. and `col_name` are mutually exclusive and can not be specified together. Currently nested columns are not allowed to be specified. + JSON format is not currently supported for individual columns. + **Syntax:** `[ database_name. 
] [ table_name. ] column_name` +* **AS JSON** + + An optional parameter to return the table metadata in JSON format. Only supported when `EXTENDED` + or `FORMATTED` format is specified (both produce equivalent JSON). + + **Syntax:** `[ AS JSON ]` + + **Schema:** + + Below is the full JSON schema. + In actual output, null fields are omitted and the JSON is not pretty-printed (see Examples). + + ```sql + { + "table_name": "", + "catalog_name": "", + "schema_name": "", + "namespace": [""], + "type": "", + "provider": "", + "columns": [ + { + "name": "", + "type": , + "comment": "", + "nullable": , + "default": "" + } + ], + "partition_values": { + "": "" + }, + "location": "", + "view_text": "", + "view_original_text": "", + "view_schema_mode": "", + "view_catalog_and_namespace": "", + "view_query_output_columns": ["col1", "col2"], + "comment": "", + "table_properties": { + "property1": "", + "property2": "" + }, + "storage_properties": { + "property1": "", + "property2": "" + }, + "serde_library": "", + "input_format": "", + "output_format": "", + "num_buckets": , + "bucket_columns": [""], + "sort_columns": [""], + "created_time": "", + "created_by": "", + "last_access": "", + "partition_provider": "" + } + ``` + + Below are the schema definitions for ``: + +| Spark SQL Data Types | JSON Representation | +|-----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| ByteType | `{ "name" : "tinyint" }` | +| ShortType | `{ "name" : "smallint" }` | +| IntegerType | `{ "name" : "int" }` | +| LongType | `{ "name" : "bigint" }` | +| FloatType | `{ "name" : "float" }` | +| DoubleType | `{ "name" : "double" }` | +| DecimalType | `{ "name" : "decimal", "precision": p, "scale": s }` | +| StringType | `{ "name" : "string" }` | +| VarCharType | `{ "name" : "varchar", "length": n }` | +| CharType | `{ "name" : "char", "length": n }` | +| BinaryType | 
`{ "name" : "binary" }` | +| BooleanType | `{ "name" : "boolean" }` | +| DateType | `{ "name" : "date" }` | +| VariantType | `{ "name" : "variant" }` | +| TimestampType | `{ "name" : "timestamp_ltz" }` | +| TimestampNTZType | `{ "name" : "timestamp_ntz" }` | +| YearMonthIntervalType | `{ "name" : "interval", "start_unit": "", "end_unit": "" }` | +| DayTimeIntervalType | `{ "name" : "interval", "start_unit": "", "end_unit": "" }` | +| ArrayType | `{ "name" : "array", "element_type": , "element_nullable": }` | +| MapType | `{ "name" : "map", "key_type": , "value_type": , "value_nullable": }` | +| StructType | `{ "name" : "struct", "fields": [ {"name" : "field1", "type" : , “nullable”: , "comment": “”, "default": “”}, ... ] }` | + ### Examples ```sql @@ -173,6 +262,10 @@ DESCRIBE customer salesdb.customer.name; |data_type| string| | comment|Short name| +---------+----------+ + +-- Returns the table metadata in JSON format. +DESC FORMATTED customer AS JSON; +{"table_name":"customer","catalog_name":"spark_catalog","schema_name":"default","namespace":["default"],"columns":[{"name":"cust_id","type":{"name":"integer"},"nullable":true},{"name":"name","type":{"name":"string"},"comment":"Short name","nullable":true},{"name":"state","type":{"name":"varchar","length":20},"nullable":true}],"location": "file:/tmp/salesdb.db/custom...","created_time":"2020-04-07T14:05:43Z","last_access":"UNKNOWN","created_by":"None","type":"MANAGED","provider":"parquet","partition_provider":"Catalog","partition_columns":["state"]} ``` ### Related Statements diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md index 7ad56bf0657b2..9a50db992f9c8 100644 --- a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md +++ b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md @@ -54,9 +54,9 @@ ADD JAR '/some/other.jar'; ADD JAR "/path with space/abc.jar"; ADD JARS "/path with space/def.jar" '/path with space/ghi.jar'; ADD JAR "ivy://group:module:version"; 
-ADD JAR "ivy://group:module:version?transitive=false" -ADD JAR "ivy://group:module:version?transitive=true" -ADD JAR "ivy://group:module:version?exclude=group:module&transitive=true" +ADD JAR "ivy://group:module:version?transitive=false"; +ADD JAR "ivy://group:module:version?transitive=true"; +ADD JAR "ivy://group:module:version?exclude=group:module&transitive=true"; ``` ### Related Statements diff --git a/docs/sql-ref-syntax-ddl-alter-table.md b/docs/sql-ref-syntax-ddl-alter-table.md index adcfa8db06f12..28ecc44a5bf7e 100644 --- a/docs/sql-ref-syntax-ddl-alter-table.md +++ b/docs/sql-ref-syntax-ddl-alter-table.md @@ -673,12 +673,12 @@ ALTER TABLE loc_orc SET fileformat orc; ALTER TABLE p1 partition (month=2, day=2) SET fileformat parquet; -- Change the file Location -ALTER TABLE dbx.tab1 PARTITION (a='1', b='2') SET LOCATION '/path/to/part/ways' +ALTER TABLE dbx.tab1 PARTITION (a='1', b='2') SET LOCATION '/path/to/part/ways'; -- SET SERDE/ SERDE Properties ALTER TABLE test_tab SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; -ALTER TABLE dbx.tab1 SET SERDE 'org.apache.hadoop' WITH SERDEPROPERTIES ('k' = 'v', 'kay' = 'vee') +ALTER TABLE dbx.tab1 SET SERDE 'org.apache.hadoop' WITH SERDEPROPERTIES ('k' = 'v', 'kay' = 'vee'); -- SET TABLE PROPERTIES ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('winner' = 'loser'); diff --git a/docs/sql-ref-syntax-ddl-declare-variable.md b/docs/sql-ref-syntax-ddl-declare-variable.md index ba9857bf1917a..41ecba1364361 100644 --- a/docs/sql-ref-syntax-ddl-declare-variable.md +++ b/docs/sql-ref-syntax-ddl-declare-variable.md @@ -83,7 +83,7 @@ DECLARE OR REPLACE five = 55; -- Explicitly declare the default value of a variable using the keyword `DEFAULT` DECLARE VARIABLE size DEFAULT 6; --- STRING variable initialialized to `NULL` +-- STRING variable initialized to `NULL` DECLARE some_var STRING; ``` diff --git a/docs/sql-ref-syntax-dml-insert-table.md b/docs/sql-ref-syntax-dml-insert-table.md index 
6ca062e081747..6f85d4401d3b1 100644 --- a/docs/sql-ref-syntax-dml-insert-table.md +++ b/docs/sql-ref-syntax-dml-insert-table.md @@ -379,7 +379,7 @@ SELECT * FROM persons2; +-------------+--------------------------+---------+ -- in an atomic operation, 1) delete rows with ssn = 123456789 and 2) insert rows from persons2 -INSERT INTO persons REPLACE WHERE ssn = 123456789 SELECT * FROM persons2 +INSERT INTO persons REPLACE WHERE ssn = 123456789 SELECT * FROM persons2; SELECT * FROM persons; +-------------+--------------------------+---------+ diff --git a/docs/sql-ref-syntax-qry-star.md b/docs/sql-ref-syntax-qry-star.md index 3a997dad644b9..c575727e820e8 100644 --- a/docs/sql-ref-syntax-qry-star.md +++ b/docs/sql-ref-syntax-qry-star.md @@ -21,7 +21,7 @@ license: | ### Description -A shorthand to name all the referencable columns in the FROM clause or a specific table reference's columns or fields in the FROM clause. +A shorthand to name all the referenceable columns in the FROM clause or a specific table reference's columns or fields in the FROM clause. The star clause is most frequently used in the SELECT list. Spark also supports its use in function invocation and certain n-ary operations within the SELECT list and WHERE clause. @@ -38,11 +38,11 @@ except_clause * **name** - If present limits the columns or fields to be named to those in the specified referencable field, column, or table. + If present limits the columns or fields to be named to those in the specified referenceable field, column, or table. * **except_clause** - Optionally prunes columns or fields from the referencable set of columns identified in the select_star clause. + Optionally prunes columns or fields from the referenceable set of columns identified in the select_star clause. 
* **column_name** diff --git a/docs/sql-ref.md b/docs/sql-ref.md index 6eb2bf77c6ac0..6d557caaca3d6 100644 --- a/docs/sql-ref.md +++ b/docs/sql-ref.md @@ -26,6 +26,7 @@ Spark SQL is Apache Spark's module for working with structured data. This guide * [Data Types](sql-ref-datatypes.html) * [Datetime Pattern](sql-ref-datetime-pattern.html) * [Number Pattern](sql-ref-number-pattern.html) + * [Operators](sql-ref-operators.html) * [Functions](sql-ref-functions.html) * [Built-in Functions](sql-ref-functions-builtin.html) * [Scalar User-Defined Functions (UDFs)](sql-ref-functions-udf-scalar.html) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala index 20c5eb1700155..9289b005e3ba4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala @@ -31,7 +31,7 @@ object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. logInfo("Setting log level to [WARN] for streaming example." 
+ - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala index 1493d8114c699..823143f9b9abb 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala @@ -126,7 +126,7 @@ object Pregel extends Logging { require(maxIterations > 0, s"Maximum number of iterations must be greater than 0," + s" but got ${maxIterations}") - val checkpointInterval = graph.vertices.sparkContext.getConf + val checkpointInterval = graph.vertices.sparkContext.getReadOnlyConf .getInt("spark.graphx.pregel.checkpointInterval", -1) var g = graph.mapVertices((vid, vdata) => vprog(vid, vdata, initialMsg)) val graphCheckpointer = new PeriodicGraphCheckpointer[VD, ED]( diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 9c2f21e7ab617..3361081549242 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -34,6 +34,8 @@ hadoop-cloud + 3.12.12 + 1.17.6 @@ -191,6 +193,16 @@ ${jetty.version} ${hadoop.deps.scope} + + com.squareup.okhttp3 + okhttp + ${okhttp.version} + + + com.squareup.okio + okio + ${okio.version} + diff --git a/mllib-local/benchmarks/BLASBenchmark-jdk21-results.txt b/mllib-local/benchmarks/BLASBenchmark-jdk21-results.txt index 97f88e2fe2de4..b2ec64828b395 100644 --- a/mllib-local/benchmarks/BLASBenchmark-jdk21-results.txt +++ b/mllib-local/benchmarks/BLASBenchmark-jdk21-results.txt @@ -2,337 +2,311 @@ daxpy ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor daxpy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -f2j 143 149 3 700.3 1.4 1.0X -java 126 146 8 791.3 1.3 1.1X -native 142 149 4 705.8 1.4 1.0X +f2j 148 155 4 676.7 1.5 1.0X +java 146 153 7 684.9 1.5 1.0X ================================================================================================ saxpy ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor saxpy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 75 82 6 1339.1 0.7 1.0X -java 68 71 2 1471.4 0.7 1.1X -native 76 84 6 1321.9 0.8 1.0X +f2j 79 85 4 1270.6 0.8 1.0X +java 69 73 2 1447.1 0.7 1.1X ================================================================================================ dcopy ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dcopy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 139 145 3 718.2 1.4 1.0X -java 127 143 9 786.7 1.3 1.1X -native 126 145 8 792.5 1.3 1.1X +f2j 131 151 10 766.2 1.3 1.0X +java 132 150 10 757.4 1.3 1.0X ================================================================================================ scopy ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scopy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 73 80 4 1370.0 0.7 1.0X -java 69 72 2 1450.9 0.7 1.1X -native 73 80 4 1374.3 0.7 1.0X +f2j 73 83 7 1363.1 0.7 1.0X +java 72 75 2 1394.6 0.7 1.0X ================================================================================================ ddot ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor ddot: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 95 95 0 1052.1 1.0 1.0X -java 45 47 1 2236.4 0.4 2.1X -native 95 95 0 1053.4 0.9 1.0X +f2j 96 96 0 1044.5 1.0 1.0X +java 47 51 3 2147.8 0.5 2.1X ================================================================================================ sdot ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sdot: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 1074.4 0.9 1.0X -java 23 23 1 4444.2 0.2 4.1X -native 93 93 0 1075.1 0.9 1.0X +f2j 94 94 0 1067.9 0.9 1.0X +java 23 25 1 4320.8 0.2 4.0X ================================================================================================ dnrm2 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dnrm2: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 142 143 0 701.9 1.4 1.0X -java 33 33 0 3066.4 0.3 4.4X -native 94 95 1 1060.7 0.9 1.5X +f2j 143 144 1 698.6 1.4 1.0X +java 34 35 1 2981.8 0.3 4.3X ================================================================================================ snrm2 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor snrm2: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 113 114 1 885.5 1.1 1.0X -java 16 16 0 6158.1 0.2 7.0X -native 93 93 0 1073.9 0.9 1.2X +f2j 125 125 2 801.4 1.2 1.0X +java 16 17 1 6092.6 0.2 7.6X ================================================================================================ dscal ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dscal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 132 137 2 755.3 1.3 1.0X -java 120 125 2 830.5 1.2 1.1X -native 128 133 3 779.8 1.3 1.0X +f2j 141 150 7 710.1 1.4 
1.0X +java 127 135 3 784.5 1.3 1.1X ================================================================================================ sscal ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sscal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 78 86 7 1288.3 0.8 1.0X -java 57 63 4 1756.3 0.6 1.4X -native 69 77 7 1445.6 0.7 1.1X +f2j 79 88 8 1264.4 0.8 1.0X +java 58 67 5 1711.0 0.6 1.4X ================================================================================================ dgemv[N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemv[N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 67 68 1 1491.1 0.7 1.0X -java 22 23 1 4447.1 0.2 3.0X -native 44 46 1 2264.6 0.4 1.5X +f2j 100 100 1 1003.2 1.0 1.0X +java 23 25 1 4266.2 0.2 4.3X ================================================================================================ dgemv[T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemv[T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 1072.3 0.9 1.0X -java 23 23 1 4439.7 0.2 4.1X -native 93 93 0 1073.1 0.9 1.0X +f2j 94 94 1 1065.2 0.9 1.0X +java 23 24 1 4374.5 0.2 4.1X ================================================================================================ sgemv[N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemv[N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 95 95 0 1053.6 0.9 1.0X -java 11 11 0 9024.3 0.1 8.6X -native 34 35 1 2939.9 0.3 2.8X +f2j 96 96 0 1042.4 1.0 1.0X +java 12 12 1 8626.4 0.1 8.3X ================================================================================================ sgemv[T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemv[T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 1078.0 0.9 1.0X -java 11 12 0 8986.9 0.1 8.3X -native 93 93 0 1079.0 0.9 1.0X +f2j 93 94 1 1071.4 0.9 1.0X +java 11 12 1 8768.3 0.1 8.2X ================================================================================================ dger ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dger: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 137 141 2 731.0 1.4 1.0X -java 120 123 2 836.1 1.2 1.1X -native 134 139 3 743.8 1.3 1.0X +f2j 139 144 2 717.0 1.4 1.0X +java 121 126 3 828.1 1.2 1.2X ================================================================================================ dspmv[U] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dspmv[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 538.1 1.9 1.0X -java 11 12 0 4370.3 0.2 8.1X -native 47 47 0 1066.7 0.9 2.0X +f2j 92 93 2 541.7 1.8 1.0X +java 12 12 1 4276.6 0.2 7.9X ================================================================================================ dspr[U] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dspr[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 87 95 7 574.0 1.7 1.0X -java 87 95 7 575.3 1.7 1.0X -native 62 70 6 812.0 1.2 1.4X +f2j 93 96 2 536.8 1.9 1.0X +java 88 96 7 567.6 1.8 1.1X ================================================================================================ dsyr[U] 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dsyr[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 142 145 2 353.3 2.8 1.0X -java 142 145 2 352.9 2.8 1.0X -native 117 124 3 427.3 2.3 1.2X +f2j 136 141 2 366.7 2.7 1.0X +java 138 143 4 361.4 2.8 1.0X ================================================================================================ dgemm[N,N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemm[N,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 661 662 1 1512.5 0.7 1.0X -java 63 68 4 15787.8 0.1 10.4X -native 631 633 2 1583.8 0.6 1.0X +f2j 665 667 1 1503.9 0.7 1.0X +java 63 69 4 15804.0 0.1 10.5X ================================================================================================ dgemm[N,T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemm[N,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 669 670 2 1495.3 0.7 1.0X -java 64 70 3 15673.5 0.1 10.5X -native 375 377 5 2665.8 
0.4 1.8X +f2j 667 669 2 1499.4 0.7 1.0X +java 64 70 4 15724.9 0.1 10.5X ================================================================================================ dgemm[T,N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemm[T,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 900 901 1 1111.3 0.9 1.0X -java 63 68 4 15822.8 0.1 14.2X -native 903 904 1 1107.9 0.9 1.0X +f2j 911 913 2 1097.3 0.9 1.0X +java 63 69 4 15900.2 0.1 14.5X ================================================================================================ dgemm[T,T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemm[T,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 941 943 3 1062.6 0.9 1.0X -java 63 69 5 15771.6 0.1 14.8X -native 915 916 1 1092.7 0.9 1.0X +f2j 950 953 5 1053.1 0.9 1.0X +java 63 69 4 15828.3 0.1 15.0X ================================================================================================ sgemm[N,N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemm[N,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -f2j 650 651 1 1537.8 0.7 1.0X -java 40 41 1 24986.7 0.0 16.2X -native 372 372 1 2691.6 0.4 1.8X +f2j 653 655 3 1530.4 0.7 1.0X +java 40 41 1 25035.2 0.0 16.4X ================================================================================================ sgemm[N,T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemm[N,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 651 652 1 1536.7 0.7 1.0X -java 41 41 1 24643.9 0.0 16.0X -native 372 373 1 2688.8 0.4 1.7X +f2j 655 657 3 1526.9 0.7 1.0X +java 40 41 1 24749.3 0.0 16.2X ================================================================================================ sgemm[T,N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemm[T,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 901 902 0 1109.8 0.9 1.0X -java 40 41 1 25107.2 0.0 22.6X -native 918 919 1 1089.3 0.9 1.0X +f2j 906 907 2 1104.0 0.9 1.0X +java 40 41 1 25083.9 0.0 22.7X ================================================================================================ sgemm[T,T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemm[T,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 942 944 2 1061.1 0.9 1.0X -java 40 41 1 24888.3 0.0 23.5X -native 914 915 0 1093.7 0.9 1.0X +f2j 946 949 4 1056.7 0.9 1.0X +java 40 41 1 24924.3 0.0 23.6X diff --git a/mllib-local/benchmarks/BLASBenchmark-results.txt b/mllib-local/benchmarks/BLASBenchmark-results.txt index db92355b7a3c0..cb9a670c990f4 100644 --- a/mllib-local/benchmarks/BLASBenchmark-results.txt +++ b/mllib-local/benchmarks/BLASBenchmark-results.txt @@ -2,337 +2,311 @@ daxpy ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor daxpy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 150 158 4 667.3 1.5 1.0X -java 142 147 3 703.2 1.4 1.1X -native 150 158 4 668.3 1.5 1.0X +f2j 136 141 6 733.6 1.4 1.0X +java 128 131 1 782.3 1.3 1.1X ================================================================================================ saxpy ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor saxpy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 85 89 2 1173.5 0.9 1.0X -java 71 74 2 1409.0 0.7 1.2X 
-native 86 89 2 1158.6 0.9 1.0X +f2j 78 80 1 1278.9 0.8 1.0X +java 65 67 2 1540.9 0.6 1.2X ================================================================================================ dcopy ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dcopy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 143 151 4 698.9 1.4 1.0X -java 142 150 4 705.2 1.4 1.0X -native 143 148 3 697.2 1.4 1.0X +f2j 131 136 3 766.0 1.3 1.0X +java 136 141 3 734.3 1.4 1.0X ================================================================================================ scopy ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scopy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 82 85 2 1215.8 0.8 1.0X -java 72 75 2 1398.0 0.7 1.1X -native 80 83 2 1250.7 0.8 1.0X +f2j 77 81 1 1300.7 0.8 1.0X +java 69 72 1 1439.3 0.7 1.1X ================================================================================================ ddot ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor ddot: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -f2j 95 95 0 1051.8 1.0 1.0X -java 44 46 2 2279.3 0.4 2.2X -native 95 95 0 1057.0 0.9 1.0X +f2j 96 96 0 1043.9 1.0 1.0X +java 44 46 1 2251.5 0.4 2.2X ================================================================================================ sdot ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sdot: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 1074.0 0.9 1.0X -java 21 22 1 4768.4 0.2 4.4X -native 93 93 1 1075.7 0.9 1.0X +f2j 94 94 2 1066.8 0.9 1.0X +java 22 23 0 4546.9 0.2 4.3X ================================================================================================ dnrm2 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dnrm2: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 143 143 0 699.8 1.4 1.0X -java 32 33 1 3105.2 0.3 4.4X -native 94 95 1 1061.0 0.9 1.5X +f2j 144 144 1 695.9 1.4 1.0X +java 32 33 0 3089.0 0.3 4.4X ================================================================================================ snrm2 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS 
on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor snrm2: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 120 120 1 834.9 1.2 1.0X -java 16 16 0 6220.1 0.2 7.5X -native 93 93 2 1074.9 0.9 1.3X +f2j 121 121 1 828.5 1.2 1.0X +java 16 16 0 6186.1 0.2 7.5X ================================================================================================ dscal ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dscal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 142 147 2 704.2 1.4 1.0X -java 130 134 2 772.1 1.3 1.1X -native 135 142 3 740.7 1.4 1.1X +f2j 125 130 6 799.4 1.3 1.0X +java 120 123 2 832.6 1.2 1.0X ================================================================================================ sscal ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sscal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 80 82 1 1253.8 0.8 1.0X -java 64 68 1 1554.0 0.6 1.2X -native 80 83 2 1256.6 0.8 1.0X +f2j 73 75 1 1372.9 0.7 1.0X +java 54 59 4 1858.4 0.5 1.4X ================================================================================================ dgemv[N] 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemv[N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 96 96 0 1043.0 1.0 1.0X -java 22 23 1 4563.6 0.2 4.4X -native 45 47 1 2229.3 0.4 2.1X +f2j 96 97 2 1036.6 1.0 1.0X +java 23 23 1 4407.0 0.2 4.3X ================================================================================================ dgemv[T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemv[T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 94 94 0 1065.7 0.9 1.0X -java 22 24 1 4467.6 0.2 4.2X -native 93 93 0 1073.1 0.9 1.0X +f2j 95 95 0 1056.5 0.9 1.0X +java 23 23 0 4436.2 0.2 4.2X ================================================================================================ sgemv[N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemv[N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 95 96 0 1050.3 1.0 1.0X -java 11 12 0 8901.1 0.1 8.5X -native 34 35 1 2956.0 0.3 2.8X +f2j 96 96 2 
1042.7 1.0 1.0X +java 11 12 0 9009.9 0.1 8.6X ================================================================================================ sgemv[T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemv[T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 1077.7 0.9 1.0X -java 11 12 0 8874.5 0.1 8.2X -native 93 93 0 1079.6 0.9 1.0X +f2j 93 94 0 1070.0 0.9 1.0X +java 11 12 0 8956.4 0.1 8.4X ================================================================================================ dger ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dger: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 150 154 2 666.9 1.5 1.0X -java 125 130 2 801.5 1.2 1.2X -native 143 149 3 698.6 1.4 1.0X +f2j 133 136 1 750.3 1.3 1.0X +java 114 116 2 878.5 1.1 1.2X ================================================================================================ dspmv[U] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dspmv[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -f2j 99 100 0 503.9 2.0 1.0X -java 11 12 0 4411.4 0.2 8.8X -native 47 47 0 1067.1 0.9 2.1X +f2j 100 101 1 498.5 2.0 1.0X +java 11 12 1 4354.1 0.2 8.7X ================================================================================================ dspr[U] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dspr[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 96 99 2 520.9 1.9 1.0X -java 97 98 1 517.4 1.9 1.0X -native 73 77 2 681.8 1.5 1.3X +f2j 89 91 1 561.5 1.8 1.0X +java 89 91 1 559.8 1.8 1.0X ================================================================================================ dsyr[U] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dsyr[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 144 149 2 347.0 2.9 1.0X -java 144 148 2 346.2 2.9 1.0X -native 121 126 2 413.6 2.4 1.2X +f2j 130 133 3 384.7 2.6 1.0X +java 129 132 3 386.4 2.6 1.0X ================================================================================================ dgemm[N,N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemm[N,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 665 666 2 1503.6 0.7 1.0X -java 65 71 3 15448.3 0.1 10.3X -native 630 632 2 1586.6 0.6 1.1X +f2j 670 673 4 1493.6 0.7 1.0X +java 72 73 1 13968.9 0.1 9.4X ================================================================================================ dgemm[N,T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemm[N,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 672 674 2 1487.3 0.7 1.0X -java 65 71 3 15366.3 0.1 10.3X -native 376 377 1 2661.0 0.4 1.8X +f2j 675 678 4 1482.3 0.7 1.0X +java 72 73 1 13923.4 0.1 9.4X ================================================================================================ dgemm[T,N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemm[T,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 921 921 1 1086.1 0.9 1.0X -java 64 70 3 15574.7 0.1 14.3X -native 901 902 1 1109.5 0.9 1.0X +f2j 927 929 4 1078.8 0.9 1.0X +java 71 72 1 14079.1 0.1 13.1X ================================================================================================ 
dgemm[T,T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dgemm[T,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 950 952 5 1052.4 1.0 1.0X -java 71 72 1 14034.5 0.1 13.3X -native 914 914 1 1094.4 0.9 1.0X +f2j 960 964 6 1041.4 1.0 1.0X +java 71 73 1 13994.1 0.1 13.4X ================================================================================================ sgemm[N,N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemm[N,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 648 649 2 1543.9 0.6 1.0X -java 41 42 1 24403.6 0.0 15.8X -native 371 371 1 2699.0 0.4 1.7X +f2j 653 655 3 1531.0 0.7 1.0X +java 41 41 1 24509.8 0.0 16.0X ================================================================================================ sgemm[N,T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemm[N,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 652 653 2 1533.0 0.7 1.0X -java 42 43 1 24056.0 0.0 15.7X 
-native 371 372 5 2697.5 0.4 1.8X +f2j 658 663 4 1520.1 0.7 1.0X +java 41 43 1 24199.3 0.0 15.9X ================================================================================================ sgemm[T,N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemm[T,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 900 901 2 1111.2 0.9 1.0X -java 40 41 1 24704.4 0.0 22.2X -native 917 919 2 1090.4 0.9 1.0X +f2j 907 908 2 1103.0 0.9 1.0X +java 40 42 1 24844.4 0.0 22.5X ================================================================================================ sgemm[T,T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sgemm[T,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 948 950 1 1054.4 0.9 1.0X -java 41 42 1 24366.6 0.0 23.1X -native 913 915 1 1094.8 0.9 1.0X +f2j 955 958 5 1047.1 1.0 1.0X +java 41 42 1 24509.7 0.0 23.4X diff --git a/mllib/benchmarks/UDTSerializationBenchmark-jdk21-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-jdk21-results.txt index 00de1f2d104f9..f3a4fbcb8fc2a 100644 --- a/mllib/benchmarks/UDTSerializationBenchmark-jdk21-results.txt +++ b/mllib/benchmarks/UDTSerializationBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ VectorUDT de/serialization 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor VectorUDT de/serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -serialize 92 103 6 0.0 92038.9 1.0X -deserialize 69 74 3 0.0 69046.7 1.3X +serialize 102 107 2 0.0 101911.3 1.0X +deserialize 75 78 1 0.0 74761.1 1.4X diff --git a/mllib/benchmarks/UDTSerializationBenchmark-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-results.txt index 05004e6f74f3c..e77536e502c3b 100644 --- a/mllib/benchmarks/UDTSerializationBenchmark-results.txt +++ b/mllib/benchmarks/UDTSerializationBenchmark-results.txt @@ -2,11 +2,11 @@ VectorUDT de/serialization ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor VectorUDT de/serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -serialize 92 109 9 0.0 91694.5 1.0X -deserialize 69 71 1 0.0 69297.4 1.3X +serialize 95 97 1 0.0 94889.7 1.0X +deserialize 70 74 3 0.0 69517.2 1.4X diff --git a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Estimator b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Estimator new file mode 100644 index 0000000000000..e6902f62c4d60 --- /dev/null +++ b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Estimator @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# 
contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Spark Connect ML uses ServiceLoader to find out the supported Spark Ml estimators. +# So register the supported estimator here if you're trying to add a new one. +org.apache.spark.ml.classification.LogisticRegression diff --git a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer new file mode 100644 index 0000000000000..004ec8aeff8cf --- /dev/null +++ b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Spark Connect ML uses ServiceLoader to find out the supported Spark Ml non-model transformer. +# So register the supported transformer here if you're trying to add a new one. +org.apache.spark.ml.feature.VectorAssembler diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ClassificationSummary.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ClassificationSummary.scala index 9f3428db484c2..88cfb703fca41 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ClassificationSummary.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ClassificationSummary.scala @@ -19,6 +19,7 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.Since import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.util.Summary import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, MulticlassMetrics} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.{col, lit} @@ -28,7 +29,7 @@ import org.apache.spark.sql.types.DoubleType /** * Abstraction for multiclass classification results for a given model. */ -private[classification] trait ClassificationSummary extends Serializable { +private[classification] trait ClassificationSummary extends Summary with Serializable { /** * Dataframe output by the model's `transform` method. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index 055c1c4d4228e..43016a32e570b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -19,11 +19,11 @@ package org.apache.spark.ml.param import java.lang.reflect.Modifier import java.util.{List => JList} -import java.util.NoSuchElementException import scala.annotation.varargs import scala.collection.mutable import scala.jdk.CollectionConverters._ +import scala.reflect.ClassTag import org.json4s._ import org.json4s.jackson.JsonMethods._ @@ -45,9 +45,14 @@ import org.apache.spark.util.ArrayImplicits._ * See [[ParamValidators]] for factory methods for common validation functions. * @tparam T param value type */ -class Param[T](val parent: String, val name: String, val doc: String, val isValid: T => Boolean) +class Param[T: ClassTag]( + val parent: String, val name: String, val doc: String, val isValid: T => Boolean) extends Serializable { + // Spark Connect ML needs T type information which has been erased when compiling, + // Use classTag to preserve the T type. 
+ val paramValueClassTag = implicitly[ClassTag[T]] + def this(parent: Identifiable, name: String, doc: String, isValid: T => Boolean) = this(parent.uid, name, doc, isValid) diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala index 4c3242c132090..e67b72e090601 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala @@ -29,8 +29,9 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes} import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate import org.apache.spark.sql.catalyst.trees.BinaryLike +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.functions.lit -import org.apache.spark.sql.internal.ExpressionUtils.{column, expression} +import org.apache.spark.sql.internal.ExpressionUtils.expression import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -249,13 +250,13 @@ private[ml] class SummaryBuilderImpl( ) extends SummaryBuilder { override def summary(featuresCol: Column, weightCol: Column): Column = { - SummaryBuilderImpl.MetricsAggregate( + Column(SummaryBuilderImpl.MetricsAggregate( requestedMetrics, requestedCompMetrics, - featuresCol, - weightCol, + expression(featuresCol), + expression(weightCol), mutableAggBufferOffset = 0, - inputAggBufferOffset = 0) + inputAggBufferOffset = 0)) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Summary.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Summary.scala new file mode 100644 index 0000000000000..6205fea92ef83 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Summary.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.util + +import org.apache.spark.annotation.Since + +/** + * Trait for the Summary + * All the summaries should extend from this Summary in order to + * support connect. + */ +@Since("4.0.0") +private[spark] trait Summary diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index b5b2233ecb756..100fa13db5180 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -520,9 +520,15 @@ class Word2VecModel private[spark] ( } } + // Auxiliary constructor must begin with call to 'this'. + // Helper constructor for `def this(model: Map[String, Array[Float]])`. 
+ private def this(model: (Map[String, Int], Array[Float])) = { + this(model._1, model._2) + } + @Since("1.5.0") def this(model: Map[String, Array[Float]]) = { - this(Word2VecModel.buildWordIndex(model), Word2VecModel.buildWordVectors(model)) + this(Word2VecModel.buildFromVecMap(model)) } @Since("1.4.0") @@ -642,21 +648,22 @@ class Word2VecModel private[spark] ( @Since("1.4.0") object Word2VecModel extends Loader[Word2VecModel] { - private def buildWordIndex(model: Map[String, Array[Float]]): Map[String, Int] = { - CUtils.toMapWithIndex(model.keys) - } - - private def buildWordVectors(model: Map[String, Array[Float]]): Array[Float] = { + private def buildFromVecMap( + model: Map[String, Array[Float]]): (Map[String, Int], Array[Float]) = { require(model.nonEmpty, "Word2VecMap should be non-empty") + val (vectorSize, numWords) = (model.head._2.length, model.size) - val wordList = model.keys.toArray val wordVectors = new Array[Float](vectorSize * numWords) - var i = 0 - while (i < numWords) { - Array.copy(model(wordList(i)), 0, wordVectors, i * vectorSize, vectorSize) - i += 1 + + val wordIndex = collection.immutable.Map.newBuilder[String, Int] + wordIndex.sizeHint(numWords) + + model.iterator.zipWithIndex.foreach { + case ((word, vector), i) => + wordIndex += ((word, i)) + Array.copy(vector, 0, wordVectors, i * vectorSize, vectorSize) } - wordVectors + (wordIndex.result(), wordVectors) } private object SaveLoadV1_0 { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index b45211c1689c7..2acc49e218f2d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -204,7 +204,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { // TODO: Fix this issue for real. 
val memThreshold = 768 if (sc.isLocal) { - val driverMemory = sc.getConf.getOption("spark.driver.memory") + val driverMemory = sc.getReadOnlyConf.getOption("spark.driver.memory") .orElse(Option(System.getenv("SPARK_DRIVER_MEMORY"))) .map(Utils.memoryStringToMb) .getOrElse(Utils.DEFAULT_DRIVER_MEM_MB) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 7251dfd07a1fa..af922dda13f6b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -402,7 +402,7 @@ private[tree] object TreeEnsembleModel extends Logging { // TODO: Fix this issue for real. val memThreshold = 768 if (sc.isLocal) { - val driverMemory = sc.getConf.getOption("spark.driver.memory") + val driverMemory = sc.getReadOnlyConf.getOption("spark.driver.memory") .orElse(Option(System.getenv("SPARK_DRIVER_MEMORY"))) .map(Utils.memoryStringToMb) .getOrElse(Utils.DEFAULT_DRIVER_MEM_MB) diff --git a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java index 1ad5f7a442daa..b3993c453e91f 100644 --- a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java +++ b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.spark.ml.util.Identifiable$; +import scala.reflect.ClassTag; /** * A subclass of Params for testing. 
@@ -110,7 +111,7 @@ private void init() { ParamValidators.inRange(0.0, 1.0)); List validStrings = Arrays.asList("a", "b"); myStringParam_ = new Param<>(this, "myStringParam", "this is a string param", - ParamValidators.inArray(validStrings)); + ParamValidators.inArray(validStrings), ClassTag.apply(String.class)); myDoubleArrayParam_ = new DoubleArrayParam(this, "myDoubleArrayParam", "this is a double param"); diff --git a/pom.xml b/pom.xml index 9fa0b3cc8a4b7..41a5ce0c5592c 100644 --- a/pom.xml +++ b/pom.xml @@ -121,11 +121,11 @@ spark 9.7.1 2.0.16 - 2.24.1 + 2.24.3 3.4.1 - 4.28.3 + 4.29.3 3.11.4 3.9.3 5.7.1 @@ -137,21 +137,21 @@ 3.9.0 10.16.1.1 - 1.14.4 - 2.0.3 + 1.15.0 + 2.1.0 shaded-protobuf 11.0.24 5.0.0 4.0.1 0.10.0 - 2.5.2 + 2.5.3 2.0.8 - 4.2.28 + 4.2.29 1.12.0 1.12.0 @@ -180,12 +180,12 @@ true true 1.9.13 - 2.18.1 - 2.18.1 + 2.18.2 + 2.18.2 2.3.1 1.1.10.7 3.0.3 - 1.17.1 + 1.17.2 1.27.1 2.18.0 @@ -195,7 +195,7 @@ 2.12.0 4.1.17 - 33.2.1-jre + 33.3.1-jre 2.11.0 3.1.9 3.0.16 @@ -212,31 +212,30 @@ 1.1.0 1.9.0 1.79 - 1.15.0 + 1.16.0 6.1.1 - 4.1.114.Final + 4.1.115.Final 2.0.69.Final 76.1 - 5.11.3 - 1.11.3 + 5.11.4 + 1.11.4 - 0.13.1 + 0.13.3 - 18.0.0 + 18.1.0 3.0.0 0.12.6 org.fusesource.leveldbjni - 6.13.4 - 1.17.6 + 7.0.1 ${java.home} @@ -295,7 +294,7 @@ true - 33.2.1-jre + 33.3.1-jre 1.0.2 1.67.1 1.1.4 @@ -334,7 +333,7 @@ 42.7.4 11.5.9.0 12.8.1.jre11 - 23.6.0.24.10 + 23.6.0.24.10 ${project.version} @@ -631,7 +630,7 @@ org.apache.commons commons-text - 1.12.0 + 1.13.0 commons-lang @@ -839,7 +838,7 @@ com.github.luben zstd-jni - 1.5.6-7 + 1.5.6-9 com.clearspring.analytics @@ -1348,7 +1347,7 @@ com.oracle.database.jdbc ojdbc17 - ${ojdbc11.version} + ${ojdbc17.version} test @@ -2594,11 +2593,6 @@ javax.servlet-api ${javaxservlet.version} - - com.squareup.okio - okio - ${okio.version} - @@ -3225,7 +3219,7 @@ --> com.puppycrawl.tools checkstyle - 10.20.0 + 10.20.2 diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 1c3e2f16cb0f8..a3a56a6f02dad 
100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -201,6 +201,11 @@ object MimaExcludes { // SPARK-50112: Moving avro files from connector to sql/core ProblemFilters.exclude[Problem]("org.apache.spark.sql.avro.*"), + + // SPARK-50768: Introduce TaskContext.createResourceUninterruptibly to avoid stream leak by task interruption + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.interruptible"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.pendingInterrupt"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.createResourceUninterruptibly"), ) ++ loggingExcludes("org.apache.spark.sql.DataFrameReader") ++ loggingExcludes("org.apache.spark.sql.streaming.DataStreamReader") ++ loggingExcludes("org.apache.spark.sql.SparkSession#Builder") diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 48b243618eea3..d84c0f17d2b2b 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -65,10 +65,10 @@ object BuildCommons { ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects ++ connectProjects val optionallyEnabledProjects@Seq(kubernetes, yarn, - sparkGangliaLgpl, streamingKinesisAsl, + sparkGangliaLgpl, streamingKinesisAsl, profiler, dockerIntegrationTests, hadoopCloud, kubernetesIntegrationTests) = Seq("kubernetes", "yarn", - "ganglia-lgpl", "streaming-kinesis-asl", + "ganglia-lgpl", "streaming-kinesis-asl", "profiler", "docker-integration-tests", "hadoop-cloud", "kubernetes-integration-tests").map(ProjectRef(buildLocation, _)) val assemblyProjects@Seq(networkYarn, streamingKafka010Assembly, streamingKinesisAslAssembly) = @@ -89,7 +89,7 @@ object BuildCommons { // Google Protobuf version used for generating the protobuf. // SPARK-41247: needs to be consistent with `protobuf.version` in `pom.xml`. - val protoVersion = "4.28.3" + val protoVersion = "4.29.3" // GRPC version used for Spark Connect. 
val grpcVersion = "1.67.1" } @@ -371,7 +371,7 @@ object SparkBuild extends PomBuild { Seq( spark, hive, hiveThriftServer, repl, networkCommon, networkShuffle, networkYarn, unsafe, tags, tokenProviderKafka010, sqlKafka010, connectCommon, connect, connectClient, - variant, connectShims + variant, connectShims, profiler ).contains(x) } @@ -1057,7 +1057,7 @@ object KubernetesIntegrationTests { * Overrides to work around sbt's dependency resolution being different from Maven's. */ object DependencyOverrides { - lazy val guavaVersion = sys.props.get("guava.version").getOrElse("33.1.0-jre") + lazy val guavaVersion = sys.props.get("guava.version").getOrElse("33.3.1-jre") lazy val settings = Seq( dependencyOverrides += "com.google.guava" % "guava" % guavaVersion, dependencyOverrides += "jline" % "jline" % "2.14.6", @@ -1469,11 +1469,11 @@ object SparkUnidoc extends SharedUnidocSettings { (ScalaUnidoc / unidoc / unidocProjectFilter) := inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes, yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, connectClient, - connectShims, protobuf), + connectShims, protobuf, profiler), (JavaUnidoc / unidoc / unidocProjectFilter) := inAnyProject -- inProjects(OldDeps.project, repl, examples, tools, kubernetes, yarn, tags, streamingKafka010, sqlKafka010, connectCommon, connect, connectClient, - connectShims, protobuf), + connectShims, protobuf, profiler), ) } @@ -1724,7 +1724,7 @@ object TestSettings { (Test / testOptions) += Tests.Argument(TestFrameworks.ScalaTest, "-W", "120", "300"), (Test / testOptions) += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), // Enable Junit testing. - libraryDependencies += "com.github.sbt.junit" % "jupiter-interface" % "0.13.1" % "test", + libraryDependencies += "com.github.sbt.junit" % "jupiter-interface" % "0.13.3" % "test", // `parallelExecutionInTest` controls whether test suites belonging to the same SBT project // can run in parallel with one another. 
It does NOT control whether tests execute in parallel // within the same JVM (which is controlled by `testForkedParallel`) or whether test cases diff --git a/project/plugins.sbt b/project/plugins.sbt index 8ae03c0995132..4e1c282dcf315 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -19,10 +19,10 @@ addSbtPlugin("software.purpledragon" % "sbt-checkstyle-plugin" % "4.0.1") // If you are changing the dependency setting for checkstyle plugin, // please check pom.xml in the root of the source tree too. -libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "10.20.0" +libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "10.20.2" -// checkstyle uses guava 33.1.0-jre. -libraryDependencies += "com.google.guava" % "guava" % "33.1.0-jre" +// checkstyle uses guava 33.3.1-jre. +libraryDependencies += "com.google.guava" % "guava" % "33.3.1-jre" addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.3.0") @@ -44,6 +44,6 @@ addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3") addSbtPlugin("com.github.sbt" % "sbt-pom-reader" % "2.4.0") -addSbtPlugin("com.github.sbt.junit" % "sbt-jupiter-interface" % "0.13.1") +addSbtPlugin("com.github.sbt.junit" % "sbt-jupiter-interface" % "0.13.3") addSbtPlugin("com.thesamet" % "sbt-protoc" % "1.0.7") diff --git a/python/docs/Makefile b/python/docs/Makefile index 428b0d24b568e..045b03a1afd1b 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -21,7 +21,7 @@ SPHINXBUILD ?= sphinx-build SOURCEDIR ?= source BUILDDIR ?= build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.7-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.9-src.zip) # Put it first so that "make" without argument is like "make help". 
help: diff --git a/python/docs/make2.bat b/python/docs/make2.bat index 41e33cd07d418..ff0c8f991b958 100644 --- a/python/docs/make2.bat +++ b/python/docs/make2.bat @@ -25,7 +25,7 @@ if "%SPHINXBUILD%" == "" ( set SOURCEDIR=source set BUILDDIR=build -set PYTHONPATH=..;..\lib\py4j-0.10.9.7-src.zip +set PYTHONPATH=..;..\lib\py4j-0.10.9.9-src.zip if "%1" == "" goto help diff --git a/python/docs/source/_static/css/pyspark.css b/python/docs/source/_static/css/pyspark.css index 565eaea299359..6f47dd80e9503 100644 --- a/python/docs/source/_static/css/pyspark.css +++ b/python/docs/source/_static/css/pyspark.css @@ -91,16 +91,3 @@ u.bd-sidebar .nav>li>ul>.active:hover>a,.bd-sidebar .nav>li>ul>.active>a { .spec_table tr, td, th { border-top: none!important; } - -/* Styling to the version dropdown */ -#version-button { - padding-left: 0.2rem; - padding-right: 3.2rem; -} - -#version_switcher { - height: auto; - max-height: 300px; - width: 165px; - overflow-y: auto; -} diff --git a/python/docs/source/_templates/version-switcher.html b/python/docs/source/_templates/version-switcher.html deleted file mode 100644 index 16c443229f4be..0000000000000 --- a/python/docs/source/_templates/version-switcher.html +++ /dev/null @@ -1,77 +0,0 @@ - - - - - diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index 5640ba151176d..20c13cd768deb 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -188,19 +188,19 @@ # a list of builtin themes. html_theme = 'pydata_sphinx_theme' -html_context = { - # When releasing a new Spark version, please update the file - # "site/static/versions.json" under the code repository "spark-website" - # (item should be added in order), and also set the local environment - # variable "RELEASE_VERSION". 
- "switcher_json_url": "https://spark.apache.org/static/versions.json", - "switcher_template_url": "https://spark.apache.org/docs/{version}/api/python/index.html", -} - # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { + "check_switcher": False, + "switcher": { + # When releasing a new Spark version, please update the file + # "site/static/versions.json" under the code repository "spark-website" + # (item should be added in order), and also set the local environment + # variable "RELEASE_VERSION". + "json_url": "https://spark.apache.org/static/versions.json", + "version_match": release, + }, "header_links_before_dropdown": 6, "navbar_end": ["version-switcher", "theme-switcher", "navbar-icon-links"], "footer_start": ["spark_footer", "sphinx-version"], diff --git a/python/docs/source/development/debugging.rst b/python/docs/source/development/debugging.rst index 9510fe0abde1e..0aa2426cf862d 100644 --- a/python/docs/source/development/debugging.rst +++ b/python/docs/source/development/debugging.rst @@ -669,7 +669,7 @@ Stack Traces There are Spark configurations to control stack traces: -- ``spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled`` is true by default to simplify traceback from Python UDFs. +- ``spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled`` is true by default to simplify traceback from Python UDFs and Data Sources. - ``spark.sql.pyspark.jvmStacktrace.enabled`` is false by default to hide JVM stacktrace and to show a Python-friendly exception only. diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index d0dc285b5257c..c60839025eef6 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -177,7 +177,7 @@ PySpark requires the following dependencies. 
========================== ========================= ============================= Package Supported version Note ========================== ========================= ============================= -`py4j` >=0.10.9.7 Required to interact with JVM +`py4j` >=0.10.9.9 Required to interact with JVM ========================== ========================= ============================= Additional libraries that enhance functionality but are not included in the installation packages: @@ -207,7 +207,7 @@ Installable with ``pip install "pyspark[connect]"``. Package Supported version Note ========================== ================= ========================== `pandas` >=2.0.0 Required for Spark Connect -`pyarrow` >=10.0.0 Required for Spark Connect +`pyarrow` >=11.0.0 Required for Spark Connect `grpcio` >=1.67.0 Required for Spark Connect `grpcio-status` >=1.67.0 Required for Spark Connect `googleapis-common-protos` >=1.65.0 Required for Spark Connect @@ -223,7 +223,7 @@ Installable with ``pip install "pyspark[sql]"``. Package Supported version Note ========= ================= ====================== `pandas` >=2.0.0 Required for Spark SQL -`pyarrow` >=10.0.0 Required for Spark SQL +`pyarrow` >=11.0.0 Required for Spark SQL ========= ================= ====================== Additional libraries that enhance functionality but are not included in the installation packages: @@ -239,8 +239,8 @@ Installable with ``pip install "pyspark[pandas_on_spark]"``. 
========= ================= ================================ Package Supported version Note ========= ================= ================================ -`pandas` >=2.0.0 Required for Pandas API on Spark -`pyarrow` >=10.0.0 Required for Pandas API on Spark +`pandas` >=2.2.0 Required for Pandas API on Spark +`pyarrow` >=11.0.0 Required for Pandas API on Spark ========= ================= ================================ Additional libraries that enhance functionality but are not included in the installation packages: diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst index 5292530420025..55d067eb5fa2d 100644 --- a/python/docs/source/migration_guide/pyspark_upgrade.rst +++ b/python/docs/source/migration_guide/pyspark_upgrade.rst @@ -25,7 +25,7 @@ Upgrading from PySpark 3.5 to 4.0 * In Spark 4.0, Python 3.8 support was dropped in PySpark. * In Spark 4.0, the minimum supported version for Pandas has been raised from 1.0.5 to 2.0.0 in PySpark. * In Spark 4.0, the minimum supported version for Numpy has been raised from 1.15 to 1.21 in PySpark. -* In Spark 4.0, the minimum supported version for PyArrow has been raised from 4.0.0 to 10.0.0 in PySpark. +* In Spark 4.0, the minimum supported version for PyArrow has been raised from 4.0.0 to 11.0.0 in PySpark. * In Spark 4.0, ``Int64Index`` and ``Float64Index`` have been removed from pandas API on Spark, ``Index`` should be used directly. * In Spark 4.0, ``DataFrame.iteritems`` has been removed from pandas API on Spark, use ``DataFrame.items`` instead. * In Spark 4.0, ``Series.iteritems`` has been removed from pandas API on Spark, use ``Series.items`` instead. 
diff --git a/python/docs/source/reference/pyspark.sql/dataframe.rst b/python/docs/source/reference/pyspark.sql/dataframe.rst index 569c5cec69557..5aaea4c32577f 100644 --- a/python/docs/source/reference/pyspark.sql/dataframe.rst +++ b/python/docs/source/reference/pyspark.sql/dataframe.rst @@ -30,6 +30,7 @@ DataFrame DataFrame.agg DataFrame.alias DataFrame.approxQuantile + DataFrame.asTable DataFrame.cache DataFrame.checkpoint DataFrame.coalesce @@ -56,6 +57,7 @@ DataFrame DataFrame.dtypes DataFrame.exceptAll DataFrame.executionInfo + DataFrame.exists DataFrame.explain DataFrame.fillna DataFrame.filter @@ -75,9 +77,11 @@ DataFrame DataFrame.isStreaming DataFrame.join DataFrame.limit + DataFrame.lateralJoin DataFrame.localCheckpoint DataFrame.mapInPandas DataFrame.mapInArrow + DataFrame.metadataColumn DataFrame.melt DataFrame.na DataFrame.observe @@ -96,6 +100,7 @@ DataFrame DataFrame.sameSemantics DataFrame.sample DataFrame.sampleBy + DataFrame.scalar DataFrame.schema DataFrame.select DataFrame.selectExpr @@ -117,6 +122,7 @@ DataFrame DataFrame.toLocalIterator DataFrame.toPandas DataFrame.transform + DataFrame.transpose DataFrame.union DataFrame.unionAll DataFrame.unionByName diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst index 430e353dd701c..a1ba153110f10 100644 --- a/python/docs/source/reference/pyspark.sql/functions.rst +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -451,6 +451,8 @@ Aggregate Functions kurtosis last last_value + listagg + listagg_distinct max max_by mean @@ -476,6 +478,8 @@ Aggregate Functions stddev stddev_pop stddev_samp + string_agg + string_agg_distinct sum sum_distinct try_avg diff --git a/python/docs/source/reference/pyspark.sql/spark_session.rst b/python/docs/source/reference/pyspark.sql/spark_session.rst index 859332fa5e428..0d6a1bc79b902 100644 --- a/python/docs/source/reference/pyspark.sql/spark_session.rst +++ 
b/python/docs/source/reference/pyspark.sql/spark_session.rst @@ -44,13 +44,20 @@ See also :class:`SparkSession`. .. autosummary:: :toctree: api/ + SparkSession.addTag SparkSession.catalog + SparkSession.clearTags SparkSession.conf SparkSession.createDataFrame SparkSession.dataSource SparkSession.getActiveSession + SparkSession.getTags + SparkSession.interruptAll + SparkSession.interruptOperation + SparkSession.interruptTag SparkSession.newSession SparkSession.profile + SparkSession.removeTag SparkSession.range SparkSession.read SparkSession.readStream @@ -79,15 +86,8 @@ Spark Connect Only SparkSession.addArtifact SparkSession.addArtifacts - SparkSession.addTag SparkSession.clearProgressHandlers - SparkSession.clearTags SparkSession.client SparkSession.copyFromLocalToFs - SparkSession.getTags - SparkSession.interruptAll - SparkSession.interruptOperation - SparkSession.interruptTag SparkSession.registerProgressHandler SparkSession.removeProgressHandler - SparkSession.removeTag diff --git a/python/docs/source/reference/pyspark.sql/variant_val.rst b/python/docs/source/reference/pyspark.sql/variant_val.rst index 8630ae8aace14..883b4c8fdc3d5 100644 --- a/python/docs/source/reference/pyspark.sql/variant_val.rst +++ b/python/docs/source/reference/pyspark.sql/variant_val.rst @@ -26,3 +26,4 @@ VariantVal VariantVal.toPython VariantVal.toJson + VariantVal.parseJson diff --git a/python/docs/source/reference/pyspark.ss/index.rst b/python/docs/source/reference/pyspark.ss/index.rst index 2cb0b1216eff9..440228134fac9 100644 --- a/python/docs/source/reference/pyspark.ss/index.rst +++ b/python/docs/source/reference/pyspark.ss/index.rst @@ -20,7 +20,7 @@ Structured Streaming ==================== -This page gives an overview of all public Structed Streaming API. +This page gives an overview of all public Structured Streaming API. .. 
toctree:: :maxdepth: 2 diff --git a/python/docs/source/user_guide/sql/python_data_source.rst b/python/docs/source/user_guide/sql/python_data_source.rst index 832987d19e5a4..22b2a0b5f3c7b 100644 --- a/python/docs/source/user_guide/sql/python_data_source.rst +++ b/python/docs/source/user_guide/sql/python_data_source.rst @@ -516,3 +516,8 @@ The following example demonstrates how to implement a basic Data Source using Ar df = spark.read.format("arrowbatch").load() df.show() + +Usage Notes +----------- + +- During Data Source resolution, built-in and Scala/Java Data Sources take precedence over Python Data Sources with the same name; to explicitly use a Python Data Source, make sure its name does not conflict with the other Data Sources. diff --git a/python/lib/py4j-0.10.9.7-src.zip b/python/lib/py4j-0.10.9.7-src.zip deleted file mode 100644 index 6abba4efa0f42..0000000000000 Binary files a/python/lib/py4j-0.10.9.7-src.zip and /dev/null differ diff --git a/python/lib/py4j-0.10.9.9-src.zip b/python/lib/py4j-0.10.9.9-src.zip new file mode 100644 index 0000000000000..035bbd38ba852 Binary files /dev/null and b/python/lib/py4j-0.10.9.9-src.zip differ diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py index d799af1216345..ae20fc1efdef6 100755 --- a/python/packaging/classic/setup.py +++ b/python/packaging/classic/setup.py @@ -152,7 +152,7 @@ def _supports_symlinks(): # python/packaging/connect/setup.py _minimum_pandas_version = "2.0.0" _minimum_numpy_version = "1.21" -_minimum_pyarrow_version = "10.0.0" +_minimum_pyarrow_version = "11.0.0" _minimum_grpc_version = "1.67.0" _minimum_googleapis_common_protos_version = "1.65.0" @@ -343,7 +343,7 @@ def run(self): license="http://www.apache.org/licenses/LICENSE-2.0", # Don't forget to update python/docs/source/getting_started/install.rst # if you're updating the versions or dependencies. 
- install_requires=["py4j==0.10.9.7"], + install_requires=["py4j==0.10.9.9"], extras_require={ "ml": ["numpy>=%s" % _minimum_numpy_version], "mllib": ["numpy>=%s" % _minimum_numpy_version], diff --git a/python/packaging/connect/setup.py b/python/packaging/connect/setup.py index de76d51d0cfdc..51d0a4c9e3601 100755 --- a/python/packaging/connect/setup.py +++ b/python/packaging/connect/setup.py @@ -72,9 +72,12 @@ "pyspark.testing", "pyspark.resource.tests", "pyspark.sql.tests", + "pyspark.sql.tests.arrow", "pyspark.sql.tests.connect", + "pyspark.sql.tests.connect.arrow", "pyspark.sql.tests.connect.streaming", "pyspark.sql.tests.connect.client", + "pyspark.sql.tests.connect.pandas", "pyspark.sql.tests.connect.shell", "pyspark.sql.tests.pandas", "pyspark.sql.tests.plot", @@ -129,7 +132,7 @@ # python/packaging/classic/setup.py _minimum_pandas_version = "2.0.0" _minimum_numpy_version = "1.21" - _minimum_pyarrow_version = "10.0.0" + _minimum_pyarrow_version = "11.0.0" _minimum_grpc_version = "1.59.3" _minimum_googleapis_common_protos_version = "1.56.4" diff --git a/python/pyspark/cloudpickle/__init__.py b/python/pyspark/cloudpickle/__init__.py index a3348e8b3da28..bdb1738611b3b 100644 --- a/python/pyspark/cloudpickle/__init__.py +++ b/python/pyspark/cloudpickle/__init__.py @@ -3,7 +3,7 @@ __doc__ = cloudpickle.__doc__ -__version__ = "3.0.0" +__version__ = "3.1.1" __all__ = [ # noqa "__version__", diff --git a/python/pyspark/cloudpickle/cloudpickle.py b/python/pyspark/cloudpickle/cloudpickle.py index eb43a9676bbb1..4d532e5de9f2c 100644 --- a/python/pyspark/cloudpickle/cloudpickle.py +++ b/python/pyspark/cloudpickle/cloudpickle.py @@ -63,7 +63,7 @@ import logging import opcode import pickle -from pickle import _getattribute +from pickle import _getattribute as _pickle_getattribute import platform import struct import sys @@ -126,7 +126,7 @@ def _lookup_class_or_track(class_tracker_id, class_def): def register_pickle_by_value(module): - """Register a module to make it 
functions and classes picklable by value. + """Register a module to make its functions and classes picklable by value. By default, functions and classes that are attributes of an importable module are to be pickled by reference, that is relying on re-importing @@ -192,6 +192,14 @@ def _is_registered_pickle_by_value(module): return False +if sys.version_info >= (3, 14): + def _getattribute(obj, name): + return _pickle_getattribute(obj, name.split('.')) +else: + def _getattribute(obj, name): + return _pickle_getattribute(obj, name)[0] + + def _whichmodule(obj, name): """Find the module an object belongs to. @@ -213,12 +221,13 @@ def _whichmodule(obj, name): # sys.modules if ( module_name == "__main__" + or module_name == "__mp_main__" or module is None or not isinstance(module, types.ModuleType) ): continue try: - if _getattribute(module, name)[0] is obj: + if _getattribute(module, name) is obj: return module_name except Exception: pass @@ -292,7 +301,7 @@ def _lookup_module_and_qualname(obj, name=None): return None try: - obj2, parent = _getattribute(module, name) + obj2 = _getattribute(module, name) except AttributeError: # obj was not found inside the module it points to return None @@ -409,7 +418,10 @@ def _walk_global_ops(code): def _extract_class_dict(cls): """Retrieve a copy of the dict of a class without the inherited method.""" - clsdict = dict(cls.__dict__) # copy dict proxy to a dict + # Hack to circumvent non-predictable memoization caused by string interning. + # See the inline comment in _class_setstate for details. + clsdict = {"".join(k): cls.__dict__[k] for k in sorted(cls.__dict__)} + if len(cls.__bases__) == 1: inherited_dict = cls.__bases__[0].__dict__ else: @@ -533,9 +545,15 @@ class id will also reuse this class definition. The "extra" variable is meant to be a dict (or None) that can be used for forward compatibility shall the need arise. 
""" + # We need to intern the keys of the type_kwargs dict to avoid having + # different pickles for the same dynamic class depending on whether it was + # dynamically created or reconstructed from a pickled stream. + type_kwargs = {sys.intern(k): v for k, v in type_kwargs.items()} + skeleton_class = types.new_class( name, bases, {"metaclass": type_constructor}, lambda ns: ns.update(type_kwargs) ) + return _lookup_class_or_track(class_tracker_id, skeleton_class) @@ -694,8 +712,10 @@ def _function_getstate(func): # unpickling time by iterating over slotstate and calling setattr(func, # slotname, slotvalue) slotstate = { - "__name__": func.__name__, - "__qualname__": func.__qualname__, + # Hack to circumvent non-predictable memoization caused by string interning. + # See the inline comment in _class_setstate for details. + "__name__": "".join(func.__name__), + "__qualname__": "".join(func.__qualname__), "__annotations__": func.__annotations__, "__kwdefaults__": func.__kwdefaults__, "__defaults__": func.__defaults__, @@ -721,7 +741,9 @@ def _function_getstate(func): ) slotstate["__globals__"] = f_globals - state = func.__dict__ + # Hack to circumvent non-predictable memoization caused by string interning. + # See the inline comment in _class_setstate for details. + state = {"".join(k): v for k, v in func.__dict__.items()} return state, slotstate @@ -802,6 +824,19 @@ def _code_reduce(obj): # of the specific type from types, for example: # >>> from types import CodeType # >>> help(CodeType) + + # Hack to circumvent non-predictable memoization caused by string interning. + # See the inline comment in _class_setstate for details. + co_name = "".join(obj.co_name) + + # Create shallow copies of these tuple to make cloudpickle payload deterministic. + # When creating a code object during load, copies of these four tuples are + # created, while in the main process, these tuples can be shared. + # By always creating copies, we make sure the resulting payload is deterministic. 
+ co_names = tuple(name for name in obj.co_names) + co_varnames = tuple(name for name in obj.co_varnames) + co_freevars = tuple(name for name in obj.co_freevars) + co_cellvars = tuple(name for name in obj.co_cellvars) if hasattr(obj, "co_exceptiontable"): # Python 3.11 and later: there are some new attributes # related to the enhanced exceptions. @@ -814,16 +849,16 @@ def _code_reduce(obj): obj.co_flags, obj.co_code, obj.co_consts, - obj.co_names, - obj.co_varnames, + co_names, + co_varnames, obj.co_filename, - obj.co_name, + co_name, obj.co_qualname, obj.co_firstlineno, obj.co_linetable, obj.co_exceptiontable, - obj.co_freevars, - obj.co_cellvars, + co_freevars, + co_cellvars, ) elif hasattr(obj, "co_linetable"): # Python 3.10 and later: obj.co_lnotab is deprecated and constructor @@ -837,14 +872,14 @@ def _code_reduce(obj): obj.co_flags, obj.co_code, obj.co_consts, - obj.co_names, - obj.co_varnames, + co_names, + co_varnames, obj.co_filename, - obj.co_name, + co_name, obj.co_firstlineno, obj.co_linetable, - obj.co_freevars, - obj.co_cellvars, + co_freevars, + co_cellvars, ) elif hasattr(obj, "co_nmeta"): # pragma: no cover # "nogil" Python: modified attributes from 3.9 @@ -859,15 +894,15 @@ def _code_reduce(obj): obj.co_flags, obj.co_code, obj.co_consts, - obj.co_varnames, + co_varnames, obj.co_filename, - obj.co_name, + co_name, obj.co_firstlineno, obj.co_lnotab, obj.co_exc_handlers, obj.co_jump_table, - obj.co_freevars, - obj.co_cellvars, + co_freevars, + co_cellvars, obj.co_free2reg, obj.co_cell2reg, ) @@ -882,14 +917,14 @@ def _code_reduce(obj): obj.co_flags, obj.co_code, obj.co_consts, - obj.co_names, - obj.co_varnames, + co_names, + co_varnames, obj.co_filename, - obj.co_name, + co_name, obj.co_firstlineno, obj.co_lnotab, - obj.co_freevars, - obj.co_cellvars, + co_freevars, + co_cellvars, ) return types.CodeType, args @@ -1127,7 +1162,30 @@ def _class_setstate(obj, state): if attrname == "_abc_impl": registry = attr else: + # Note: setting attribute names 
on a class automatically triggers their + # interning in CPython: + # https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957 + # + # This means that to get deterministic pickling for a dynamic class that + # was initially defined in a different Python process, the pickler + # needs to ensure that dynamic class and function attribute names are + # systematically copied into a non-interned version to avoid + # unpredictable pickle payloads. + # + # Indeed the Pickler's memoizer relies on physical object identity to break + # cycles in the reference graph of the object being serialized. setattr(obj, attrname, attr) + + if sys.version_info >= (3, 13) and "__firstlineno__" in state: + # Set the Python 3.13+ only __firstlineno__ attribute one more time, as it + # will be automatically deleted by the `setattr(obj, attrname, attr)` call + # above when `attrname` is "__firstlineno__". We assume that preserving this + # information might be important for some users and that it not stale in the + # context of cloudpickle usage, hence legitimate to propagate. Furthermore it + # is necessary to do so to keep deterministic chained pickling as tested in + # test_deterministic_str_interning_for_chained_dynamic_class_pickling. + obj.__firstlineno__ = state["__firstlineno__"] + if registry is not None: for subclass in registry: obj.register(subclass) diff --git a/python/pyspark/cloudpickle/cloudpickle_fast.py b/python/pyspark/cloudpickle/cloudpickle_fast.py index 52d6732e44ebc..20280f0ca354a 100644 --- a/python/pyspark/cloudpickle/cloudpickle_fast.py +++ b/python/pyspark/cloudpickle/cloudpickle_fast.py @@ -6,6 +6,7 @@ See: tests/test_backward_compat.py """ + from . 
import cloudpickle diff --git a/python/pyspark/cloudpickle/compat.py b/python/pyspark/cloudpickle/compat.py deleted file mode 100644 index 5e9b52773d279..0000000000000 --- a/python/pyspark/cloudpickle/compat.py +++ /dev/null @@ -1,18 +0,0 @@ -import sys - - -if sys.version_info < (3, 8): - try: - import pickle5 as pickle # noqa: F401 - from pickle5 import Pickler # noqa: F401 - except ImportError: - import pickle # noqa: F401 - - # Use the Python pickler for old CPython versions - from pickle import _Pickler as Pickler # noqa: F401 -else: - import pickle # noqa: F401 - - # Pickler will the C implementation in CPython and the Python - # implementation in PyPy - from pickle import Pickler # noqa: F401 diff --git a/python/pyspark/core/context.py b/python/pyspark/core/context.py index 6ea793a118389..5fcd4ffb09210 100644 --- a/python/pyspark/core/context.py +++ b/python/pyspark/core/context.py @@ -75,6 +75,7 @@ if TYPE_CHECKING: from pyspark.accumulators import AccumulatorParam + from pyspark.sql.types import DataType, StructType __all__ = ["SparkContext"] @@ -362,10 +363,14 @@ def _do_init( # Create a temporary directory inside spark.local.dir: assert self._jvm is not None - local_dir = self._jvm.org.apache.spark.util.Utils.getLocalDir(self._jsc.sc().conf()) - self._temp_dir = self._jvm.org.apache.spark.util.Utils.createTempDir( - local_dir, "pyspark" - ).getAbsolutePath() + local_dir = getattr(self._jvm, "org.apache.spark.util.Utils").getLocalDir( + self._jsc.sc().conf() + ) + self._temp_dir = ( + getattr(self._jvm, "org.apache.spark.util.Utils") + .createTempDir(local_dir, "pyspark") + .getAbsolutePath() + ) # profiling stats collected for each PythonRDD if ( @@ -554,7 +559,7 @@ def setSystemProperty(cls, key: str, value: str) -> None: """ SparkContext._ensure_initialized() assert SparkContext._jvm is not None - SparkContext._jvm.java.lang.System.setProperty(key, value) + getattr(SparkContext._jvm, "java.lang.System").setProperty(key, value) @classmethod def 
getSystemProperty(cls, key: str) -> str: @@ -576,7 +581,7 @@ def getSystemProperty(cls, key: str) -> str: """ SparkContext._ensure_initialized() assert SparkContext._jvm is not None - return SparkContext._jvm.java.lang.System.getProperty(key) + return getattr(SparkContext._jvm, "java.lang.System").getProperty(key) @property def version(self) -> str: @@ -1201,7 +1206,7 @@ def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]: def _dictToJavaMap(self, d: Optional[Dict[str, str]]) -> JavaMap: assert self._jvm is not None - jm = self._jvm.java.util.HashMap() + jm = getattr(self._jvm, "java.util.HashMap")() if not d: d = {} for k, v in d.items(): @@ -1740,9 +1745,9 @@ def union(self, rdds: List[RDD[T]]) -> RDD[T]: assert gw is not None jvm = SparkContext._jvm assert jvm is not None - jrdd_cls = jvm.org.apache.spark.api.java.JavaRDD - jpair_rdd_cls = jvm.org.apache.spark.api.java.JavaPairRDD - jdouble_rdd_cls = jvm.org.apache.spark.api.java.JavaDoubleRDD + jrdd_cls = getattr(jvm, "org.apache.spark.api.java.JavaRDD") + jpair_rdd_cls = getattr(jvm, "org.apache.spark.api.java.JavaPairRDD") + jdouble_rdd_cls = getattr(jvm, "org.apache.spark.api.java.JavaDoubleRDD") if is_instance_of(gw, rdds[0]._jrdd, jrdd_cls): cls = jrdd_cls elif is_instance_of(gw, rdds[0]._jrdd, jpair_rdd_cls): @@ -1933,7 +1938,7 @@ def listFiles(self) -> List[str]: :meth:`SparkContext.addFile` """ return list( - self._jvm.scala.jdk.javaapi.CollectionConverters.asJava( # type: ignore[union-attr] + getattr(self._jvm, "scala.jdk.javaapi.CollectionConverters").asJava( self._jsc.sc().listFiles() ) ) @@ -2061,7 +2066,7 @@ def listArchives(self) -> List[str]: :meth:`SparkContext.addArchive` """ return list( - self._jvm.scala.jdk.javaapi.CollectionConverters.asJava( # type: ignore[union-attr] + getattr(self._jvm, "scala.jdk.javaapi.CollectionConverters").asJava( self._jsc.sc().listArchives() ) ) @@ -2111,7 +2116,7 @@ def _getJavaStorageLevel(self, storageLevel: StorageLevel) -> JavaObject: if not 
isinstance(storageLevel, StorageLevel): raise TypeError("storageLevel must be of type pyspark.StorageLevel") assert self._jvm is not None - newStorageLevel = self._jvm.org.apache.spark.storage.StorageLevel + newStorageLevel = getattr(self._jvm, "org.apache.spark.storage.StorageLevel") return newStorageLevel( storageLevel.useDisk, storageLevel.useMemory, @@ -2619,6 +2624,16 @@ def _assert_on_driver() -> None: messageParameters={}, ) + def _to_ddl(self, struct: "StructType") -> str: + assert self._jvm is not None + return self._jvm.PythonSQLUtils.jsonToDDL(struct.json()) + + def _parse_ddl(self, ddl: str) -> "DataType": + from pyspark.sql.types import _parse_datatype_json_string + + assert self._jvm is not None + return _parse_datatype_json_string(self._jvm.PythonSQLUtils.ddlToJson(ddl)) + def _test() -> None: import doctest diff --git a/python/pyspark/core/files.py b/python/pyspark/core/files.py index 83b98726aee70..a2544425af0cf 100644 --- a/python/pyspark/core/files.py +++ b/python/pyspark/core/files.py @@ -145,7 +145,7 @@ def getRootDirectory(cls) -> str: # This will have to change if we support multiple SparkContexts: assert cls._sc is not None assert cls._sc._jvm is not None - return cls._sc._jvm.org.apache.spark.SparkFiles.getRootDirectory() + return getattr(cls._sc._jvm, "org.apache.spark.SparkFiles").getRootDirectory() def _test() -> None: diff --git a/python/pyspark/core/rdd.py b/python/pyspark/core/rdd.py index a40af3e551584..bbf17dbed7fa2 100644 --- a/python/pyspark/core/rdd.py +++ b/python/pyspark/core/rdd.py @@ -3286,7 +3286,9 @@ def func(split: int, iterator: Iterable[Any]) -> Iterable[bytes]: assert self.ctx._jvm is not None if compressionCodecClass: - compressionCodec = self.ctx._jvm.java.lang.Class.forName(compressionCodecClass) + compressionCodec = getattr(self.ctx._jvm, "java.lang.Class").forName( + compressionCodecClass + ) keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path, compressionCodec) else: 
keyed._jrdd.map(self.ctx._jvm.BytesToString()).saveAsTextFile(path) @@ -4998,8 +5000,8 @@ def barrier(self: "RDD[T]") -> "RDDBarrier[T]": ----- For additional information see - - `SPIP: Barrier Execution Mode `_ - - `Design Doc `_ + - `SPIP: Barrier Execution Mode `_ + - `Design Doc `_ This API is experimental """ @@ -5044,7 +5046,7 @@ def withResources(self: "RDD[T]", profile: ResourceProfile) -> "RDD[T]": else: assert self.ctx._jvm is not None - builder = self.ctx._jvm.org.apache.spark.resource.ResourceProfileBuilder() + builder = getattr(self.ctx._jvm, "org.apache.spark.resource.ResourceProfileBuilder")() ereqs = ExecutorResourceRequests(self.ctx._jvm, profile._executor_resource_requests) treqs = TaskResourceRequests(self.ctx._jvm, profile._task_resource_requests) builder.require(ereqs._java_executor_resource_requests) diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json index b2a68a83bfa70..b7c1ec23c3af9 100644 --- a/python/pyspark/errors/error-conditions.json +++ b/python/pyspark/errors/error-conditions.json @@ -189,11 +189,6 @@ "Remote client cannot create a SparkContext. Create SparkSession instead." ] }, - "DATA_SOURCE_CREATE_ERROR": { - "message": [ - "Failed to create python data source instance, error: ." - ] - }, "DATA_SOURCE_INVALID_RETURN_TYPE": { "message": [ "Unsupported return type ('') from Python data source ''. Expected types: ." @@ -1075,7 +1070,7 @@ }, "UNSUPPORTED_JOIN_TYPE": { "message": [ - "Unsupported join type: . Supported join types include: 'inner', 'outer', 'full', 'fullouter', 'full_outer', 'leftouter', 'left', 'left_outer', 'rightouter', 'right', 'right_outer', 'leftsemi', 'left_semi', 'semi', 'leftanti', 'left_anti', 'anti', 'cross'." + "Unsupported join type: ''. Supported join types include: ." ] }, "UNSUPPORTED_LITERAL": { @@ -1108,6 +1103,11 @@ "Function `` should use only POSITIONAL or POSITIONAL OR KEYWORD arguments." 
] }, + "UNSUPPORTED_PIE_PLOT_PARAM": { + "message": [ + "Pie plot requires either a `y` column or `subplots=True`." + ] + }, "UNSUPPORTED_PLOT_BACKEND": { "message": [ "`` is not supported, it should be one of the values from " diff --git a/python/pyspark/errors/exceptions/captured.py b/python/pyspark/errors/exceptions/captured.py index 749b0cca96b78..b27c61d7563fb 100644 --- a/python/pyspark/errors/exceptions/captured.py +++ b/python/pyspark/errors/exceptions/captured.py @@ -67,7 +67,7 @@ def __init__( self._stackTrace = ( stackTrace if stackTrace is not None - else (SparkContext._jvm.org.apache.spark.util.Utils.exceptionString(origin)) + else (getattr(SparkContext._jvm, "org.apache.spark.util.Utils").exceptionString(origin)) ) self._cause = convert_exception(cause) if cause is not None else None if self._cause is None and origin is not None and origin.getCause() is not None: @@ -85,7 +85,7 @@ def __str__(self) -> str: # SPARK-42752: default to True to see issues with initialization debug_enabled = True try: - sql_conf = jvm.org.apache.spark.sql.internal.SQLConf.get() + sql_conf = getattr(jvm, "org.apache.spark.sql.internal.SQLConf").get() debug_enabled = sql_conf.pysparkJVMStacktraceEnabled() except BaseException: pass @@ -149,7 +149,7 @@ def getMessage(self) -> str: errorClass = self._origin.getErrorClass() messageParameters = self._origin.getMessageParameters() - error_message = gw.jvm.org.apache.spark.SparkThrowableHelper.getMessage( + error_message = getattr(gw.jvm, "org.apache.spark.SparkThrowableHelper").getMessage( errorClass, messageParameters ) @@ -220,7 +220,7 @@ def convert_exception(e: "Py4JJavaError") -> CapturedException: return SparkNoSuchElementException(origin=e) c: "Py4JJavaError" = e.getCause() - stacktrace: str = jvm.org.apache.spark.util.Utils.exceptionString(e) + stacktrace: str = getattr(jvm, "org.apache.spark.util.Utils").exceptionString(e) if c is not None and ( is_instance_of(gw, c, "org.apache.spark.api.python.PythonException") # To 
make sure this only catches Python UDFs. diff --git a/python/pyspark/errors/exceptions/connect.py b/python/pyspark/errors/exceptions/connect.py index c24b25af01631..ef90f8559b425 100644 --- a/python/pyspark/errors/exceptions/connect.py +++ b/python/pyspark/errors/exceptions/connect.py @@ -54,206 +54,67 @@ def convert_exception( resp: Optional[pb2.FetchErrorDetailsResponse], display_server_stacktrace: bool = False, ) -> SparkConnectException: - classes = [] - sql_state = None - errorClass = None - messageParameters = None - contexts: Optional[List[BaseQueryContext]] = None - - if "classes" in info.metadata: - classes = json.loads(info.metadata["classes"]) - - if "sqlState" in info.metadata: - sql_state = info.metadata["sqlState"] - - if "errorClass" in info.metadata: - errorClass = info.metadata["errorClass"] - - if "messageParameters" in info.metadata: - messageParameters = json.loads(info.metadata["messageParameters"]) - + raw_classes = info.metadata.get("classes") + classes: List[str] = json.loads(raw_classes) if raw_classes else [] + sql_state = info.metadata.get("sqlState") + error_class = info.metadata.get("errorClass") + raw_message_parameters = info.metadata.get("messageParameters") + message_parameters: Dict[str, str] = ( + json.loads(raw_message_parameters) if raw_message_parameters else {} + ) stacktrace: Optional[str] = None + if resp is not None and resp.HasField("root_error_idx"): message = resp.errors[resp.root_error_idx].message stacktrace = _extract_jvm_stacktrace(resp) else: message = truncated_message - stacktrace = info.metadata["stackTrace"] if "stackTrace" in info.metadata else None - display_server_stacktrace = display_server_stacktrace if stacktrace is not None else False - - if ( - resp is not None - and resp.errors - and hasattr(resp.errors[resp.root_error_idx], "spark_throwable") - ): - messageParameters = dict( - resp.errors[resp.root_error_idx].spark_throwable.message_parameters - ) - contexts = [] - for context in 
resp.errors[resp.root_error_idx].spark_throwable.query_contexts: - if context.context_type == pb2.FetchErrorDetailsResponse.QueryContext.SQL: - contexts.append(SQLQueryContext(context)) - else: - contexts.append(DataFrameQueryContext(context)) - - if "org.apache.spark.sql.catalyst.parser.ParseException" in classes: - return ParseException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - # Order matters. ParseException inherits AnalysisException. - elif "org.apache.spark.sql.AnalysisException" in classes: - return AnalysisException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "org.apache.spark.sql.streaming.StreamingQueryException" in classes: - return StreamingQueryException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "org.apache.spark.sql.execution.QueryExecutionException" in classes: - return QueryExecutionException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - # Order matters. NumberFormatException inherits IllegalArgumentException. 
- elif "java.lang.NumberFormatException" in classes: - return NumberFormatException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "java.lang.IllegalArgumentException" in classes: - return IllegalArgumentException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "java.lang.ArithmeticException" in classes: - return ArithmeticException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "java.lang.UnsupportedOperationException" in classes: - return UnsupportedOperationException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "java.lang.ArrayIndexOutOfBoundsException" in classes: - return ArrayIndexOutOfBoundsException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "java.time.DateTimeException" in classes: - return DateTimeException( - message, - errorClass=errorClass, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "org.apache.spark.SparkRuntimeException" in classes: - return SparkRuntimeException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - 
display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "org.apache.spark.SparkUpgradeException" in classes: - return SparkUpgradeException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - elif "org.apache.spark.api.python.PythonException" in classes: + stacktrace = info.metadata.get("stackTrace") + display_server_stacktrace = display_server_stacktrace if stacktrace else False + + contexts = None + if resp and resp.HasField("root_error_idx"): + root_error = resp.errors[resp.root_error_idx] + if hasattr(root_error, "spark_throwable"): + message_parameters = dict(root_error.spark_throwable.message_parameters) + contexts = [ + SQLQueryContext(c) + if c.context_type == pb2.FetchErrorDetailsResponse.QueryContext.SQL + else DataFrameQueryContext(c) + for c in root_error.spark_throwable.query_contexts + ] + + if "org.apache.spark.api.python.PythonException" in classes: return PythonException( "\n An exception was thrown from the Python worker. " "Please see the stack trace below.\n%s" % message ) - elif "org.apache.spark.SparkNoSuchElementException" in classes: - return SparkNoSuchElementException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - # Make sure that the generic SparkException is handled last. 
- elif "org.apache.spark.SparkException" in classes: - return SparkException( - message, - errorClass=errorClass, - messageParameters=messageParameters, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) - else: - return SparkConnectGrpcException( - message, - reason=info.reason, - messageParameters=messageParameters, - errorClass=errorClass, - sql_state=sql_state, - server_stacktrace=stacktrace, - display_server_stacktrace=display_server_stacktrace, - contexts=contexts, - ) + + # Return exception based on class mapping + for error_class_name in classes: + ExceptionClass = EXCEPTION_CLASS_MAPPING.get(error_class_name) + if ExceptionClass: + return ExceptionClass( + message, + errorClass=error_class, + messageParameters=message_parameters, + sql_state=sql_state, + server_stacktrace=stacktrace, + display_server_stacktrace=display_server_stacktrace, + contexts=contexts, + ) + + # Return SparkConnectGrpcException if there is no matched exception class + return SparkConnectGrpcException( + message, + reason=info.reason, + messageParameters=message_parameters, + errorClass=error_class, + sql_state=sql_state, + server_stacktrace=stacktrace, + display_server_stacktrace=display_server_stacktrace, + contexts=contexts, + ) def _extract_jvm_stacktrace(resp: pb2.FetchErrorDetailsResponse) -> str: @@ -434,6 +295,26 @@ class SparkNoSuchElementException(SparkConnectGrpcException, BaseNoSuchElementEx """ +# Update EXCEPTION_CLASS_MAPPING here when adding a new exception +EXCEPTION_CLASS_MAPPING = { + "org.apache.spark.sql.catalyst.parser.ParseException": ParseException, + "org.apache.spark.sql.AnalysisException": AnalysisException, + "org.apache.spark.sql.streaming.StreamingQueryException": StreamingQueryException, + "org.apache.spark.sql.execution.QueryExecutionException": QueryExecutionException, + "java.lang.NumberFormatException": NumberFormatException, + "java.lang.IllegalArgumentException": 
IllegalArgumentException, + "java.lang.ArithmeticException": ArithmeticException, + "java.lang.UnsupportedOperationException": UnsupportedOperationException, + "java.lang.ArrayIndexOutOfBoundsException": ArrayIndexOutOfBoundsException, + "java.time.DateTimeException": DateTimeException, + "org.apache.spark.SparkRuntimeException": SparkRuntimeException, + "org.apache.spark.SparkUpgradeException": SparkUpgradeException, + "org.apache.spark.api.python.PythonException": PythonException, + "org.apache.spark.SparkNoSuchElementException": SparkNoSuchElementException, + "org.apache.spark.SparkException": SparkException, +} + + class SQLQueryContext(BaseQueryContext): def __init__(self, q: pb2.FetchErrorDetailsResponse.QueryContext): self._q = q diff --git a/python/pyspark/errors/tests/test_connect_errors_conversion.py b/python/pyspark/errors/tests/test_connect_errors_conversion.py new file mode 100644 index 0000000000000..a6ed5e7d391ee --- /dev/null +++ b/python/pyspark/errors/tests/test_connect_errors_conversion.py @@ -0,0 +1,169 @@ +# -*- encoding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import unittest +from pyspark.errors.exceptions.connect import ( + convert_exception, + EXCEPTION_CLASS_MAPPING, + SparkConnectGrpcException, + PythonException, + AnalysisException, +) +from pyspark.sql.connect.proto import FetchErrorDetailsResponse as pb2 +from google.rpc.error_details_pb2 import ErrorInfo + + +class ConnectErrorsTest(unittest.TestCase): + def test_convert_exception_known_class(self): + # Mock ErrorInfo with a known error class + info = { + "reason": "org.apache.spark.sql.AnalysisException", + "metadata": { + "classes": '["org.apache.spark.sql.AnalysisException"]', + "sqlState": "42000", + "errorClass": "ANALYSIS.ERROR", + "messageParameters": '{"param1": "value1"}', + }, + } + truncated_message = "Analysis error occurred" + exception = convert_exception( + info=ErrorInfo(**info), truncated_message=truncated_message, resp=None + ) + + self.assertIsInstance(exception, AnalysisException) + self.assertEqual(exception.getSqlState(), "42000") + self.assertEqual(exception._errorClass, "ANALYSIS.ERROR") + self.assertEqual(exception._messageParameters, {"param1": "value1"}) + + def test_convert_exception_python_exception(self): + # Mock ErrorInfo for PythonException + info = { + "reason": "org.apache.spark.api.python.PythonException", + "metadata": { + "classes": '["org.apache.spark.api.python.PythonException"]', + }, + } + truncated_message = "Python worker error occurred" + exception = convert_exception( + info=ErrorInfo(**info), truncated_message=truncated_message, resp=None + ) + + self.assertIsInstance(exception, PythonException) + self.assertIn("An exception was thrown from the Python worker", exception.getMessage()) + + def test_convert_exception_unknown_class(self): + # Mock ErrorInfo with an unknown error class + info = { + "reason": "org.apache.spark.UnknownException", + "metadata": {"classes": '["org.apache.spark.UnknownException"]'}, + } + truncated_message = "Unknown error occurred" + exception = convert_exception( + 
info=ErrorInfo(**info), truncated_message=truncated_message, resp=None + ) + + self.assertIsInstance(exception, SparkConnectGrpcException) + self.assertEqual( + exception.getMessage(), "(org.apache.spark.UnknownException) Unknown error occurred" + ) + + def test_exception_class_mapping(self): + # Ensure that all keys in EXCEPTION_CLASS_MAPPING are valid + for error_class_name, exception_class in EXCEPTION_CLASS_MAPPING.items(): + self.assertTrue( + hasattr(exception_class, "__name__"), + f"{exception_class} in EXCEPTION_CLASS_MAPPING is not a valid class", + ) + + def test_convert_exception_with_stacktrace(self): + # Mock FetchErrorDetailsResponse with stacktrace + resp = pb2( + root_error_idx=0, + errors=[ + pb2.Error( + message="Root error message", + error_type_hierarchy=["org.apache.spark.SparkException"], + stack_trace=[ + pb2.StackTraceElement( + declaring_class="org.apache.spark.Main", + method_name="main", + file_name="Main.scala", + line_number=42, + ), + ], + cause_idx=1, + ), + pb2.Error( + message="Cause error message", + error_type_hierarchy=["java.lang.RuntimeException"], + stack_trace=[ + pb2.StackTraceElement( + declaring_class="org.apache.utils.Helper", + method_name="help", + file_name="Helper.java", + line_number=10, + ), + ], + ), + ], + ) + + info = { + "reason": "org.apache.spark.SparkException", + "metadata": { + "classes": '["org.apache.spark.SparkException"]', + "sqlState": "42000", + }, + } + truncated_message = "Root error message" + exception = convert_exception( + info=ErrorInfo(**info), truncated_message=truncated_message, resp=resp + ) + + self.assertIsInstance(exception, SparkConnectGrpcException) + self.assertIn("Root error message", exception.getMessage()) + self.assertIn("Caused by", exception.getMessage()) + + def test_convert_exception_fallback(self): + # Mock ErrorInfo with missing class information + info = { + "reason": "org.apache.spark.UnknownReason", + "metadata": {}, + } + truncated_message = "Fallback error occurred" + 
exception = convert_exception( + info=ErrorInfo(**info), truncated_message=truncated_message, resp=None + ) + + self.assertIsInstance(exception, SparkConnectGrpcException) + self.assertEqual( + exception.getMessage(), "(org.apache.spark.UnknownReason) Fallback error occurred" + ) + + +if __name__ == "__main__": + import unittest + from pyspark.errors.tests.test_errors import * # noqa: F401 + + try: + import xmlrunner + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/errors/utils.py b/python/pyspark/errors/utils.py index cbe5739204ac1..0d01cbb961bb6 100644 --- a/python/pyspark/errors/utils.py +++ b/python/pyspark/errors/utils.py @@ -31,21 +31,44 @@ Type, Optional, Union, - TYPE_CHECKING, overload, cast, ) +from types import FrameType + import pyspark from pyspark.errors.error_classes import ERROR_CLASSES_MAP -if TYPE_CHECKING: - from pyspark.sql import SparkSession - T = TypeVar("T") FuncT = TypeVar("FuncT", bound=Callable[..., Any]) _current_origin = threading.local() +# Providing DataFrame debugging options to reduce performance slowdown. +# Default is True. 
+_enable_debugging_cache = None + + +def is_debugging_enabled() -> bool: + global _enable_debugging_cache + + if _enable_debugging_cache is None: + from pyspark.sql import SparkSession + + spark = SparkSession.getActiveSession() + if spark is not None: + _enable_debugging_cache = ( + spark.conf.get( + "spark.python.sql.dataFrameDebugging.enabled", + "true", # type: ignore[union-attr] + ).lower() + == "true" + ) + else: + _enable_debugging_cache = False + + return _enable_debugging_cache + def current_origin() -> threading.local: global _current_origin @@ -164,17 +187,12 @@ def get_message_template(self, errorClass: str) -> str: return message_template -def _capture_call_site(spark_session: "SparkSession", depth: int) -> str: +def _capture_call_site(depth: int) -> str: """ Capture the call site information including file name, line number, and function name. This function updates the thread-local storage from JVM side (PySparkCurrentOrigin) with the current call site information when a PySpark API function is called. - Parameters - ---------- - spark_session : SparkSession - Current active Spark session. 
- Notes ----- The call site information is used to enhance error messages with the exact location @@ -183,18 +201,15 @@ def _capture_call_site(spark_session: "SparkSession", depth: int) -> str: # Filtering out PySpark code and keeping user code only pyspark_root = os.path.dirname(pyspark.__file__) - def inspect_stack() -> Iterator[inspect.FrameInfo]: + def inspect_stack() -> Iterator[FrameType]: frame = inspect.currentframe() while frame: - frameinfo = (frame,) + inspect.getframeinfo(frame, context=0) - yield inspect.FrameInfo(*frameinfo) + yield frame frame = frame.f_back - stack = ( - frame_info for frame_info in inspect_stack() if pyspark_root not in frame_info.filename - ) + stack = (f for f in inspect_stack() if pyspark_root not in f.f_code.co_filename) - selected_frames: Iterator[inspect.FrameInfo] = itertools.islice(stack, depth) + selected_frames: Iterator[FrameType] = itertools.islice(stack, depth) # We try import here since IPython is not a required dependency try: @@ -210,7 +225,8 @@ def inspect_stack() -> Iterator[inspect.FrameInfo]: selected_frames = ( frame for frame in selected_frames - if (ipy_root not in frame.filename) and (ipykernel_root not in frame.filename) + if (ipy_root not in frame.f_code.co_filename) + and (ipykernel_root not in frame.f_code.co_filename) ) except ImportError: ipython = None @@ -218,10 +234,11 @@ def inspect_stack() -> Iterator[inspect.FrameInfo]: # Identifying the cell is useful when the error is generated from IPython Notebook if ipython: call_sites = [ - f"line {frame.lineno} in cell [{ipython.execution_count}]" for frame in selected_frames + f"line {frame.f_lineno} in cell [{ipython.execution_count}]" + for frame in selected_frames ] else: - call_sites = [f"{frame.filename}:{frame.lineno}" for frame in selected_frames] + call_sites = [f"{frame.f_code.co_filename}:{frame.f_lineno}" for frame in selected_frames] call_sites_str = "\n".join(call_sites) return call_sites_str @@ -239,13 +256,12 @@ def wrapper(*args: Any, 
**kwargs: Any) -> Any: from pyspark.sql.utils import is_remote spark = SparkSession.getActiveSession() - if spark is not None and hasattr(func, "__name__"): - if is_remote(): - global current_origin + if spark is not None and hasattr(func, "__name__") and is_debugging_enabled(): + if is_remote(): # Getting the configuration requires RPC call. Uses the default value for now. depth = 1 - set_current_origin(func.__name__, _capture_call_site(spark, depth)) + set_current_origin(func.__name__, _capture_call_site(depth)) try: return func(*args, **kwargs) @@ -253,8 +269,8 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: set_current_origin(None, None) else: assert spark._jvm is not None - jvm_pyspark_origin = ( - spark._jvm.org.apache.spark.sql.catalyst.trees.PySparkCurrentOrigin + jvm_pyspark_origin = getattr( + spark._jvm, "org.apache.spark.sql.catalyst.trees.PySparkCurrentOrigin" ) depth = int( spark.conf.get( # type: ignore[arg-type] @@ -262,7 +278,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: ) ) # Update call site when the function is called - jvm_pyspark_origin.set(func.__name__, _capture_call_site(spark, depth)) + jvm_pyspark_origin.set(func.__name__, _capture_call_site(depth)) try: return func(*args, **kwargs) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index b89755d9c18a5..e003ba43ec7c8 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -62,6 +62,7 @@ HasSolver, HasParallelism, ) +from pyspark.ml.remote.util import try_remote_attribute_relation from pyspark.ml.tree import ( _DecisionTreeModel, _DecisionTreeParams, @@ -336,6 +337,7 @@ class _ClassificationSummary(JavaWrapper): @property @since("3.1.0") + @try_remote_attribute_relation def predictions(self) -> DataFrame: """ Dataframe outputted by the model's `transform` method. 
@@ -521,6 +523,7 @@ def scoreCol(self) -> str: return self._call_java("scoreCol") @property + @try_remote_attribute_relation def roc(self) -> DataFrame: """ Returns the receiver operating characteristic (ROC) curve, @@ -546,6 +549,7 @@ def areaUnderROC(self) -> float: @property @since("3.1.0") + @try_remote_attribute_relation def pr(self) -> DataFrame: """ Returns the precision-recall curve, which is a Dataframe @@ -556,6 +560,7 @@ def pr(self) -> DataFrame: @property @since("3.1.0") + @try_remote_attribute_relation def fMeasureByThreshold(self) -> DataFrame: """ Returns a dataframe with two fields (threshold, F-Measure) curve @@ -565,6 +570,7 @@ def fMeasureByThreshold(self) -> DataFrame: @property @since("3.1.0") + @try_remote_attribute_relation def precisionByThreshold(self) -> DataFrame: """ Returns a dataframe with two fields (threshold, precision) curve. @@ -575,6 +581,7 @@ def precisionByThreshold(self) -> DataFrame: @property @since("3.1.0") + @try_remote_attribute_relation def recallByThreshold(self) -> DataFrame: """ Returns a dataframe with two fields (threshold, recall) curve. 
@@ -3788,7 +3795,8 @@ def __init__(self, models: List[ClassificationModel]): assert sc is not None and sc._gateway is not None java_models_array = JavaWrapper._new_java_array( - java_models, sc._gateway.jvm.org.apache.spark.ml.classification.ClassificationModel + java_models, + getattr(sc._gateway.jvm, "org.apache.spark.ml.classification.ClassificationModel"), ) # TODO: need to set metadata metadata = JavaParams._new_java_obj("org.apache.spark.sql.types.Metadata") @@ -3928,7 +3936,8 @@ def _to_java(self) -> "JavaObject": java_models = [cast(_JavaClassificationModel, model)._to_java() for model in self.models] java_models_array = JavaWrapper._new_java_array( - java_models, sc._gateway.jvm.org.apache.spark.ml.classification.ClassificationModel + java_models, + getattr(sc._gateway.jvm, "org.apache.spark.ml.classification.ClassificationModel"), ) metadata = JavaParams._new_java_obj("org.apache.spark.sql.types.Metadata") _java_obj = JavaParams._new_java_obj( diff --git a/python/pyspark/ml/common.py b/python/pyspark/ml/common.py index 1ae15fdf547eb..2417df6ab9eb3 100644 --- a/python/pyspark/ml/common.py +++ b/python/pyspark/ml/common.py @@ -74,7 +74,7 @@ def _to_java_object_rdd(rdd: "RDD") -> "JavaObject": """ rdd = rdd._reserialize(AutoBatchedSerializer(CPickleSerializer())) assert rdd.ctx._jvm is not None - return rdd.ctx._jvm.org.apache.spark.ml.python.MLSerDe.pythonToJava(rdd._jrdd, True) + return getattr(rdd.ctx._jvm, "org.apache.spark.ml.python.MLSerDe").pythonToJava(rdd._jrdd, True) def _py2java(sc: "SparkContext", obj: Any) -> "JavaObject": @@ -98,7 +98,7 @@ def _py2java(sc: "SparkContext", obj: Any) -> "JavaObject": else: data = bytearray(CPickleSerializer().dumps(obj)) assert sc._jvm is not None - obj = sc._jvm.org.apache.spark.ml.python.MLSerDe.loads(data) + obj = getattr(sc._jvm, "org.apache.spark.ml.python.MLSerDe").loads(data) return obj @@ -117,17 +117,17 @@ def _java2py(sc: "SparkContext", r: "JavaObjectOrPickleDump", encoding: str = "b assert sc._jvm is 
not None if clsName == "JavaRDD": - jrdd = sc._jvm.org.apache.spark.ml.python.MLSerDe.javaToPython(r) + jrdd = getattr(sc._jvm, "org.apache.spark.ml.python.MLSerDe").javaToPython(r) return RDD(jrdd, sc) if clsName == "Dataset": return DataFrame(r, SparkSession._getActiveSessionOrCreate()) if clsName in _picklable_classes: - r = sc._jvm.org.apache.spark.ml.python.MLSerDe.dumps(r) + r = getattr(sc._jvm, "org.apache.spark.ml.python.MLSerDe").dumps(r) elif isinstance(r, (JavaArray, JavaList)): try: - r = sc._jvm.org.apache.spark.ml.python.MLSerDe.dumps(r) + r = getattr(sc._jvm, "org.apache.spark.ml.python.MLSerDe").dumps(r) except Py4JJavaError: pass # not picklable diff --git a/python/pyspark/ml/connect/io_utils.py b/python/pyspark/ml/connect/io_utils.py index c401e3e76676a..fdaa23ff9011c 100644 --- a/python/pyspark/ml/connect/io_utils.py +++ b/python/pyspark/ml/connect/io_utils.py @@ -38,7 +38,9 @@ def _copy_file_from_local_to_fs(local_path: str, dest_path: str) -> None: session.copyFromLocalToFs(local_path, dest_path) else: jvm = session.sparkContext._gateway.jvm # type: ignore[union-attr] - jvm.org.apache.spark.ml.python.MLUtil.copyFileFromLocalToFs(local_path, dest_path) + getattr(jvm, "org.apache.spark.ml.python.MLUtil").copyFileFromLocalToFs( + local_path, dest_path + ) def _copy_dir_from_local_to_fs(local_path: str, dest_path: str) -> None: @@ -74,7 +76,7 @@ class ParamsReadWrite(Params): def _get_extra_metadata(self) -> Any: """ - Returns exta metadata of the instance + Returns extra metadata of the instance """ return None diff --git a/python/pyspark/ml/connect/tuning.py b/python/pyspark/ml/connect/tuning.py index cdb606048a59a..190fc683acf7d 100644 --- a/python/pyspark/ml/connect/tuning.py +++ b/python/pyspark/ml/connect/tuning.py @@ -170,7 +170,7 @@ def _parallelFitTasks( if active_session is None: raise RuntimeError( - "An active SparkSession is required for running cross valiator fit tasks." 
+ "An active SparkSession is required for running cross validator fit tasks." ) def get_single_task(index: int, param_map: Any) -> Callable[[], Tuple[int, float]]: diff --git a/python/pyspark/ml/deepspeed/deepspeed_distributor.py b/python/pyspark/ml/deepspeed/deepspeed_distributor.py index 4ac5ff2fb4207..3fd1d3bb32463 100644 --- a/python/pyspark/ml/deepspeed/deepspeed_distributor.py +++ b/python/pyspark/ml/deepspeed/deepspeed_distributor.py @@ -49,7 +49,7 @@ def __init__( Parameters ---------- numGpus: int - The number of GPUs to use per node (analagous to num_gpus in deepspeed command). + The number of GPUs to use per node (analogous to num_gpus in deepspeed command). nnodes: int The number of nodes that should be used for the run. localMode: bool diff --git a/python/pyspark/ml/dl_util.py b/python/pyspark/ml/dl_util.py index 8ead529d7b729..3b87049ef2777 100644 --- a/python/pyspark/ml/dl_util.py +++ b/python/pyspark/ml/dl_util.py @@ -27,7 +27,7 @@ class FunctionPickler: This class provides a way to pickle a function and its arguments. It also provides a way to create a script that can run a function with arguments if they have them pickled to a file. - It also provides a way of extracting the conents of a pickle file. + It also provides a way of extracting the contents of a pickle file. 
""" @staticmethod diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index e053ea273140c..cf12a5390746f 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1208,7 +1208,7 @@ def from_vocabulary( sc = SparkContext._active_spark_context assert sc is not None and sc._gateway is not None - java_class = sc._gateway.jvm.java.lang.String + java_class = getattr(sc._gateway.jvm, "java.lang.String") jvocab = CountVectorizerModel._new_java_array(vocabulary, java_class) model = CountVectorizerModel._create_from_java_class( "org.apache.spark.ml.feature.CountVectorizerModel", jvocab @@ -4799,7 +4799,7 @@ def from_labels( sc = SparkContext._active_spark_context assert sc is not None and sc._gateway is not None - java_class = sc._gateway.jvm.java.lang.String + java_class = getattr(sc._gateway.jvm, "java.lang.String") jlabels = StringIndexerModel._new_java_array(labels, java_class) model = StringIndexerModel._create_from_java_class( "org.apache.spark.ml.feature.StringIndexerModel", jlabels @@ -4828,7 +4828,7 @@ def from_arrays_of_labels( sc = SparkContext._active_spark_context assert sc is not None and sc._gateway is not None - java_class = sc._gateway.jvm.java.lang.String + java_class = getattr(sc._gateway.jvm, "java.lang.String") jlabels = StringIndexerModel._new_java_array(arrayOfLabels, java_class) model = StringIndexerModel._create_from_java_class( "org.apache.spark.ml.feature.StringIndexerModel", jlabels @@ -5198,7 +5198,7 @@ def loadDefaultStopWords(language: str) -> List[str]: Supported languages: danish, dutch, english, finnish, french, german, hungarian, italian, norwegian, portuguese, russian, spanish, swedish, turkish """ - stopWordsObj = _jvm().org.apache.spark.ml.feature.StopWordsRemover + stopWordsObj = getattr(_jvm(), "org.apache.spark.ml.feature.StopWordsRemover") return list(stopWordsObj.loadDefaultStopWords(language)) diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index 
32941b33c4603..de5539afd4a1a 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -121,7 +121,9 @@ def vector_to_array(col: Column, dtype: str = "float64") -> Column: sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None return Column( - sc._jvm.org.apache.spark.ml.functions.vector_to_array(_to_java_column(col), dtype) + getattr(sc._jvm, "org.apache.spark.ml.functions").vector_to_array( + _to_java_column(col), dtype + ) ) @@ -164,7 +166,9 @@ def array_to_vector(col: Column) -> Column: sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None - return Column(sc._jvm.org.apache.spark.ml.functions.array_to_vector(_to_java_column(col))) + return Column( + getattr(sc._jvm, "org.apache.spark.ml.functions").array_to_vector(_to_java_column(col)) + ) def _batched( diff --git a/python/pyspark/ml/image.py b/python/pyspark/ml/image.py index d0223739ffdf8..325992c085802 100644 --- a/python/pyspark/ml/image.py +++ b/python/pyspark/ml/image.py @@ -25,7 +25,8 @@ """ import sys -from typing import Any, Dict, List, NoReturn, Optional, cast +from typing import Any, Dict, List, NoReturn, cast +from functools import cached_property import numpy as np @@ -42,14 +43,7 @@ class _ImageSchema: APIs of this class. """ - def __init__(self) -> None: - self._imageSchema: Optional[StructType] = None - self._ocvTypes: Optional[Dict[str, int]] = None - self._columnSchema: Optional[StructType] = None - self._imageFields: Optional[List[str]] = None - self._undefinedImageType: Optional[str] = None - - @property + @cached_property def imageSchema(self) -> StructType: """ Returns the image schema. 
@@ -64,14 +58,12 @@ def imageSchema(self) -> StructType: """ from pyspark.core.context import SparkContext - if self._imageSchema is None: - ctx = SparkContext._active_spark_context - assert ctx is not None and ctx._jvm is not None - jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageSchema() - self._imageSchema = cast(StructType, _parse_datatype_json_string(jschema.json())) - return self._imageSchema + ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None + jschema = getattr(ctx._jvm, "org.apache.spark.ml.image.ImageSchema").imageSchema() + return cast(StructType, _parse_datatype_json_string(jschema.json())) - @property + @cached_property def ocvTypes(self) -> Dict[str, int]: """ Returns the OpenCV type mapping supported. @@ -85,13 +77,11 @@ def ocvTypes(self) -> Dict[str, int]: """ from pyspark.core.context import SparkContext - if self._ocvTypes is None: - ctx = SparkContext._active_spark_context - assert ctx is not None and ctx._jvm is not None - self._ocvTypes = dict(ctx._jvm.org.apache.spark.ml.image.ImageSchema.javaOcvTypes()) - return self._ocvTypes + ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None + return dict(getattr(ctx._jvm, "org.apache.spark.ml.image.ImageSchema").javaOcvTypes()) - @property + @cached_property def columnSchema(self) -> StructType: """ Returns the schema for the image column. 
@@ -106,14 +96,12 @@ def columnSchema(self) -> StructType: """ from pyspark.core.context import SparkContext - if self._columnSchema is None: - ctx = SparkContext._active_spark_context - assert ctx is not None and ctx._jvm is not None - jschema = ctx._jvm.org.apache.spark.ml.image.ImageSchema.columnSchema() - self._columnSchema = cast(StructType, _parse_datatype_json_string(jschema.json())) - return self._columnSchema + ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None + jschema = getattr(ctx._jvm, "org.apache.spark.ml.image.ImageSchema").columnSchema() + return cast(StructType, _parse_datatype_json_string(jschema.json())) - @property + @cached_property def imageFields(self) -> List[str]: """ Returns field names of image columns. @@ -127,13 +115,11 @@ def imageFields(self) -> List[str]: """ from pyspark.core.context import SparkContext - if self._imageFields is None: - ctx = SparkContext._active_spark_context - assert ctx is not None and ctx._jvm is not None - self._imageFields = list(ctx._jvm.org.apache.spark.ml.image.ImageSchema.imageFields()) - return self._imageFields + ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None + return list(getattr(ctx._jvm, "org.apache.spark.ml.image.ImageSchema").imageFields()) - @property + @cached_property def undefinedImageType(self) -> str: """ Returns the name of undefined image type for the invalid image. 
@@ -142,13 +128,9 @@ def undefinedImageType(self) -> str: """ from pyspark.core.context import SparkContext - if self._undefinedImageType is None: - ctx = SparkContext._active_spark_context - assert ctx is not None and ctx._jvm is not None - self._undefinedImageType = ( - ctx._jvm.org.apache.spark.ml.image.ImageSchema.undefinedImageType() - ) - return self._undefinedImageType + ctx = SparkContext._active_spark_context + assert ctx is not None and ctx._jvm is not None + return getattr(ctx._jvm, "org.apache.spark.ml.image.ImageSchema").undefinedImageType() def toNDArray(self, image: Row) -> np.ndarray: """ diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index 01339283839e1..0ffacde3bb423 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -207,7 +207,7 @@ def _to_java(self) -> "JavaObject": gateway = SparkContext._gateway assert gateway is not None and SparkContext._jvm is not None - cls = SparkContext._jvm.org.apache.spark.ml.PipelineStage + cls = getattr(SparkContext._jvm, "org.apache.spark.ml.PipelineStage") java_stages = gateway.new_array(cls, len(self.getStages())) for idx, stage in enumerate(self.getStages()): java_stages[idx] = cast(JavaParams, stage)._to_java() @@ -361,7 +361,7 @@ def _to_java(self) -> "JavaObject": gateway = SparkContext._gateway assert gateway is not None and SparkContext._jvm is not None - cls = SparkContext._jvm.org.apache.spark.ml.Transformer + cls = getattr(SparkContext._jvm, "org.apache.spark.ml.Transformer") java_stages = gateway.new_array(cls, len(self.stages)) for idx, stage in enumerate(self.stages): java_stages[idx] = cast(JavaParams, stage)._to_java() diff --git a/python/pyspark/ml/remote/__init__.py b/python/pyspark/ml/remote/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/python/pyspark/ml/remote/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pyspark/ml/remote/proto.py b/python/pyspark/ml/remote/proto.py new file mode 100644 index 0000000000000..3a81e74b6aec3 --- /dev/null +++ b/python/pyspark/ml/remote/proto.py @@ -0,0 +1,76 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from typing import Optional, TYPE_CHECKING, List + +import pyspark.sql.connect.proto as pb2 +from pyspark.sql.connect.plan import LogicalPlan + +if TYPE_CHECKING: + from pyspark.sql.connect.client import SparkConnectClient + + +class TransformerRelation(LogicalPlan): + """A logical plan for transforming of a transformer which could be a cached model + or a non-model transformer like VectorAssembler.""" + + def __init__( + self, + child: Optional["LogicalPlan"], + name: str, + ml_params: pb2.MlParams, + uid: str = "", + is_model: bool = True, + ) -> None: + super().__init__(child) + self._name = name + self._ml_params = ml_params + self._uid = uid + self._is_model = is_model + + def plan(self, session: "SparkConnectClient") -> pb2.Relation: + assert self._child is not None + plan = self._create_proto_relation() + plan.ml_relation.transform.input.CopyFrom(self._child.plan(session)) + + if self._is_model: + plan.ml_relation.transform.obj_ref.CopyFrom(pb2.ObjectRef(id=self._name)) + else: + plan.ml_relation.transform.transformer.CopyFrom( + pb2.MlOperator(name=self._name, uid=self._uid, type=pb2.MlOperator.TRANSFORMER) + ) + + if self._ml_params is not None: + plan.ml_relation.transform.params.CopyFrom(self._ml_params) + + return plan + + +class AttributeRelation(LogicalPlan): + """A logical plan used in ML to represent an attribute of an instance, which + could be a model or a summary. This attribute returns a DataFrame. 
+ """ + + def __init__(self, ref_id: str, methods: List[pb2.Fetch.Method]) -> None: + super().__init__(None) + self._ref_id = ref_id + self._methods = methods + + def plan(self, session: "SparkConnectClient") -> pb2.Relation: + plan = self._create_proto_relation() + plan.ml_relation.fetch.obj_ref.CopyFrom(pb2.ObjectRef(id=self._ref_id)) + plan.ml_relation.fetch.methods.extend(self._methods) + return plan diff --git a/python/pyspark/ml/remote/readwrite.py b/python/pyspark/ml/remote/readwrite.py new file mode 100644 index 0000000000000..9149ab3bfd454 --- /dev/null +++ b/python/pyspark/ml/remote/readwrite.py @@ -0,0 +1,134 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import cast, Type, TYPE_CHECKING + +import pyspark.sql.connect.proto as pb2 +from pyspark.ml.remote.serialize import serialize_ml_params, deserialize, deserialize_param +from pyspark.ml.util import MLWriter, MLReader, RL +from pyspark.ml.wrapper import JavaWrapper + +if TYPE_CHECKING: + from pyspark.ml.util import JavaMLReadable, JavaMLWritable + from pyspark.core.context import SparkContext + + +class RemoteMLWriter(MLWriter): + def __init__(self, instance: "JavaMLWritable") -> None: + super().__init__() + self._instance = instance + + @property + def sc(self) -> "SparkContext": + raise RuntimeError("Accessing SparkContext is not supported on Connect") + + def save(self, path: str) -> None: + from pyspark.ml.wrapper import JavaModel, JavaEstimator + from pyspark.sql.connect.session import SparkSession + + session = SparkSession.getActiveSession() + assert session is not None + + # Spark Connect ML is built on scala Spark.ML, that means we're only + # supporting JavaModel or JavaEstimator or JavaEvaluator + if isinstance(self._instance, JavaModel): + model = cast("JavaModel", self._instance) + params = serialize_ml_params(model, session.client) + assert isinstance(model._java_obj, str) + writer = pb2.MlCommand.Write( + obj_ref=pb2.ObjectRef(id=model._java_obj), + params=params, + path=path, + should_overwrite=self.shouldOverwrite, + options=self.optionMap, + ) + elif isinstance(self._instance, JavaEstimator): + estimator = cast("JavaEstimator", self._instance) + params = serialize_ml_params(estimator, session.client) + assert isinstance(estimator._java_obj, str) + writer = pb2.MlCommand.Write( + operator=pb2.MlOperator( + name=estimator._java_obj, uid=estimator.uid, type=pb2.MlOperator.ESTIMATOR + ), + params=params, + path=path, + should_overwrite=self.shouldOverwrite, + options=self.optionMap, + ) + else: + raise NotImplementedError(f"Unsupported writing for {self._instance}") + + command = pb2.Command() + 
command.ml_command.write.CopyFrom(writer) + session.client.execute_command(command) + + +class RemoteMLReader(MLReader[RL]): + def __init__(self, clazz: Type["JavaMLReadable[RL]"]) -> None: + super().__init__() + self._clazz = clazz + + def load(self, path: str) -> RL: + from pyspark.sql.connect.session import SparkSession + from pyspark.ml.wrapper import JavaModel, JavaEstimator + + session = SparkSession.getActiveSession() + assert session is not None + # to get the java corresponding qualified class name + java_qualified_class_name = ( + self._clazz.__module__.replace("pyspark", "org.apache.spark") + + "." + + self._clazz.__name__ + ) + + if issubclass(self._clazz, JavaModel): + ml_type = pb2.MlOperator.MODEL + elif issubclass(self._clazz, JavaEstimator): + ml_type = pb2.MlOperator.ESTIMATOR + else: + raise ValueError(f"Unsupported reading for {java_qualified_class_name}") + + command = pb2.Command() + command.ml_command.read.CopyFrom( + pb2.MlCommand.Read( + operator=pb2.MlOperator(name=java_qualified_class_name, type=ml_type), path=path + ) + ) + (_, properties, _) = session.client.execute_command(command) + result = deserialize(properties) + + # Get the python type + def _get_class() -> Type[RL]: + parts = (self._clazz.__module__ + "." 
+ self._clazz.__name__).split(".") + module = ".".join(parts[:-1]) + m = __import__(module, fromlist=[parts[-1]]) + return getattr(m, parts[-1]) + + py_type = _get_class() + # It must be JavaWrapper, since we're passing the string to the _java_obj + if issubclass(py_type, JavaWrapper): + if ml_type == pb2.MlOperator.MODEL: + session.client.add_ml_cache(result.obj_ref.id) + instance = py_type(result.obj_ref.id) + else: + instance = py_type() + instance._resetUid(result.uid) + params = {k: deserialize_param(v) for k, v in result.params.params.items()} + instance._set(**params) + return instance + else: + raise RuntimeError(f"Unsupported class {self._clazz}") diff --git a/python/pyspark/ml/remote/serialize.py b/python/pyspark/ml/remote/serialize.py new file mode 100644 index 0000000000000..69e3af1f4c787 --- /dev/null +++ b/python/pyspark/ml/remote/serialize.py @@ -0,0 +1,132 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from typing import Any, List, TYPE_CHECKING, Mapping, Dict + +import pyspark.sql.connect.proto as pb2 +from pyspark.ml.linalg import ( + Vectors, + Matrices, + DenseVector, + SparseVector, + DenseMatrix, + SparseMatrix, +) +from pyspark.sql.connect.expressions import LiteralExpression + +if TYPE_CHECKING: + from pyspark.sql.connect.client import SparkConnectClient + from pyspark.ml.param import Params + + +def serialize_param(value: Any, client: "SparkConnectClient") -> pb2.Param: + if isinstance(value, DenseVector): + return pb2.Param(vector=pb2.Vector(dense=pb2.Vector.Dense(value=value.values.tolist()))) + elif isinstance(value, SparseVector): + return pb2.Param( + vector=pb2.Vector( + sparse=pb2.Vector.Sparse( + size=value.size, index=value.indices.tolist(), value=value.values.tolist() + ) + ) + ) + elif isinstance(value, DenseMatrix): + return pb2.Param( + matrix=pb2.Matrix( + dense=pb2.Matrix.Dense( + num_rows=value.numRows, num_cols=value.numCols, value=value.values.tolist() + ) + ) + ) + elif isinstance(value, SparseMatrix): + return pb2.Param( + matrix=pb2.Matrix( + sparse=pb2.Matrix.Sparse( + num_rows=value.numRows, + num_cols=value.numCols, + colptr=value.colPtrs.tolist(), + row_index=value.rowIndices.tolist(), + value=value.values.tolist(), + ) + ) + ) + else: + literal = LiteralExpression._from_value(value).to_plan(client).literal + return pb2.Param(literal=literal) + + +def serialize(client: "SparkConnectClient", *args: Any) -> List[Any]: + from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame + + result = [] + for arg in args: + if isinstance(arg, ConnectDataFrame): + result.append(pb2.Fetch.Method.Args(input=arg._plan.plan(client))) + else: + result.append(pb2.Fetch.Method.Args(param=serialize_param(arg, client))) + return result + + +def deserialize_param(param: pb2.Param) -> Any: + if param.HasField("literal"): + return LiteralExpression._to_value(param.literal) + if param.HasField("vector"): + vector = param.vector + if 
vector.HasField("dense"): + return Vectors.dense(vector.dense.value) + elif vector.HasField("sparse"): + return Vectors.sparse(vector.sparse.size, vector.sparse.index, vector.sparse.value) + else: + raise ValueError("Unsupported vector type") + if param.HasField("matrix"): + matrix = param.matrix + if matrix.HasField("dense"): + return DenseMatrix( + matrix.dense.num_rows, + matrix.dense.num_cols, + matrix.dense.value, + matrix.dense.is_transposed, + ) + elif matrix.HasField("sparse"): + return Matrices.sparse( + matrix.sparse.num_rows, + matrix.sparse.num_cols, + matrix.sparse.colptr, + matrix.sparse.row_index, + matrix.sparse.value, + ) + else: + raise ValueError("Unsupported matrix type") + + raise ValueError("Unsupported param type") + + +def deserialize(ml_command_result_properties: Dict[str, Any]) -> Any: + ml_command_result = ml_command_result_properties["ml_command_result"] + if ml_command_result.HasField("operator_info"): + return ml_command_result.operator_info + + if ml_command_result.HasField("param"): + return deserialize_param(ml_command_result.param) + + raise ValueError("Unsupported result type") + + +def serialize_ml_params(instance: "Params", client: "SparkConnectClient") -> pb2.MlParams: + params: Mapping[str, pb2.Param] = { + k.name: serialize_param(v, client) for k, v in instance._paramMap.items() + } + return pb2.MlParams(params=params) diff --git a/python/pyspark/ml/remote/util.py b/python/pyspark/ml/remote/util.py new file mode 100644 index 0000000000000..cb34dae165863 --- /dev/null +++ b/python/pyspark/ml/remote/util.py @@ -0,0 +1,293 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import functools +import os +from typing import Any, cast, TypeVar, Callable, TYPE_CHECKING, Type, List, Tuple + +import pyspark.sql.connect.proto as pb2 +from pyspark.ml.remote.serialize import serialize_ml_params, serialize, deserialize +from pyspark.sql import is_remote + +if TYPE_CHECKING: + from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame + from pyspark.ml.wrapper import JavaWrapper, JavaEstimator + from pyspark.ml.util import JavaMLReadable, JavaMLWritable + +FuncT = TypeVar("FuncT", bound=Callable[..., Any]) + + +def _extract_id_methods(obj_identifier: str) -> Tuple[List[pb2.Fetch.Method], str]: + """Extract the obj reference id and the methods. Eg, model.summary""" + method_chain = obj_identifier.split(".") + obj_ref = method_chain[0] + methods: List[pb2.Fetch.Method] = [] + if len(method_chain) > 1: + methods = [pb2.Fetch.Method(method=m) for m in method_chain[1:]] + return methods, obj_ref + + +def try_remote_intermediate_result(f: FuncT) -> FuncT: + """Mark the function/property that returns the intermediate result of the remote call. + Eg, model.summary""" + + @functools.wraps(f) + def wrapped(self: "JavaWrapper") -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + return f"{self._java_obj}.{f.__name__}" + else: + return f(self) + + return cast(FuncT, wrapped) + + +def try_remote_attribute_relation(f: FuncT) -> FuncT: + """Mark the function/property that returns a Relation. 
+ Eg, model.summary.roc""" + + @functools.wraps(f) + def wrapped(self: "JavaWrapper", *args: Any, **kwargs: Any) -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + # The attribute returns a dataframe, we need to wrap it + # in the AttributeRelation + from pyspark.ml.remote.proto import AttributeRelation + from pyspark.sql.connect.session import SparkSession + from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame + + session = SparkSession.getActiveSession() + assert session is not None + + assert isinstance(self._java_obj, str) + + methods, obj_ref = _extract_id_methods(self._java_obj) + methods.append( + pb2.Fetch.Method(method=f.__name__, args=serialize(session.client, *args)) + ) + plan = AttributeRelation(obj_ref, methods) + return ConnectDataFrame(plan, session) + else: + return f(self, *args, **kwargs) + + return cast(FuncT, wrapped) + + +def try_remote_fit(f: FuncT) -> FuncT: + """Mark the function that fits a model.""" + + @functools.wraps(f) + def wrapped(self: "JavaEstimator", dataset: "ConnectDataFrame") -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + client = dataset.sparkSession.client + input = dataset._plan.plan(client) + assert isinstance(self._java_obj, str) + estimator = pb2.MlOperator( + name=self._java_obj, uid=self.uid, type=pb2.MlOperator.ESTIMATOR + ) + command = pb2.Command() + command.ml_command.fit.CopyFrom( + pb2.MlCommand.Fit( + estimator=estimator, + params=serialize_ml_params(self, client), + dataset=input, + ) + ) + (_, properties, _) = client.execute_command(command) + model_info = deserialize(properties) + client.add_ml_cache(model_info.obj_ref.id) + return model_info.obj_ref.id + else: + return f(self, dataset) + + return cast(FuncT, wrapped) + + +def try_remote_transform_relation(f: FuncT) -> FuncT: + """Mark the function/property that returns a relation for model transform.""" + + @functools.wraps(f) + def wrapped(self: "JavaWrapper", dataset: 
"ConnectDataFrame") -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + from pyspark.ml import Model, Transformer + from pyspark.sql.connect.session import SparkSession + from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame + + session = SparkSession.getActiveSession() + assert session is not None + # Model is also a Transformer, so we much match Model first + if isinstance(self, Model): + params = serialize_ml_params(self, session.client) + from pyspark.ml.remote.proto import TransformerRelation + + assert isinstance(self._java_obj, str) + return ConnectDataFrame( + TransformerRelation( + child=dataset._plan, name=self._java_obj, ml_params=params, is_model=True + ), + session, + ) + elif isinstance(self, Transformer): + params = serialize_ml_params(self, session.client) + from pyspark.ml.remote.proto import TransformerRelation + + assert isinstance(self._java_obj, str) + return ConnectDataFrame( + TransformerRelation( + child=dataset._plan, + name=self._java_obj, + ml_params=params, + uid=self.uid, + is_model=False, + ), + session, + ) + else: + raise RuntimeError(f"Unsupported {self}") + else: + return f(self, dataset) + + return cast(FuncT, wrapped) + + +def try_remote_call(f: FuncT) -> FuncT: + """Mark the function/property for the remote call. 
+ Eg, model.coefficients""" + + @functools.wraps(f) + def wrapped(self: "JavaWrapper", name: str, *args: Any) -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + """Launch a remote call if possible""" + from pyspark.sql.connect.session import SparkSession + + session = SparkSession.getActiveSession() + assert session is not None + assert isinstance(self._java_obj, str) + methods, obj_ref = _extract_id_methods(self._java_obj) + methods.append(pb2.Fetch.Method(method=name, args=serialize(session.client, *args))) + command = pb2.Command() + command.ml_command.fetch.CopyFrom( + pb2.Fetch(obj_ref=pb2.ObjectRef(id=obj_ref), methods=methods) + ) + (_, properties, _) = session.client.execute_command(command) + ml_command_result = properties["ml_command_result"] + if ml_command_result.HasField("summary"): + summary = ml_command_result.summary + session.client.add_ml_cache(summary) + return summary + else: + return deserialize(properties) + else: + return f(self, name, *args) + + return cast(FuncT, wrapped) + + +def try_remote_del(f: FuncT) -> FuncT: + """Mark the function/property to delete a model on the server side.""" + + @functools.wraps(f) + def wrapped(self: "JavaWrapper") -> Any: + try: + in_remote = is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ + except Exception: + return + + if in_remote: + # Delete the model if possible + model_id = self._java_obj + if model_id is not None and "." not in model_id: + try: + from pyspark.sql.connect.session import SparkSession + + session = SparkSession.getActiveSession() + if session is not None: + session.client.remove_ml_cache(model_id) + return + except Exception: + # SparkSession's down. 
+ return + else: + return f(self) + + return cast(FuncT, wrapped) + + +def try_remote_return_java_class(f: FuncT) -> FuncT: + """Mark the function/property that returns the Java class name when running remotely.""" + + @functools.wraps(f) + def wrapped(java_class: str, *args: Any) -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + return java_class + else: + return f(java_class, *args) + + return cast(FuncT, wrapped) + + +def try_remote_write(f: FuncT) -> FuncT: + """Mark the function that writes an estimator/model or evaluator""" + + @functools.wraps(f) + def wrapped(self: "JavaMLWritable") -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + from pyspark.ml.remote.readwrite import RemoteMLWriter + + return RemoteMLWriter(self) + else: + return f(self) + + return cast(FuncT, wrapped) + + +def try_remote_read(f: FuncT) -> FuncT: + """Mark the function to read an estimator/model or evaluator""" + + @functools.wraps(f) + def wrapped(cls: Type["JavaMLReadable"]) -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + from pyspark.ml.remote.readwrite import RemoteMLReader + + return RemoteMLReader(cls) + else: + return f(cls) + + return cast(FuncT, wrapped) + + +def try_remote_intercept(f: FuncT) -> FuncT: + """Mark the function/property that returns none.""" + + @functools.wraps(f) + def wrapped(java_class: str, *args: Any) -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + return None + else: + return f(java_class, *args) + + return cast(FuncT, wrapped) + + +def try_remote_not_supporting(f: FuncT) -> FuncT: + """Mark the function/property that has not been supported yet""" + + @functools.wraps(f) + def wrapped(*args: Any) -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + raise NotImplementedError("") + else: + return f(*args) + + return cast(FuncT, wrapped) diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index 4dcc961909520..04b0c7278a717 --- 
a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -107,7 +107,7 @@ def test( sc = SparkContext._active_spark_context assert sc is not None - javaTestObj = _jvm().org.apache.spark.ml.stat.ChiSquareTest + javaTestObj = getattr(_jvm(), "org.apache.spark.ml.stat.ChiSquareTest") args = [_py2java(sc, arg) for arg in (dataset, featuresCol, labelCol, flatten)] return _java2py(sc, javaTestObj.test(*args)) @@ -178,7 +178,7 @@ def corr(dataset: DataFrame, column: str, method: str = "pearson") -> DataFrame: sc = SparkContext._active_spark_context assert sc is not None - javaCorrObj = _jvm().org.apache.spark.ml.stat.Correlation + javaCorrObj = getattr(_jvm(), "org.apache.spark.ml.stat.Correlation") args = [_py2java(sc, arg) for arg in (dataset, column, method)] return _java2py(sc, javaCorrObj.corr(*args)) @@ -248,7 +248,7 @@ def test(dataset: DataFrame, sampleCol: str, distName: str, *params: float) -> D sc = SparkContext._active_spark_context assert sc is not None - javaTestObj = _jvm().org.apache.spark.ml.stat.KolmogorovSmirnovTest + javaTestObj = getattr(_jvm(), "org.apache.spark.ml.stat.KolmogorovSmirnovTest") dataset = _py2java(sc, dataset) params = [float(param) for param in params] # type: ignore[assignment] return _java2py( diff --git a/python/pyspark/ml/tests/connect/test_connect_function.py b/python/pyspark/ml/tests/connect/test_connect_function.py index 393d38fdc426a..7d3a115ab0619 100644 --- a/python/pyspark/ml/tests/connect/test_connect_function.py +++ b/python/pyspark/ml/tests/connect/test_connect_function.py @@ -43,7 +43,7 @@ def setUpClass(cls): # Disable the shared namespace so pyspark.sql.functions, etc point the regular # PySpark libraries. os.environ["PYSPARK_NO_NAMESPACE_SHARE"] = "1" - cls.connect = cls.spark # Switch Spark Connect session and regular PySpark sesion. + cls.connect = cls.spark # Switch Spark Connect session and regular PySpark session. 
cls.spark = PySparkSession._instantiatedSession assert cls.spark is not None diff --git a/python/pyspark/ml/tests/connect/test_connect_spark_ml_classification.py b/python/pyspark/ml/tests/connect/test_connect_spark_ml_classification.py new file mode 100644 index 0000000000000..2000a38d9e616 --- /dev/null +++ b/python/pyspark/ml/tests/connect/test_connect_spark_ml_classification.py @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import unittest + +from pyspark.ml.tests.test_classification import ClassificationTestsMixin +from pyspark.sql import SparkSession + + +class ClassificationTestsOnConnect(ClassificationTestsMixin, unittest.TestCase): + def setUp(self) -> None: + self.spark = SparkSession.builder.remote( + os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[2]") + ).getOrCreate() + + def test_assert_remote_mode(self): + from pyspark.sql import is_remote + + self.assertTrue(is_remote()) + + def tearDown(self) -> None: + self.spark.stop() + + +if __name__ == "__main__": + from pyspark.ml.tests.connect.test_connect_spark_ml_classification import * # noqa: F401 + + try: + import xmlrunner # type: ignore[import] + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index eeb342c4238dd..d0e2600a9a8b3 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -29,93 +29,13 @@ ) from pyspark.ml.clustering import DistributedLDAModel, KMeans, LocalLDAModel, LDA, LDAModel from pyspark.ml.fpm import FPGrowth -from pyspark.ml.linalg import Matrices, Vectors, DenseVector +from pyspark.ml.linalg import Vectors, DenseVector from pyspark.ml.recommendation import ALS from pyspark.ml.regression import GeneralizedLinearRegression, LinearRegression from pyspark.sql import Row from pyspark.testing.mlutils import SparkSessionTestCase -class LogisticRegressionTest(SparkSessionTestCase): - def test_binomial_logistic_regression_with_bound(self): - df = self.spark.createDataFrame( - [ - (1.0, 1.0, Vectors.dense(0.0, 5.0)), - (0.0, 2.0, Vectors.dense(1.0, 2.0)), - (1.0, 3.0, Vectors.dense(2.0, 1.0)), - (0.0, 4.0, Vectors.dense(3.0, 3.0)), - ], - ["label", "weight", "features"], - ) - - lor = LogisticRegression( - 
regParam=0.01, - weightCol="weight", - lowerBoundsOnCoefficients=Matrices.dense(1, 2, [-1.0, -1.0]), - upperBoundsOnIntercepts=Vectors.dense(0.0), - ) - model = lor.fit(df) - self.assertTrue(np.allclose(model.coefficients.toArray(), [-0.2944, -0.0484], atol=1e-4)) - self.assertTrue(np.isclose(model.intercept, 0.0, atol=1e-4)) - - def test_multinomial_logistic_regression_with_bound(self): - data_path = "data/mllib/sample_multiclass_classification_data.txt" - df = self.spark.read.format("libsvm").load(data_path) - - lor = LogisticRegression( - regParam=0.01, - lowerBoundsOnCoefficients=Matrices.dense(3, 4, range(12)), - upperBoundsOnIntercepts=Vectors.dense(0.0, 0.0, 0.0), - ) - model = lor.fit(df) - expected = [ - [4.593, 4.5516, 9.0099, 12.2904], - [1.0, 8.1093, 7.0, 10.0], - [3.041, 5.0, 8.0, 11.0], - ] - for i in range(0, len(expected)): - self.assertTrue( - np.allclose(model.coefficientMatrix.toArray()[i], expected[i], atol=1e-4) - ) - self.assertTrue( - np.allclose(model.interceptVector.toArray(), [-0.9057, -1.1392, -0.0033], atol=1e-4) - ) - - def test_logistic_regression_with_threshold(self): - df = self.spark.createDataFrame( - [ - (1.0, 1.0, Vectors.dense(0.0, 5.0)), - (0.0, 2.0, Vectors.dense(1.0, 2.0)), - (1.0, 3.0, Vectors.dense(2.0, 1.0)), - (0.0, 4.0, Vectors.dense(3.0, 3.0)), - ], - ["label", "weight", "features"], - ) - - lor = LogisticRegression(weightCol="weight") - model = lor.fit(df) - - # status changes 1 - for t in [0.0, 0.1, 0.2, 0.5, 1.0]: - model.setThreshold(t).transform(df) - - # status changes 2 - [model.setThreshold(t).predict(Vectors.dense(0.0, 5.0)) for t in [0.0, 0.1, 0.2, 0.5, 1.0]] - - self.assertEqual( - [row.prediction for row in model.setThreshold(0.0).transform(df).collect()], - [1.0, 1.0, 1.0, 1.0], - ) - self.assertEqual( - [row.prediction for row in model.setThreshold(0.5).transform(df).collect()], - [0.0, 1.0, 1.0, 0.0], - ) - self.assertEqual( - [row.prediction for row in model.setThreshold(1.0).transform(df).collect()], - 
[0.0, 0.0, 0.0, 0.0], - ) - - class MultilayerPerceptronClassifierTest(SparkSessionTestCase): def test_raw_and_probability_prediction(self): data_path = "data/mllib/sample_multiclass_classification_data.txt" diff --git a/python/pyspark/ml/tests/test_classification.py b/python/pyspark/ml/tests/test_classification.py new file mode 100644 index 0000000000000..ee72e0394e3a0 --- /dev/null +++ b/python/pyspark/ml/tests/test_classification.py @@ -0,0 +1,304 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import tempfile +import unittest +from shutil import rmtree + +import numpy as np + +from pyspark.ml.linalg import Vectors, Matrices +from pyspark.sql import SparkSession, DataFrame +from pyspark.ml.classification import ( + LogisticRegression, + LogisticRegressionModel, + LogisticRegressionSummary, + BinaryLogisticRegressionSummary, +) + + +class ClassificationTestsMixin: + def test_binomial_logistic_regression_with_bound(self): + df = self.spark.createDataFrame( + [ + (1.0, 1.0, Vectors.dense(0.0, 5.0)), + (0.0, 2.0, Vectors.dense(1.0, 2.0)), + (1.0, 3.0, Vectors.dense(2.0, 1.0)), + (0.0, 4.0, Vectors.dense(3.0, 3.0)), + ], + ["label", "weight", "features"], + ) + + lor = LogisticRegression( + regParam=0.01, + weightCol="weight", + lowerBoundsOnCoefficients=Matrices.dense(1, 2, [-1.0, -1.0]), + upperBoundsOnIntercepts=Vectors.dense(0.0), + ) + lor_model = lor.fit(df) + + def check_result(model: LogisticRegressionModel) -> None: + self.assertTrue( + np.allclose(model.coefficients.toArray(), [-0.2944, -0.0484], atol=1e-4) + ) + self.assertTrue(np.isclose(model.intercept, 0.0, atol=1e-4)) + + check_result(lor_model) + + # Model save + with tempfile.TemporaryDirectory(prefix="model_save") as tmp_dir: + local_path = os.path.join(tmp_dir, "model") + lor_model.write().save(local_path) + loaded_model = LogisticRegressionModel.load(local_path) + check_result(loaded_model) + + def test_multinomial_logistic_regression_with_bound(self): + data_path = "data/mllib/sample_multiclass_classification_data.txt" + df = self.spark.read.format("libsvm").load(data_path) + + lor = LogisticRegression( + regParam=0.01, + lowerBoundsOnCoefficients=Matrices.dense(3, 4, range(12)), + upperBoundsOnIntercepts=Vectors.dense(0.0, 0.0, 0.0), + ) + lor_model = lor.fit(df) + + def check_result(model: LogisticRegressionModel) -> None: + expected = [ + [4.593, 4.5516, 9.0099, 12.2904], + [1.0, 8.1093, 7.0, 10.0], + [3.041, 5.0, 8.0, 11.0], + ] + for i in range(0, len(expected)): + 
self.assertTrue( + np.allclose(model.coefficientMatrix.toArray()[i], expected[i], atol=1e-4) + ) + self.assertTrue( + np.allclose(model.interceptVector.toArray(), [-0.9057, -1.1392, -0.0033], atol=1e-4) + ) + + check_result(lor_model) + + # Model save + with tempfile.TemporaryDirectory(prefix="model_save") as tmp_dir: + local_path = os.path.join(tmp_dir, "model") + lor_model.write().save(local_path) + loaded_model = LogisticRegressionModel.load(local_path) + check_result(loaded_model) + + def test_logistic_regression_with_threshold(self): + df = self.spark.createDataFrame( + [ + (1.0, 1.0, Vectors.dense(0.0, 5.0)), + (0.0, 2.0, Vectors.dense(1.0, 2.0)), + (1.0, 3.0, Vectors.dense(2.0, 1.0)), + (0.0, 4.0, Vectors.dense(3.0, 3.0)), + ], + ["label", "weight", "features"], + ) + + lor = LogisticRegression(weightCol="weight") + model = lor.fit(df) + + # status changes 1 + for t in [0.0, 0.1, 0.2, 0.5, 1.0]: + model.setThreshold(t).transform(df) + + # status changes 2 + [model.setThreshold(t).predict(Vectors.dense(0.0, 5.0)) for t in [0.0, 0.1, 0.2, 0.5, 1.0]] + + self.assertEqual( + [row.prediction for row in model.setThreshold(0.0).transform(df).collect()], + [1.0, 1.0, 1.0, 1.0], + ) + self.assertEqual( + [row.prediction for row in model.setThreshold(0.5).transform(df).collect()], + [0.0, 1.0, 1.0, 0.0], + ) + self.assertEqual( + [row.prediction for row in model.setThreshold(1.0).transform(df).collect()], + [0.0, 0.0, 0.0, 0.0], + ) + + def test_binary_logistic_regression_summary(self): + df = self.spark.createDataFrame( + [(1.0, 2.0, Vectors.dense(1.0)), (0.0, 2.0, Vectors.sparse(1, [], []))], + ["label", "weight", "features"], + ) + lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight", fitIntercept=False) + model = lr.fit(df) + self.assertTrue(model.hasSummary) + s = model.summary + # test that api is callable and returns expected types + self.assertTrue(isinstance(s.predictions, DataFrame)) + self.assertEqual(s.probabilityCol, "probability") + 
self.assertEqual(s.labelCol, "label") + self.assertEqual(s.featuresCol, "features") + self.assertEqual(s.predictionCol, "prediction") + objHist = s.objectiveHistory + self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float)) + self.assertGreater(s.totalIterations, 0) + self.assertTrue(isinstance(s.labels, list)) + self.assertTrue(isinstance(s.truePositiveRateByLabel, list)) + self.assertTrue(isinstance(s.falsePositiveRateByLabel, list)) + self.assertTrue(isinstance(s.precisionByLabel, list)) + self.assertTrue(isinstance(s.recallByLabel, list)) + self.assertTrue(isinstance(s.fMeasureByLabel(), list)) + self.assertTrue(isinstance(s.fMeasureByLabel(1.0), list)) + self.assertTrue(isinstance(s.roc, DataFrame)) + self.assertAlmostEqual(s.areaUnderROC, 1.0, 2) + self.assertTrue(isinstance(s.pr, DataFrame)) + self.assertTrue(isinstance(s.fMeasureByThreshold, DataFrame)) + self.assertTrue(isinstance(s.precisionByThreshold, DataFrame)) + self.assertTrue(isinstance(s.recallByThreshold, DataFrame)) + self.assertAlmostEqual(s.accuracy, 1.0, 2) + self.assertAlmostEqual(s.weightedTruePositiveRate, 1.0, 2) + self.assertAlmostEqual(s.weightedFalsePositiveRate, 0.0, 2) + self.assertAlmostEqual(s.weightedRecall, 1.0, 2) + self.assertAlmostEqual(s.weightedPrecision, 1.0, 2) + self.assertAlmostEqual(s.weightedFMeasure(), 1.0, 2) + self.assertAlmostEqual(s.weightedFMeasure(1.0), 1.0, 2) + + # test evaluation (with training dataset) produces a summary with same values + # one check is enough to verify a summary is returned, Scala version runs full test + sameSummary = model.evaluate(df) + self.assertTrue(isinstance(sameSummary, BinaryLogisticRegressionSummary)) + self.assertAlmostEqual(sameSummary.areaUnderROC, s.areaUnderROC) + self.assertEqual(sorted(sameSummary.predictions.collect()), sorted(s.predictions.collect())) + + def test_multiclass_logistic_regression_summary(self): + df = self.spark.createDataFrame( + [ + (1.0, 2.0, Vectors.dense(1.0)), + (0.0, 2.0, 
Vectors.sparse(1, [], [])), + (2.0, 2.0, Vectors.dense(2.0)), + (2.0, 2.0, Vectors.dense(1.9)), + ], + ["label", "weight", "features"], + ) + lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight", fitIntercept=False) + model = lr.fit(df) + self.assertTrue(model.hasSummary) + s = model.summary + # test that api is callable and returns expected types + self.assertTrue(isinstance(s.predictions, DataFrame)) + self.assertEqual(s.probabilityCol, "probability") + self.assertEqual(s.labelCol, "label") + self.assertEqual(s.featuresCol, "features") + self.assertEqual(s.predictionCol, "prediction") + objHist = s.objectiveHistory + self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float)) + self.assertGreater(s.totalIterations, 0) + self.assertTrue(isinstance(s.labels, list)) + self.assertTrue(isinstance(s.truePositiveRateByLabel, list)) + self.assertTrue(isinstance(s.falsePositiveRateByLabel, list)) + self.assertTrue(isinstance(s.precisionByLabel, list)) + self.assertTrue(isinstance(s.recallByLabel, list)) + self.assertTrue(isinstance(s.fMeasureByLabel(), list)) + self.assertTrue(isinstance(s.fMeasureByLabel(1.0), list)) + self.assertAlmostEqual(s.accuracy, 0.75, 2) + self.assertAlmostEqual(s.weightedTruePositiveRate, 0.75, 2) + self.assertAlmostEqual(s.weightedFalsePositiveRate, 0.25, 2) + self.assertAlmostEqual(s.weightedRecall, 0.75, 2) + self.assertAlmostEqual(s.weightedPrecision, 0.583, 2) + self.assertAlmostEqual(s.weightedFMeasure(), 0.65, 2) + self.assertAlmostEqual(s.weightedFMeasure(1.0), 0.65, 2) + + # test evaluation (with training dataset) produces a summary with same values + # one check is enough to verify a summary is returned, Scala version runs full test + sameSummary = model.evaluate(df) + self.assertTrue(isinstance(sameSummary, LogisticRegressionSummary)) + self.assertFalse(isinstance(sameSummary, BinaryLogisticRegressionSummary)) + self.assertAlmostEqual(sameSummary.accuracy, s.accuracy) + + # We can't use 
sorted(s.predictions.collect()), since the DenseVector doesn't support "<" + self.assertEqual( + sameSummary.predictions.coalesce(1).sort("label", "weight", "prediction").collect(), + s.predictions.coalesce(1).sort("label", "weight", "prediction").collect(), + ) + + def test_logistic_regression(self): + # test sparse/dense vector and matrix + lower_intercepts = Vectors.dense([1, 2, 3, 4]) + upper_intercepts = Vectors.sparse(4, [(1, 1.0), (3, 5.5)]) + lower_coefficients = Matrices.dense(3, 2, [0, 1, 4, 5, 9, 10]) + upper_coefficients = Matrices.sparse(1, 1, [0, 1], [0], [2.0]) + + lr = LogisticRegression( + maxIter=1, + lowerBoundsOnIntercepts=lower_intercepts, + upperBoundsOnIntercepts=upper_intercepts, + lowerBoundsOnCoefficients=lower_coefficients, + upperBoundsOnCoefficients=upper_coefficients, + ) + path = tempfile.mkdtemp() + lr_path = path + "/logreg" + lr.save(lr_path) + lr2 = LogisticRegression.load(lr_path) + self.assertEqual( + lr2.uid, + lr2.maxIter.parent, + "Loaded LogisticRegression instance uid (%s) " + "did not match Param's uid (%s)" % (lr2.uid, lr2.maxIter.parent), + ) + self.assertEqual( + lr._defaultParamMap[lr.maxIter], + lr2._defaultParamMap[lr2.maxIter], + "Loaded LogisticRegression instance default params did not match " + + "original defaults", + ) + self.assertEqual( + lr.getLowerBoundsOnIntercepts(), + lr2.getLowerBoundsOnIntercepts(), + ) + self.assertEqual( + lr.getUpperBoundsOnIntercepts(), + lr2.getUpperBoundsOnIntercepts(), + ) + self.assertEqual( + lr.getLowerBoundsOnCoefficients(), + lr2.getLowerBoundsOnCoefficients(), + ) + self.assertEqual( + lr.getUpperBoundsOnCoefficients(), + lr2.getUpperBoundsOnCoefficients(), + ) + try: + rmtree(path) + except OSError: + pass + + +class ClassificationTests(ClassificationTestsMixin, unittest.TestCase): + def setUp(self) -> None: + self.spark = SparkSession.builder.master("local[4]").getOrCreate() + + def tearDown(self) -> None: + self.spark.stop() + + +if __name__ == "__main__": + from 
pyspark.ml.tests.test_classification import * # noqa: F401,F403 + + try: + import xmlrunner # type: ignore[import] + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/ml/tests/test_dl_util.py b/python/pyspark/ml/tests/test_dl_util.py index e5e2c6bc191d8..c130cf1ff6b9d 100644 --- a/python/pyspark/ml/tests/test_dl_util.py +++ b/python/pyspark/ml/tests/test_dl_util.py @@ -137,7 +137,7 @@ def _are_two_files_identical(self, fpath1: str, fpath2: str) -> bool: "", ), ( - "Check if it creates the correct file with only suffix + boddy", + "Check if it creates the correct file with only suffix + body", "", "print('goodbye')", ), diff --git a/python/pyspark/ml/tests/test_functions.py b/python/pyspark/ml/tests/test_functions.py index e67e46ded67bd..7719b2b27e0ab 100644 --- a/python/pyspark/ml/tests/test_functions.py +++ b/python/pyspark/ml/tests/test_functions.py @@ -265,14 +265,14 @@ def predict(a, b, c): with self.assertRaisesRegex(Exception, "Model expected 3 inputs, but received 4 columns"): preds = self.df.withColumn("preds", sum_cols(*columns)).toPandas() - # muliple scalar columns with one tensor_input_shape => single numpy array + # multiple scalar columns with one tensor_input_shape => single numpy array sum_cols = predict_batch_udf( array_sum_fn, return_type=DoubleType(), batch_size=5, input_tensor_shapes=[[4]] ) preds = self.df.withColumn("preds", sum_cols(struct(*columns))).toPandas() self.assertTrue(np.array_equal(np.sum(self.data, axis=1), preds["preds"].to_numpy())) - # muliple scalar columns with wrong tensor_input_shape => ERROR + # multiple scalar columns with wrong tensor_input_shape => ERROR sum_cols = predict_batch_udf( array_sum_fn, return_type=DoubleType(), batch_size=5, input_tensor_shapes=[[3]] ) diff --git a/python/pyspark/ml/tests/test_param.py b/python/pyspark/ml/tests/test_param.py index 
8df50a5963e6b..0aa9827124954 100644 --- a/python/pyspark/ml/tests/test_param.py +++ b/python/pyspark/ml/tests/test_param.py @@ -368,12 +368,12 @@ def test_default_params_transferred(self): self.assertFalse(binarizer.isSet(binarizer.outputCol)) self.assertEqual(result[0][0], 1.0) - def test_lr_evaluate_invaild_type(self): + def test_lr_evaluate_invalid_type(self): lr = LinearRegressionModel() invalid_type = "" self.assertRaises(TypeError, lr.evaluate, invalid_type) - def test_glr_evaluate_invaild_type(self): + def test_glr_evaluate_invalid_type(self): glr = GeneralizedLinearRegressionModel() invalid_type = "" self.assertRaises(TypeError, glr.evaluate, invalid_type) diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py index 406180d9a6391..481c2f236d46f 100644 --- a/python/pyspark/ml/tests/test_persistence.py +++ b/python/pyspark/ml/tests/test_persistence.py @@ -153,29 +153,6 @@ def test_linear_regression_pmml_basic(self): self.assertIn("Apache Spark", pmml_text) self.assertIn("PMML", pmml_text) - def test_logistic_regression(self): - lr = LogisticRegression(maxIter=1) - path = tempfile.mkdtemp() - lr_path = path + "/logreg" - lr.save(lr_path) - lr2 = LogisticRegression.load(lr_path) - self.assertEqual( - lr2.uid, - lr2.maxIter.parent, - "Loaded LogisticRegression instance uid (%s) " - "did not match Param's uid (%s)" % (lr2.uid, lr2.maxIter.parent), - ) - self.assertEqual( - lr._defaultParamMap[lr.maxIter], - lr2._defaultParamMap[lr2.maxIter], - "Loaded LogisticRegression instance default params did not match " - + "original defaults", - ) - try: - rmtree(path) - except OSError: - pass - def test_kmeans(self): kmeans = KMeans(k=2, seed=1) path = tempfile.mkdtemp() diff --git a/python/pyspark/ml/tests/test_training_summary.py b/python/pyspark/ml/tests/test_training_summary.py index 5704d7186734f..e1c8f4197e3c7 100644 --- a/python/pyspark/ml/tests/test_training_summary.py +++ 
b/python/pyspark/ml/tests/test_training_summary.py @@ -18,14 +18,11 @@ import unittest from pyspark.ml.classification import ( - BinaryLogisticRegressionSummary, BinaryRandomForestClassificationSummary, FMClassifier, FMClassificationSummary, LinearSVC, LinearSVCSummary, - LogisticRegression, - LogisticRegressionSummary, MultilayerPerceptronClassifier, MultilayerPerceptronClassificationSummary, RandomForestClassificationSummary, @@ -122,94 +119,6 @@ def test_glr_summary(self): sameSummary = model.evaluate(df) self.assertAlmostEqual(sameSummary.deviance, s.deviance) - def test_binary_logistic_regression_summary(self): - df = self.spark.createDataFrame( - [(1.0, 2.0, Vectors.dense(1.0)), (0.0, 2.0, Vectors.sparse(1, [], []))], - ["label", "weight", "features"], - ) - lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight", fitIntercept=False) - model = lr.fit(df) - self.assertTrue(model.hasSummary) - s = model.summary - # test that api is callable and returns expected types - self.assertTrue(isinstance(s.predictions, DataFrame)) - self.assertEqual(s.probabilityCol, "probability") - self.assertEqual(s.labelCol, "label") - self.assertEqual(s.featuresCol, "features") - self.assertEqual(s.predictionCol, "prediction") - objHist = s.objectiveHistory - self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float)) - self.assertGreater(s.totalIterations, 0) - self.assertTrue(isinstance(s.labels, list)) - self.assertTrue(isinstance(s.truePositiveRateByLabel, list)) - self.assertTrue(isinstance(s.falsePositiveRateByLabel, list)) - self.assertTrue(isinstance(s.precisionByLabel, list)) - self.assertTrue(isinstance(s.recallByLabel, list)) - self.assertTrue(isinstance(s.fMeasureByLabel(), list)) - self.assertTrue(isinstance(s.fMeasureByLabel(1.0), list)) - self.assertTrue(isinstance(s.roc, DataFrame)) - self.assertAlmostEqual(s.areaUnderROC, 1.0, 2) - self.assertTrue(isinstance(s.pr, DataFrame)) - self.assertTrue(isinstance(s.fMeasureByThreshold, 
DataFrame)) - self.assertTrue(isinstance(s.precisionByThreshold, DataFrame)) - self.assertTrue(isinstance(s.recallByThreshold, DataFrame)) - self.assertAlmostEqual(s.accuracy, 1.0, 2) - self.assertAlmostEqual(s.weightedTruePositiveRate, 1.0, 2) - self.assertAlmostEqual(s.weightedFalsePositiveRate, 0.0, 2) - self.assertAlmostEqual(s.weightedRecall, 1.0, 2) - self.assertAlmostEqual(s.weightedPrecision, 1.0, 2) - self.assertAlmostEqual(s.weightedFMeasure(), 1.0, 2) - self.assertAlmostEqual(s.weightedFMeasure(1.0), 1.0, 2) - # test evaluation (with training dataset) produces a summary with same values - # one check is enough to verify a summary is returned, Scala version runs full test - sameSummary = model.evaluate(df) - self.assertTrue(isinstance(sameSummary, BinaryLogisticRegressionSummary)) - self.assertAlmostEqual(sameSummary.areaUnderROC, s.areaUnderROC) - - def test_multiclass_logistic_regression_summary(self): - df = self.spark.createDataFrame( - [ - (1.0, 2.0, Vectors.dense(1.0)), - (0.0, 2.0, Vectors.sparse(1, [], [])), - (2.0, 2.0, Vectors.dense(2.0)), - (2.0, 2.0, Vectors.dense(1.9)), - ], - ["label", "weight", "features"], - ) - lr = LogisticRegression(maxIter=5, regParam=0.01, weightCol="weight", fitIntercept=False) - model = lr.fit(df) - self.assertTrue(model.hasSummary) - s = model.summary - # test that api is callable and returns expected types - self.assertTrue(isinstance(s.predictions, DataFrame)) - self.assertEqual(s.probabilityCol, "probability") - self.assertEqual(s.labelCol, "label") - self.assertEqual(s.featuresCol, "features") - self.assertEqual(s.predictionCol, "prediction") - objHist = s.objectiveHistory - self.assertTrue(isinstance(objHist, list) and isinstance(objHist[0], float)) - self.assertGreater(s.totalIterations, 0) - self.assertTrue(isinstance(s.labels, list)) - self.assertTrue(isinstance(s.truePositiveRateByLabel, list)) - self.assertTrue(isinstance(s.falsePositiveRateByLabel, list)) - self.assertTrue(isinstance(s.precisionByLabel, 
list)) - self.assertTrue(isinstance(s.recallByLabel, list)) - self.assertTrue(isinstance(s.fMeasureByLabel(), list)) - self.assertTrue(isinstance(s.fMeasureByLabel(1.0), list)) - self.assertAlmostEqual(s.accuracy, 0.75, 2) - self.assertAlmostEqual(s.weightedTruePositiveRate, 0.75, 2) - self.assertAlmostEqual(s.weightedFalsePositiveRate, 0.25, 2) - self.assertAlmostEqual(s.weightedRecall, 0.75, 2) - self.assertAlmostEqual(s.weightedPrecision, 0.583, 2) - self.assertAlmostEqual(s.weightedFMeasure(), 0.65, 2) - self.assertAlmostEqual(s.weightedFMeasure(1.0), 0.65, 2) - # test evaluation (with training dataset) produces a summary with same values - # one check is enough to verify a summary is returned, Scala version runs full test - sameSummary = model.evaluate(df) - self.assertTrue(isinstance(sameSummary, LogisticRegressionSummary)) - self.assertFalse(isinstance(sameSummary, BinaryLogisticRegressionSummary)) - self.assertAlmostEqual(sameSummary.accuracy, s.accuracy) - def test_linear_svc_summary(self): df = self.spark.createDataFrame( [(1.0, 2.0, Vectors.dense(1.0, 1.0, 1.0)), (0.0, 2.0, Vectors.dense(1.0, 2.0, 3.0))], diff --git a/python/pyspark/ml/torch/distributor.py b/python/pyspark/ml/torch/distributor.py index 62a71c5a96af4..ef86f38b716b7 100644 --- a/python/pyspark/ml/torch/distributor.py +++ b/python/pyspark/ml/torch/distributor.py @@ -232,10 +232,10 @@ def _get_num_tasks(self) -> int: def _validate_input_params(self) -> None: if self.num_processes <= 0: - raise ValueError("num_proccesses has to be a positive integer") + raise ValueError("num_processes has to be a positive integer") def _check_encryption(self) -> None: - """Checks to see if the user requires encrpytion of data. + """Checks to see if the user requires encryption of data. If required, throw an exception since we don't support that. 
Raises diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 888beff663523..695bbf98517c3 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -278,7 +278,7 @@ def _to_java_impl(self) -> Tuple["JavaObject", "JavaObject", "JavaObject"]: gateway = SparkContext._gateway assert gateway is not None and SparkContext._jvm is not None - cls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap + cls = getattr(SparkContext._jvm, "org.apache.spark.ml.param.ParamMap") estimator = self.getEstimator() if isinstance(estimator, JavaEstimator): @@ -313,7 +313,7 @@ def meta_estimator_transfer_param_maps_to_java( sc is not None and SparkContext._jvm is not None and SparkContext._gateway is not None ) - paramMapCls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap + paramMapCls = getattr(SparkContext._jvm, "org.apache.spark.ml.param.ParamMap") javaParamMaps = SparkContext._gateway.new_array(paramMapCls, len(pyParamMaps)) for idx, pyParamMap in enumerate(pyParamMaps): diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index 9bbd64d2aef5a..3fe97f44619c0 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -37,6 +37,7 @@ from pyspark import since from pyspark.ml.common import inherit_doc +from pyspark.ml.remote.util import try_remote_intermediate_result, try_remote_write, try_remote_read from pyspark.sql import SparkSession from pyspark.sql.utils import is_remote from pyspark.util import VersionUtils @@ -270,6 +271,7 @@ class JavaMLWritable(MLWritable): (Private) Mixin for ML instances that provide :py:class:`JavaMLWriter`. 
""" + @try_remote_write def write(self) -> JavaMLWriter: """Returns an MLWriter instance for this ML instance.""" return JavaMLWriter(self) @@ -378,6 +380,7 @@ class JavaMLReadable(MLReadable[RL]): """ @classmethod + @try_remote_read def read(cls) -> JavaMLReader[RL]: """Returns an MLReader instance for this class.""" return JavaMLReader(cls) @@ -680,6 +683,7 @@ def hasSummary(self) -> bool: @property @since("2.1.0") + @try_remote_intermediate_result def summary(self) -> T: """ Gets summary of the model trained on the training set. An exception is thrown if diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index eed7781dc71e3..e2bf25386c77a 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -19,7 +19,15 @@ from typing import Any, Generic, Optional, List, Type, TypeVar, TYPE_CHECKING from pyspark import since -from pyspark.sql import DataFrame +from pyspark.ml.remote.util import ( + try_remote_transform_relation, + try_remote_call, + try_remote_fit, + try_remote_del, + try_remote_return_java_class, + try_remote_intercept, +) +from pyspark.sql import DataFrame, is_remote from pyspark.ml import Estimator, Predictor, PredictionModel, Transformer, Model from pyspark.ml.base import _PredictorParams from pyspark.ml.param import Param, Params @@ -47,6 +55,7 @@ def __init__(self, java_obj: Optional["JavaObject"] = None): super(JavaWrapper, self).__init__() self._java_obj = java_obj + @try_remote_del def __del__(self) -> None: from pyspark.core.context import SparkContext @@ -63,6 +72,7 @@ def _create_from_java_class(cls: Type[JW], java_class: str, *args: Any) -> JW: java_obj = JavaWrapper._new_java_obj(java_class, *args) return cls(java_obj) + @try_remote_call def _call_java(self, name: str, *args: Any) -> Any: from pyspark.core.context import SparkContext @@ -74,6 +84,7 @@ def _call_java(self, name: str, *args: Any) -> Any: return _java2py(sc, m(*java_args)) @staticmethod + @try_remote_return_java_class def 
_new_java_obj(java_class: str, *args: Any) -> "JavaObject": """ Returns a new Java object. @@ -347,6 +358,7 @@ def copy(self: "JP", extra: Optional["ParamMap"] = None) -> "JP": that._transfer_params_to_java() return that + @try_remote_intercept def clear(self, param: Param) -> None: """ Clears a param from the param map if it has been explicitly set. @@ -372,6 +384,7 @@ def _create_model(self, java_model: "JavaObject") -> JM: """ raise NotImplementedError() + @try_remote_fit def _fit_java(self, dataset: DataFrame) -> "JavaObject": """ Fits a Java model to the input dataset. @@ -405,6 +418,7 @@ class JavaTransformer(JavaParams, Transformer, metaclass=ABCMeta): available as _java_obj. """ + @try_remote_transform_relation def _transform(self, dataset: DataFrame) -> DataFrame: assert self._java_obj is not None @@ -435,7 +449,7 @@ def __init__(self, java_model: Optional["JavaObject"] = None): other ML classes). """ super(JavaModel, self).__init__(java_model) - if java_model is not None: + if java_model is not None and not is_remote(): # SPARK-10931: This is a temporary fix to allow models to own params # from estimators. Eventually, these params should be in models through # using common base classes between estimators and models. diff --git a/python/pyspark/pandas/accessors.py b/python/pyspark/pandas/accessors.py index 4c36f7976af83..77757e4b60873 100644 --- a/python/pyspark/pandas/accessors.py +++ b/python/pyspark/pandas/accessors.py @@ -936,7 +936,7 @@ def _transform_batch( def pandas_concat(*series: pd.Series) -> pd.DataFrame: # The input can only be a DataFrame for struct from Spark 3.0. - # This works around makeing the input as a frame. See SPARK-27240 + # This works around making the input as a frame. 
See SPARK-27240 pdf = pd.concat(series, axis=1) pdf.columns = columns return pdf diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py index bc54d8b9b17cb..01e23214d662d 100644 --- a/python/pyspark/pandas/base.py +++ b/python/pyspark/pandas/base.py @@ -1123,7 +1123,7 @@ def shift( Shift Series/Index by desired number of periods. .. note:: the current implementation of shift uses Spark's Window without - specifying partition specification. This leads to moveing all data into + specifying partition specification. This leads to moving all data into a single partition in a single machine and could cause serious performance degradation. Avoid this method with very large datasets. diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index 49aa49f65e35b..86820573344ea 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -2632,7 +2632,7 @@ def to_latex( ... 'mask': ['red', 'purple'], ... 'weapon': ['sai', 'bo staff']}, ... 
columns=['name', 'mask', 'weapon']) - >>> print(df.to_latex(index=False)) # doctest: +NORMALIZE_WHITESPACE + >>> print(df.to_latex(index=False)) # doctest: +SKIP \begin{tabular}{lll} \toprule name & mask & weapon \\ @@ -7292,8 +7292,6 @@ def select_dtypes( 4 1 True 1.0 5 2 False 2.0 """ - from pyspark.sql.types import _parse_datatype_string - include_list: List[str] if not is_list_like(include): include_list = [cast(str, include)] if include is not None else [] @@ -7320,14 +7318,14 @@ def select_dtypes( include_spark_type = [] for inc in include_list: try: - include_spark_type.append(_parse_datatype_string(inc)) + include_spark_type.append(self._internal.spark_frame._session._parse_ddl(inc)) except BaseException: pass exclude_spark_type = [] for exc in exclude_list: try: - exclude_spark_type.append(_parse_datatype_string(exc)) + exclude_spark_type.append(self._internal.spark_frame._session._parse_ddl(exc)) except BaseException: pass @@ -7686,7 +7684,7 @@ def _sort( if na_position not in ("first", "last"): raise ValueError("invalid na_position: '{}'".format(na_position)) - # Mapper: Get a spark colum + # Mapper: Get a spark column # n function for (ascending, na_position) combination mapper = { (True, "first"): PySparkColumn.asc_nulls_first, @@ -9808,7 +9806,7 @@ def describe(self, percentiles: Optional[List[float]] = None) -> "DataFrame": if is_all_string_type: # Handling string type columns - # We will retrive the `count`, `unique`, `top` and `freq`. + # We will retrieve the `count`, `unique`, `top` and `freq`. 
internal = self._internal.resolved_copy exprs_string = [ internal.spark_column_for(psser._column_label) for psser in psser_string diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index c77cdf51a2f6d..d31bc1f48d112 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -138,14 +138,44 @@ def from_pandas(pobj: Union[pd.DataFrame, pd.Series, pd.Index]) -> Union[Series, Parameters ---------- - pobj : pandas.DataFrame or pandas.Series - pandas DataFrame or Series to read. + pobj : pandas.DataFrame, pandas.Series or pandas.Index + pandas DataFrame, Series or Index to read. Returns ------- - Series or DataFrame - If a pandas Series is passed in, this function returns a pandas-on-Spark Series. + DataFrame, Series or Index If a pandas DataFrame is passed in, this function returns a pandas-on-Spark DataFrame. + If a pandas Series is passed in, this function returns a pandas-on-Spark Series. + If a pandas Index is passed in, this function returns a pandas-on-Spark Index. 
+ + Examples + -------- + >>> import pandas as pd + >>> import pyspark.pandas as ps + + Convert a pandas DataFrame: + >>> pdf = pd.DataFrame({'a': [1, 2, 3]}) + >>> psdf = ps.from_pandas(pdf) + >>> psdf + a + 0 1 + 1 2 + 2 3 + + Convert a pandas Series: + >>> pser = pd.Series([1, 2, 3]) + >>> psser = ps.from_pandas(pser) + >>> psser + 0 1 + 1 2 + 2 3 + dtype: int64 + + Convert a pandas Index: + >>> pidx = pd.Index([1, 2, 3]) + >>> psidx = ps.from_pandas(pidx) + >>> psidx + Index([1, 2, 3], dtype='int64') """ if isinstance(pobj, pd.Series): return Series(pobj) diff --git a/python/pyspark/pandas/tests/io/test_dataframe_conversion.py b/python/pyspark/pandas/tests/io/test_dataframe_conversion.py index d4b03a855d382..7a4c635ee2941 100644 --- a/python/pyspark/pandas/tests/io/test_dataframe_conversion.py +++ b/python/pyspark/pandas/tests/io/test_dataframe_conversion.py @@ -26,6 +26,12 @@ from pyspark import pandas as ps from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils from pyspark.testing.sqlutils import SQLTestUtils +from pyspark.testing.utils import ( + have_openpyxl, + openpyxl_requirement_message, + have_jinja2, + jinja2_requirement_message, +) class DataFrameConversionMixin: @@ -86,6 +92,7 @@ def get_excel_dfs(pandas_on_spark_location, pandas_location): "expected": pd.read_excel(pandas_location, index_col=0), } + @unittest.skipIf(not have_openpyxl, openpyxl_requirement_message) def test_to_excel(self): with self.temp_dir() as dirpath: pandas_location = dirpath + "/" + "output1.xlsx" @@ -199,6 +206,7 @@ def test_to_clipboard(self): psdf.to_clipboard(sep=";", index=False), pdf.to_clipboard(sep=";", index=False) ) + @unittest.skipIf(not have_jinja2, jinja2_requirement_message) def test_to_latex(self): pdf = self.pdf psdf = self.psdf diff --git a/python/pyspark/pandas/tests/io/test_io.py b/python/pyspark/pandas/tests/io/test_io.py index 6fbdc366dd76a..da5817b86b984 100644 --- a/python/pyspark/pandas/tests/io/test_io.py +++ 
b/python/pyspark/pandas/tests/io/test_io.py @@ -24,7 +24,12 @@ from pyspark import pandas as ps from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.testing.sqlutils import SQLTestUtils -from pyspark.testing.utils import have_tabulate, tabulate_requirement_message +from pyspark.testing.utils import ( + have_jinja2, + jinja2_requirement_message, + have_tabulate, + tabulate_requirement_message, +) # This file contains test cases for 'Serialization / IO / Conversion' @@ -91,6 +96,7 @@ def test_from_dict(self): psdf = ps.DataFrame.from_dict(data, orient="index", columns=["A", "B", "C", "D"]) self.assert_eq(pdf, psdf) + @unittest.skipIf(not have_jinja2, jinja2_requirement_message) def test_style(self): # Currently, the `style` function returns a pandas object `Styler` as it is, # processing only the number of rows declared in `compute.max_rows`. diff --git a/python/pyspark/pandas/tests/io/test_series_conversion.py b/python/pyspark/pandas/tests/io/test_series_conversion.py index 2ae40e92b489a..06d923816633d 100644 --- a/python/pyspark/pandas/tests/io/test_series_conversion.py +++ b/python/pyspark/pandas/tests/io/test_series_conversion.py @@ -23,6 +23,7 @@ from pyspark import pandas as ps from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.testing.sqlutils import SQLTestUtils +from pyspark.testing.utils import have_jinja2, jinja2_requirement_message class SeriesConversionTestsMixin: @@ -48,6 +49,7 @@ def test_to_clipboard(self): psser.to_clipboard(sep=",", index=False), pser.to_clipboard(sep=",", index=False) ) + @unittest.skipIf(not have_jinja2, jinja2_requirement_message) def test_to_latex(self): pser = self.pser psser = self.psser diff --git a/python/pyspark/resource/profile.py b/python/pyspark/resource/profile.py index e9e6ef3520eea..f0fb8f0b32d5b 100644 --- a/python/pyspark/resource/profile.py +++ b/python/pyspark/resource/profile.py @@ -211,9 +211,9 @@ def __init__(self) -> None: if _jvm is not None: self._jvm = _jvm - 
self._java_resource_profile_builder = ( - _jvm.org.apache.spark.resource.ResourceProfileBuilder() - ) + self._java_resource_profile_builder = getattr( + _jvm, "org.apache.spark.resource.ResourceProfileBuilder" + )() else: self._jvm = None self._java_resource_profile_builder = None diff --git a/python/pyspark/resource/requests.py b/python/pyspark/resource/requests.py index fa8bb43ee2c49..805cecd5dbbe0 100644 --- a/python/pyspark/resource/requests.py +++ b/python/pyspark/resource/requests.py @@ -173,9 +173,9 @@ def __init__( jvm = _jvm or SparkContext._jvm if jvm is not None: - self._java_executor_resource_requests = ( - jvm.org.apache.spark.resource.ExecutorResourceRequests() - ) + self._java_executor_resource_requests = getattr( + jvm, "org.apache.spark.resource.ExecutorResourceRequests" + )() if _requests is not None: for k, v in _requests.items(): if k == self._MEMORY: @@ -474,9 +474,9 @@ def __init__( jvm = _jvm or SparkContext._jvm if jvm is not None: - self._java_task_resource_requests: Optional[ - "JavaObject" - ] = jvm.org.apache.spark.resource.TaskResourceRequests() + self._java_task_resource_requests: Optional["JavaObject"] = getattr( + jvm, "org.apache.spark.resource.TaskResourceRequests" + )() if _requests is not None: for k, v in _requests.items(): if k == self._CPUS: diff --git a/python/pyspark/sql/_typing.pyi b/python/pyspark/sql/_typing.pyi index 4969268939adf..27fa0f2a90133 100644 --- a/python/pyspark/sql/_typing.pyi +++ b/python/pyspark/sql/_typing.pyi @@ -36,8 +36,10 @@ from pyspark._typing import PrimitiveType from pyspark.profiler import CodeMapDict import pyspark.sql.types from pyspark.sql.column import Column +from pyspark.sql.tvf_argument import TableValuedFunctionArgument ColumnOrName = Union[Column, str] +TVFArgumentOrName = Union[TableValuedFunctionArgument, str] ColumnOrNameOrOrdinal = Union[Column, str, int] DecimalLiteral = decimal.Decimal DateTimeLiteral = Union[datetime.datetime, datetime.date] diff --git 
a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py index a9e41f20357e8..0b18212faf605 100644 --- a/python/pyspark/sql/avro/functions.py +++ b/python/pyspark/sql/avro/functions.py @@ -102,7 +102,7 @@ def from_avro( sc = get_active_spark_context() try: - jc = cast(JVMView, sc._jvm).org.apache.spark.sql.avro.functions.from_avro( + jc = getattr(cast(JVMView, sc._jvm), "org.apache.spark.sql.avro.functions").from_avro( _to_java_column(data), jsonFormatSchema, options or {} ) except TypeError as e: @@ -168,11 +168,11 @@ def to_avro(data: "ColumnOrName", jsonFormatSchema: str = "") -> Column: sc = get_active_spark_context() try: if jsonFormatSchema == "": - jc = cast(JVMView, sc._jvm).org.apache.spark.sql.avro.functions.to_avro( + jc = getattr(cast(JVMView, sc._jvm), "org.apache.spark.sql.avro.functions").to_avro( _to_java_column(data) ) else: - jc = cast(JVMView, sc._jvm).org.apache.spark.sql.avro.functions.to_avro( + jc = getattr(cast(JVMView, sc._jvm), "org.apache.spark.sql.avro.functions").to_avro( _to_java_column(data), jsonFormatSchema ) except TypeError as e: diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 8c35aafa7066c..40a0d9346ccc3 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -479,7 +479,6 @@ def listFunctions( """ if dbName is None: dbName = self.currentDatabase() - iter = self._jcatalog.listFunctions(dbName).toLocalIterator() if pattern is None: iter = self._jcatalog.listFunctions(dbName).toLocalIterator() else: diff --git a/python/pyspark/sql/classic/column.py b/python/pyspark/sql/classic/column.py index c08eac7f6a049..fe0e440203c36 100644 --- a/python/pyspark/sql/classic/column.py +++ b/python/pyspark/sql/classic/column.py @@ -522,7 +522,9 @@ def alias(self, *alias: str, **kwargs: Any) -> ParentColumn: if len(alias) == 1: if metadata: assert sc._jvm is not None - jmeta = sc._jvm.org.apache.spark.sql.types.Metadata.fromJson(json.dumps(metadata)) + jmeta = 
getattr(sc._jvm, "org.apache.spark.sql.types.Metadata").fromJson( + json.dumps(metadata) + ) return Column(getattr(self._jc, "as")(alias[0], jmeta)) else: return Column(getattr(self._jc, "as")(alias[0])) diff --git a/python/pyspark/sql/classic/dataframe.py b/python/pyspark/sql/classic/dataframe.py index 169755c753907..84498f1b2294d 100644 --- a/python/pyspark/sql/classic/dataframe.py +++ b/python/pyspark/sql/classic/dataframe.py @@ -21,7 +21,7 @@ import random import warnings from collections.abc import Iterable -from functools import reduce +from functools import reduce, cached_property from typing import ( Any, Callable, @@ -74,6 +74,7 @@ from pyspark.sql.utils import get_active_spark_context, to_java_array, to_scala_map from pyspark.sql.pandas.conversion import PandasConversionMixin from pyspark.sql.pandas.map_ops import PandasMapOpsMixin +from pyspark.sql.table_arg import TableArg if TYPE_CHECKING: @@ -118,8 +119,6 @@ def __init__( ): from pyspark.sql.context import SQLContext - self._sql_ctx: Optional["SQLContext"] = None - if isinstance(sql_ctx, SQLContext): assert not os.environ.get("SPARK_TESTING") # Sanity check for our internal usage. assert isinstance(sql_ctx, SQLContext) @@ -136,14 +135,11 @@ def __init__( self._sc: "SparkContext" = sql_ctx._sc self._jdf: "JavaObject" = jdf self.is_cached = False - # initialized lazily - self._schema: Optional[StructType] = None - self._lazy_rdd: Optional["RDD[Row]"] = None # Check whether _repr_html is supported or not, we use it to avoid calling _jdf twice # by __repr__ and _repr_html_ while eager evaluation opens. self._support_repr_html = False - @property + @cached_property def sql_ctx(self) -> "SQLContext": from pyspark.sql.context import SQLContext @@ -151,24 +147,18 @@ def sql_ctx(self) -> "SQLContext": "DataFrame.sql_ctx is an internal property, and will be removed " "in future releases. Use DataFrame.sparkSession instead." 
) - if self._sql_ctx is None: - self._sql_ctx = SQLContext._get_or_create(self._sc) - return self._sql_ctx + return SQLContext._get_or_create(self._sc) @property def sparkSession(self) -> "SparkSession": return self._session - @property + @cached_property def rdd(self) -> "RDD[Row]": from pyspark.core.rdd import RDD - if self._lazy_rdd is None: - jrdd = self._jdf.javaToPython() - self._lazy_rdd = RDD( - jrdd, self.sparkSession._sc, BatchedSerializer(CPickleSerializer()) - ) - return self._lazy_rdd + jrdd = self._jdf.javaToPython() + return RDD(jrdd, self.sparkSession._sc, BatchedSerializer(CPickleSerializer())) @property def na(self) -> ParentDataFrameNaFunctions: @@ -208,21 +198,17 @@ def write(self) -> DataFrameWriter: def writeStream(self) -> DataStreamWriter: return DataStreamWriter(self) - @property + @cached_property def schema(self) -> StructType: - if self._schema is None: - try: - self._schema = cast( - StructType, _parse_datatype_json_string(self._jdf.schema().json()) - ) - except AnalysisException as e: - raise e - except Exception as e: - raise PySparkValueError( - errorClass="CANNOT_PARSE_DATATYPE", - messageParameters={"error": str(e)}, - ) - return self._schema + try: + return cast(StructType, _parse_datatype_json_string(self._jdf.schema().json())) + except AnalysisException as e: + raise e + except Exception as e: + raise PySparkValueError( + errorClass="CANNOT_PARSE_DATATYPE", + messageParameters={"error": str(e)}, + ) def printSchema(self, level: Optional[int] = None) -> None: if level: @@ -665,6 +651,15 @@ def dtypes(self) -> List[Tuple[str, str]]: def columns(self) -> List[str]: return [f.name for f in self.schema.fields] + def metadataColumn(self, colName: str) -> Column: + if not isinstance(colName, str): + raise PySparkTypeError( + errorClass="NOT_STR", + messageParameters={"arg_name": "colName", "arg_type": type(colName).__name__}, + ) + jc = self._jdf.metadataColumn(colName) + return Column(jc) + def colRegex(self, colName: str) -> Column: 
if not isinstance(colName, str): raise PySparkTypeError( @@ -715,6 +710,22 @@ def join( jdf = self._jdf.join(other._jdf, on, how) return DataFrame(jdf, self.sparkSession) + def lateralJoin( + self, + other: ParentDataFrame, + on: Optional[Column] = None, + how: Optional[str] = None, + ) -> ParentDataFrame: + if on is None and how is None: + jdf = self._jdf.lateralJoin(other._jdf) + elif on is None: + jdf = self._jdf.lateralJoin(other._jdf, how) + elif how is None: + jdf = self._jdf.lateralJoin(other._jdf, on._jc) + else: + jdf = self._jdf.lateralJoin(other._jdf, on._jc, how) + return DataFrame(jdf, self.sparkSession) + # TODO(SPARK-22947): Fix the DataFrame API. def _joinAsOf( self, @@ -1786,6 +1797,9 @@ def transpose(self, indexColumn: Optional["ColumnOrName"] = None) -> ParentDataF else: return DataFrame(self._jdf.transpose(), self.sparkSession) + def asTable(self) -> TableArg: + return TableArg(self._jdf.asTable()) + def scalar(self) -> Column: return Column(self._jdf.scalar()) diff --git a/python/pyspark/sql/classic/window.py b/python/pyspark/sql/classic/window.py index 63e9a337c0c2e..c7bc92739b240 100644 --- a/python/pyspark/sql/classic/window.py +++ b/python/pyspark/sql/classic/window.py @@ -48,9 +48,9 @@ def partitionBy(*cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> Paren from py4j.java_gateway import JVMView sc = get_active_spark_context() - jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.partitionBy( - _to_java_cols(cols) - ) + jspec = getattr( + cast(JVMView, sc._jvm), "org.apache.spark.sql.expressions.Window" + ).partitionBy(_to_java_cols(cols)) return WindowSpec(jspec) @staticmethod @@ -58,7 +58,7 @@ def orderBy(*cols: Union["ColumnOrName", Sequence["ColumnOrName"]]) -> ParentWin from py4j.java_gateway import JVMView sc = get_active_spark_context() - jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.orderBy( + jspec = getattr(cast(JVMView, sc._jvm), 
"org.apache.spark.sql.expressions.Window").orderBy( _to_java_cols(cols) ) return WindowSpec(jspec) @@ -72,9 +72,9 @@ def rowsBetween(start: int, end: int) -> ParentWindowSpec: if end >= Window._FOLLOWING_THRESHOLD: end = Window.unboundedFollowing sc = get_active_spark_context() - jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.rowsBetween( - start, end - ) + jspec = getattr( + cast(JVMView, sc._jvm), "org.apache.spark.sql.expressions.Window" + ).rowsBetween(start, end) return WindowSpec(jspec) @staticmethod @@ -86,9 +86,9 @@ def rangeBetween(start: int, end: int) -> ParentWindowSpec: if end >= Window._FOLLOWING_THRESHOLD: end = Window.unboundedFollowing sc = get_active_spark_context() - jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.rangeBetween( - start, end - ) + jspec = getattr( + cast(JVMView, sc._jvm), "org.apache.spark.sql.expressions.Window" + ).rangeBetween(start, end) return WindowSpec(jspec) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 06dd2860fe406..e5640dd81b1fb 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -25,6 +25,7 @@ Union, ) +from pyspark.sql.tvf_argument import TableValuedFunctionArgument from pyspark.sql.utils import dispatch_col_method from pyspark.sql.types import DataType from pyspark.errors import PySparkValueError @@ -37,7 +38,7 @@ __all__ = ["Column"] -class Column: +class Column(TableValuedFunctionArgument): """ A column in a DataFrame. @@ -1524,7 +1525,11 @@ def over(self, window: "WindowSpec") -> "Column": @dispatch_col_method def outer(self) -> "Column": """ - Mark this column reference as an outer reference for subqueries. + Mark this column as an outer column if its expression refers to columns from an outer query. + + This is used to trigger lazy analysis of Spark Classic DataFrame, so that we can use it + to build subquery expressions. 
Spark Connect DataFrame is always lazily analyzed and + does not need to use this function. .. versionadded:: 4.0.0 diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 78d4e0fc1c4f4..4a85ca26b64de 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -20,6 +20,8 @@ "SparkConnectClient", ] +import atexit + from pyspark.sql.connect.utils import check_dependencies check_dependencies(__name__) @@ -329,8 +331,8 @@ def default_port() -> int: jvm = PySparkSession._instantiatedSession._jvm # type: ignore[union-attr] return getattr( getattr( - jvm.org.apache.spark.sql.connect.service, # type: ignore[union-attr] - "SparkConnectService$", + jvm, + "org.apache.spark.sql.connect.service.SparkConnectService$", ), "MODULE$", ).localPort() @@ -494,6 +496,7 @@ def __init__( is_same_semantics: Optional[bool], semantic_hash: Optional[int], storage_level: Optional[StorageLevel], + ddl_string: Optional[str], ): self.schema = schema self.explain_string = explain_string @@ -506,6 +509,7 @@ def __init__( self.is_same_semantics = is_same_semantics self.semantic_hash = semantic_hash self.storage_level = storage_level + self.ddl_string = ddl_string @classmethod def fromProto(cls, pb: Any) -> "AnalyzeResult": @@ -520,6 +524,7 @@ def fromProto(cls, pb: Any) -> "AnalyzeResult": is_same_semantics: Optional[bool] = None semantic_hash: Optional[int] = None storage_level: Optional[StorageLevel] = None + ddl_string: Optional[str] = None if pb.HasField("schema"): schema = types.proto_schema_to_pyspark_data_type(pb.schema.schema) @@ -547,6 +552,8 @@ def fromProto(cls, pb: Any) -> "AnalyzeResult": pass elif pb.HasField("get_storage_level"): storage_level = proto_to_storage_level(pb.get_storage_level.storage_level) + elif pb.HasField("json_to_ddl"): + ddl_string = pb.json_to_ddl.ddl_string else: raise SparkConnectException("No analyze result found!") @@ -562,6 +569,7 @@ def fromProto(cls, pb: Any) -> 
"AnalyzeResult": is_same_semantics, semantic_hash, storage_level, + ddl_string, ) @@ -669,6 +677,9 @@ def __init__( self._progress_handlers: List[ProgressHandler] = [] + # cleanup ml cache if possible + atexit.register(self._cleanup_ml) + def register_progress_handler(self, handler: ProgressHandler) -> None: """ Register a progress handler to be called when a progress message is received. @@ -1284,6 +1295,8 @@ def _analyze(self, method: str, **kwargs: Any) -> AnalyzeResult: req.unpersist.blocking = cast(bool, kwargs.get("blocking")) elif method == "get_storage_level": req.get_storage_level.relation.CopyFrom(cast(pb2.Relation, kwargs.get("relation"))) + elif method == "json_to_ddl": + req.json_to_ddl.json_string = cast(str, kwargs.get("json_string")) else: raise PySparkValueError( errorClass="UNSUPPORTED_OPERATION", @@ -1471,6 +1484,8 @@ def handle_response( b.checkpoint_command_result.relation ) } + if b.HasField("ml_command_result"): + yield {"ml_command_result": b.ml_command_result} try: if self._use_reattachable_execute: @@ -1923,3 +1938,33 @@ def _create_profile(self, profile: pb2.ResourceProfile) -> int: (_, properties, _) = self.execute_command(cmd) profile_id = properties["create_resource_profile_command_result"] return profile_id + + def add_ml_cache(self, cache_id: str) -> None: + if not hasattr(self.thread_local, "ml_caches"): + self.thread_local.ml_caches = set() + self.thread_local.ml_caches.add(cache_id) + + def remove_ml_cache(self, cache_id: str) -> None: + if not hasattr(self.thread_local, "ml_caches"): + self.thread_local.ml_caches = set() + + if cache_id in self.thread_local.ml_caches: + self._delete_ml_cache(cache_id) + + def _delete_ml_cache(self, cache_id: str) -> None: + # try best to delete the cache + try: + command = pb2.Command() + command.ml_command.delete.obj_ref.CopyFrom(pb2.ObjectRef(id=cache_id)) + self.execute_command(command) + except Exception: + pass + + def _cleanup_ml(self) -> None: + if not hasattr(self.thread_local, 
"ml_caches"): + self.thread_local.ml_caches = set() + + self.disable_reattachable_execute() + # Todo add a pattern to delete all model in one command + for model_id in self.thread_local.ml_caches: + self._delete_ml_cache(model_id) diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py index e840081146340..c5733801814eb 100644 --- a/python/pyspark/sql/connect/column.py +++ b/python/pyspark/sql/connect/column.py @@ -34,7 +34,6 @@ PySparkTypeError, PySparkAttributeError, PySparkValueError, - PySparkNotImplementedError, ) from pyspark.sql.types import DataType from pyspark.sql.utils import enum_to_value @@ -44,6 +43,7 @@ Expression, UnresolvedFunction, UnresolvedExtractValue, + LazyExpression, LiteralExpression, CaseWhen, SortOrder, @@ -460,11 +460,7 @@ def over(self, window: "WindowSpec") -> ParentColumn: # type: ignore[override] return Column(WindowExpression(windowFunction=self._expr, windowSpec=window)) def outer(self) -> ParentColumn: - # TODO(SPARK-50134): Implement this method - raise PySparkNotImplementedError( - errorClass="NOT_IMPLEMENTED", - messageParameters={"feature": "outer()"}, - ) + return Column(LazyExpression(self._expr)) def isin(self, *cols: Any) -> ParentColumn: if len(cols) == 1 and isinstance(cols[0], (list, set)): diff --git a/python/pyspark/sql/connect/conf.py b/python/pyspark/sql/connect/conf.py index 1ef72ee3cfa43..84d7ad34fb360 100644 --- a/python/pyspark/sql/connect/conf.py +++ b/python/pyspark/sql/connect/conf.py @@ -49,6 +49,20 @@ def set(self, key: str, value: Union[str, int, bool]) -> None: set.__doc__ = PySparkRuntimeConfig.set.__doc__ + def _set_all(self, configs: Dict[str, Union[str, int, bool]], silent: bool) -> None: + conf_list = [] + for key, value in configs.items(): + if isinstance(value, bool): + value = "true" if value else "false" + elif isinstance(value, int): + value = str(value) + conf_list.append(proto.KeyValue(key=key, value=value)) + op_set = proto.ConfigRequest.Set(pairs=conf_list, 
silent=silent) + operation = proto.ConfigRequest.Operation(set=op_set) + result = self._client.config(operation) + for warn in result.warnings: + warnings.warn(warn) + def get( self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue ) -> Optional[str]: diff --git a/python/pyspark/sql/connect/conversion.py b/python/pyspark/sql/connect/conversion.py index d803f37c5b9f1..b6b0bd65adcb8 100644 --- a/python/pyspark/sql/connect/conversion.py +++ b/python/pyspark/sql/connect/conversion.py @@ -104,6 +104,7 @@ def _need_converter( def _create_converter( dataType: DataType, nullable: bool = True, + variants_as_dicts: bool = False, # some code paths may require python internal types ) -> Callable: assert dataType is not None and isinstance(dataType, DataType) assert isinstance(nullable, bool) @@ -126,8 +127,7 @@ def convert_null(value: Any) -> Any: field_convs = [ LocalDataToArrowConversion._create_converter( - field.dataType, - field.nullable, + field.dataType, field.nullable, variants_as_dicts ) for field in dataType.fields ] @@ -170,8 +170,7 @@ def convert_struct(value: Any) -> Any: elif isinstance(dataType, ArrayType): element_conv = LocalDataToArrowConversion._create_converter( - dataType.elementType, - dataType.containsNull, + dataType.elementType, dataType.containsNull, variants_as_dicts ) def convert_array(value: Any) -> Any: @@ -188,8 +187,7 @@ def convert_array(value: Any) -> Any: elif isinstance(dataType, MapType): key_conv = LocalDataToArrowConversion._create_converter(dataType.keyType) value_conv = LocalDataToArrowConversion._create_converter( - dataType.valueType, - dataType.valueContainsNull, + dataType.valueType, dataType.valueContainsNull, variants_as_dicts ) def convert_map(value: Any) -> Any: @@ -303,8 +301,11 @@ def convert_variant(value: Any) -> Any: isinstance(value, dict) and all(key in value for key in ["value", "metadata"]) and all(isinstance(value[key], bytes) for key in ["value", "metadata"]) + and not variants_as_dicts ): return 
VariantVal(value["value"], value["metadata"]) + elif isinstance(value, VariantVal) and variants_as_dicts: + return VariantType().toInternal(value) else: raise PySparkValueError(errorClass="MALFORMED_VARIANT") @@ -331,8 +332,7 @@ def convert(data: Sequence[Any], schema: StructType) -> "pa.Table": column_convs = [ LocalDataToArrowConversion._create_converter( - field.dataType, - field.nullable, + field.dataType, field.nullable, variants_as_dicts=True ) for field in schema.fields ] diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index e85efeb592dff..76b7881f234ff 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -79,12 +79,14 @@ from pyspark.sql.column import Column from pyspark.sql.connect.expressions import ( ColumnReference, + SubqueryExpression, UnresolvedRegex, UnresolvedStar, ) from pyspark.sql.connect.functions import builtin as F from pyspark.sql.pandas.types import from_arrow_schema, to_arrow_schema from pyspark.sql.pandas.functions import _validate_pandas_udf # type: ignore[attr-defined] +from pyspark.sql.table_arg import TableArg if TYPE_CHECKING: @@ -272,6 +274,14 @@ def alias(self, alias: str) -> ParentDataFrame: res._cached_schema = self._cached_schema return res + def metadataColumn(self, colName: str) -> Column: + if not isinstance(colName, str): + raise PySparkTypeError( + errorClass="NOT_STR", + messageParameters={"arg_name": "colName", "arg_type": type(colName).__name__}, + ) + return self._col(colName, is_metadata_column=True) + def colRegex(self, colName: str) -> Column: from pyspark.sql.connect.column import Column as ConnectColumn @@ -686,6 +696,22 @@ def join( session=self._session, ) + def lateralJoin( + self, + other: ParentDataFrame, + on: Optional[Column] = None, + how: Optional[str] = None, + ) -> ParentDataFrame: + self._check_same_session(other) + if how is not None and isinstance(how, str): + how = how.lower().replace("_", "") + return 
DataFrame( + plan.LateralJoin( + left=self._plan, right=cast(plan.LogicalPlan, other._plan), on=on, how=how + ), + session=self._session, + ) + def _joinAsOf( self, other: ParentDataFrame, @@ -1732,13 +1758,14 @@ def __getitem__( messageParameters={"arg_name": "item", "arg_type": type(item).__name__}, ) - def _col(self, name: str) -> Column: + def _col(self, name: str, is_metadata_column: bool = False) -> Column: from pyspark.sql.connect.column import Column as ConnectColumn return ConnectColumn( ColumnReference( unparsed_identifier=name, plan_id=self._plan._plan_id, + is_metadata_column=is_metadata_column, ) ) @@ -1784,19 +1811,22 @@ def transpose(self, indexColumn: Optional["ColumnOrName"] = None) -> ParentDataF self._session, ) - def scalar(self) -> Column: - # TODO(SPARK-50134): Implement this method + def asTable(self) -> TableArg: + # TODO(SPARK-50393): Support DataFrame conversion to table argument in Spark Connect raise PySparkNotImplementedError( errorClass="NOT_IMPLEMENTED", - messageParameters={"feature": "scalar()"}, + messageParameters={"feature": "asTable()"}, ) + def scalar(self) -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + + return ConnectColumn(SubqueryExpression(self._plan, subquery_type="scalar")) + def exists(self) -> Column: - # TODO(SPARK-50134): Implement this method - raise PySparkNotImplementedError( - errorClass="NOT_IMPLEMENTED", - messageParameters={"feature": "exists()"}, - ) + from pyspark.sql.connect.column import Column as ConnectColumn + + return ConnectColumn(SubqueryExpression(self._plan, subquery_type="exists")) @property def schema(self) -> StructType: @@ -2023,6 +2053,8 @@ def _map_partitions( from pyspark.sql.connect.udf import UserDefinedFunction _validate_pandas_udf(func, evalType) + if isinstance(schema, str): + schema = cast(StructType, self._session._parse_ddl(schema)) udf_obj = UserDefinedFunction( func, returnType=schema, @@ -2262,10 +2294,6 @@ def _test() -> None: del 
pyspark.sql.dataframe.DataFrame.toJSON.__doc__ del pyspark.sql.dataframe.DataFrame.rdd.__doc__ - # TODO(SPARK-50134): Support subquery in connect - del pyspark.sql.dataframe.DataFrame.scalar.__doc__ - del pyspark.sql.dataframe.DataFrame.exists.__doc__ - globs["spark"] = ( PySparkSession.builder.appName("sql.connect.dataframe tests") .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]")) diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py index 5a5320366f666..c32db14968c6b 100644 --- a/python/pyspark/sql/connect/expressions.py +++ b/python/pyspark/sql/connect/expressions.py @@ -82,6 +82,7 @@ if TYPE_CHECKING: from pyspark.sql.connect.client import SparkConnectClient from pyspark.sql.connect.window import WindowSpec + from pyspark.sql.connect.plan import LogicalPlan class Expression: @@ -128,6 +129,15 @@ def _create_proto_expression(self) -> proto.Expression: plan.common.origin.CopyFrom(self.origin) return plan + @property + def children(self) -> Sequence["Expression"]: + return [] + + def foreach(self, f: Callable[["Expression"], None]) -> None: + f(self) + for c in self.children: + c.foreach(f) + class CaseWhen(Expression): def __init__( @@ -162,6 +172,16 @@ def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": return unresolved_function.to_plan(session) + @property + def children(self) -> Sequence["Expression"]: + children = [] + for branch in self._branches: + children.append(branch[0]) + children.append(branch[1]) + if self._else_value is not None: + children.append(self._else_value) + return children + def __repr__(self) -> str: _cases = "".join([f" WHEN {c} THEN {v}" for c, v in self._branches]) _else = f" ELSE {self._else_value}" if self._else_value is not None else "" @@ -196,6 +216,10 @@ def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": exp.alias.expr.CopyFrom(self._child.to_plan(session)) return exp + @property + def children(self) -> Sequence["Expression"]: 
+ return [self._child] + def __repr__(self) -> str: return f"{self._child} AS {','.join(self._alias)}" @@ -500,7 +524,12 @@ class ColumnReference(Expression): treat it as an unresolved attribute. Attributes that have the same fully qualified name are identical""" - def __init__(self, unparsed_identifier: str, plan_id: Optional[int] = None) -> None: + def __init__( + self, + unparsed_identifier: str, + plan_id: Optional[int] = None, + is_metadata_column: bool = False, + ) -> None: super().__init__() assert isinstance(unparsed_identifier, str) self._unparsed_identifier = unparsed_identifier @@ -508,6 +537,8 @@ def __init__(self, unparsed_identifier: str, plan_id: Optional[int] = None) -> N assert plan_id is None or isinstance(plan_id, int) self._plan_id = plan_id + self._is_metadata_column = is_metadata_column + def name(self) -> str: """Returns the qualified name of the column reference.""" return self._unparsed_identifier @@ -518,6 +549,7 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: expr.unresolved_attribute.unparsed_identifier = self._unparsed_identifier if self._plan_id is not None: expr.unresolved_attribute.plan_id = self._plan_id + expr.unresolved_attribute.is_metadata_column = self._is_metadata_column return expr def __repr__(self) -> str: @@ -622,6 +654,10 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: return sort + @property + def children(self) -> Sequence["Expression"]: + return [self._child] + class UnresolvedFunction(Expression): def __init__( @@ -649,6 +685,10 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: fun.unresolved_function.is_distinct = self._is_distinct return fun + @property + def children(self) -> Sequence["Expression"]: + return self._args + def __repr__(self) -> str: # Default print handling: if self._is_distinct: @@ -730,12 +770,12 @@ def __init__( function_name: str, function: Union[PythonUDF, JavaUDF], deterministic: bool = False, - arguments: 
Sequence[Expression] = [], + arguments: Optional[Sequence[Expression]] = None, ): super().__init__() self._function_name = function_name self._deterministic = deterministic - self._arguments = arguments + self._arguments: Sequence[Expression] = arguments or [] self._function = function def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": @@ -770,6 +810,10 @@ def to_plan_judf( expr.java_udf.CopyFrom(cast(proto.JavaUDF, self._function.to_plan(session))) return expr + @property + def children(self) -> Sequence["Expression"]: + return self._arguments + def __repr__(self) -> str: return f"{self._function_name}({', '.join([str(arg) for arg in self._arguments])})" @@ -799,6 +843,10 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: expr.update_fields.value_expression.CopyFrom(self._valueExpr.to_plan(session)) return expr + @property + def children(self) -> Sequence["Expression"]: + return [self._structExpr, self._valueExpr] + def __repr__(self) -> str: return f"update_field({self._structExpr}, {self._fieldName}, {self._valueExpr})" @@ -823,6 +871,10 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: expr.update_fields.field_name = self._fieldName return expr + @property + def children(self) -> Sequence["Expression"]: + return [self._structExpr] + def __repr__(self) -> str: return f"drop_field({self._structExpr}, {self._fieldName})" @@ -847,6 +899,10 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: expr.unresolved_extract_value.extraction.CopyFrom(self._extraction.to_plan(session)) return expr + @property + def children(self) -> Sequence["Expression"]: + return [self._child, self._extraction] + def __repr__(self) -> str: return f"{self._child}['{self._extraction}']" @@ -906,6 +962,10 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: return fun + @property + def children(self) -> Sequence["Expression"]: + return [self._expr] + def __repr__(self) -> str: # We cannot 
guarantee the string representations be exactly the same, e.g. # str(sf.col("a").cast("long")): @@ -989,6 +1049,10 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: ) return expr + @property + def children(self) -> Sequence["Expression"]: + return [self._function] + self._arguments + def __repr__(self) -> str: return ( f"LambdaFunction({str(self._function)}, " @@ -1098,6 +1162,12 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: return expr + @property + def children(self) -> Sequence["Expression"]: + return ( + [self._windowFunction] + self._windowSpec._partitionSpec + self._windowSpec._orderSpec + ) + def __repr__(self) -> str: return f"WindowExpression({str(self._windowFunction)}, ({str(self._windowSpec)}))" @@ -1128,6 +1198,10 @@ def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": expr.call_function.arguments.extend([arg.to_plan(session) for arg in self._args]) return expr + @property + def children(self) -> Sequence["Expression"]: + return self._args + def __repr__(self) -> str: if len(self._args) > 0: return f"CallFunction('{self._name}', {', '.join([str(arg) for arg in self._args])})" @@ -1151,5 +1225,50 @@ def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": expr.named_argument_expression.value.CopyFrom(self._value.to_plan(session)) return expr + @property + def children(self) -> Sequence["Expression"]: + return [self._value] + def __repr__(self) -> str: return f"{self._key} => {self._value}" + + +class LazyExpression(Expression): + def __init__(self, expr: Expression): + assert isinstance(expr, Expression) + super().__init__() + self._expr = expr + + def to_plan(self, session: "SparkConnectClient") -> proto.Expression: + expr = self._create_proto_expression() + expr.lazy_expression.child.CopyFrom(self._expr.to_plan(session)) + return expr + + @property + def children(self) -> Sequence["Expression"]: + return [self._expr] + + def __repr__(self) -> str: + return 
f"lazy({self._expr})" + + +class SubqueryExpression(Expression): + def __init__(self, plan: "LogicalPlan", subquery_type: str) -> None: + assert isinstance(subquery_type, str) + assert subquery_type in ("scalar", "exists") + + super().__init__() + self._plan = plan + self._subquery_type = subquery_type + + def to_plan(self, session: "SparkConnectClient") -> proto.Expression: + expr = self._create_proto_expression() + expr.subquery_expression.plan_id = self._plan._plan_id + if self._subquery_type == "scalar": + expr.subquery_expression.subquery_type = proto.SubqueryExpression.SUBQUERY_TYPE_SCALAR + elif self._subquery_type == "exists": + expr.subquery_expression.subquery_type = proto.SubqueryExpression.SUBQUERY_TYPE_EXISTS + return expr + + def __repr__(self) -> str: + return f"SubqueryExpression({self._plan}, {self._subquery_type})" diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index f52cdffb84b7c..f13eeab12dd35 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -1064,6 +1064,64 @@ def collect_set(col: "ColumnOrName") -> Column: collect_set.__doc__ = pysparkfuncs.collect_set.__doc__ +def listagg(col: "ColumnOrName", delimiter: Optional[Union[Column, str, bytes]] = None) -> Column: + if delimiter is None: + return _invoke_function_over_columns("listagg", col) + else: + return _invoke_function_over_columns("listagg", col, lit(delimiter)) + + +listagg.__doc__ = pysparkfuncs.listagg.__doc__ + + +def listagg_distinct( + col: "ColumnOrName", delimiter: Optional[Union[Column, str, bytes]] = None +) -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + + args = [col] + if delimiter is not None: + args += [lit(delimiter)] + + _exprs = [_to_col(c)._expr for c in args] + return ConnectColumn( + UnresolvedFunction("listagg", _exprs, is_distinct=True) # type: ignore[arg-type] + ) + + +listagg_distinct.__doc__ = 
pysparkfuncs.listagg_distinct.__doc__ + + +def string_agg( + col: "ColumnOrName", delimiter: Optional[Union[Column, str, bytes]] = None +) -> Column: + if delimiter is None: + return _invoke_function_over_columns("string_agg", col) + else: + return _invoke_function_over_columns("string_agg", col, lit(delimiter)) + + +string_agg.__doc__ = pysparkfuncs.string_agg.__doc__ + + +def string_agg_distinct( + col: "ColumnOrName", delimiter: Optional[Union[Column, str, bytes]] = None +) -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + + args = [col] + if delimiter is not None: + args += [lit(delimiter)] + + _exprs = [_to_col(c)._expr for c in args] + return ConnectColumn( + UnresolvedFunction("string_agg", _exprs, is_distinct=True) # type: ignore[arg-type] + ) + + +string_agg_distinct.__doc__ = pysparkfuncs.string_agg_distinct.__doc__ + + def corr(col1: "ColumnOrName", col2: "ColumnOrName") -> Column: return _invoke_function_over_columns("corr", col1, col2) diff --git a/python/pyspark/sql/connect/group.py b/python/pyspark/sql/connect/group.py index 863461da10ec9..11adc8850fec1 100644 --- a/python/pyspark/sql/connect/group.py +++ b/python/pyspark/sql/connect/group.py @@ -35,8 +35,7 @@ from pyspark.sql.group import GroupedData as PySparkGroupedData from pyspark.sql.pandas.group_ops import PandasCogroupedOps as PySparkPandasCogroupedOps from pyspark.sql.pandas.functions import _validate_pandas_udf # type: ignore[attr-defined] -from pyspark.sql.types import NumericType -from pyspark.sql.types import StructType +from pyspark.sql.types import NumericType, StructType import pyspark.sql.connect.plan as plan from pyspark.sql.column import Column @@ -295,6 +294,8 @@ def applyInPandas( from pyspark.sql.connect.dataframe import DataFrame _validate_pandas_udf(func, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF) + if isinstance(schema, str): + schema = cast(StructType, self._df._session._parse_ddl(schema)) udf_obj = UserDefinedFunction( func, returnType=schema, @@ 
-367,6 +368,8 @@ def applyInArrow( from pyspark.sql.connect.dataframe import DataFrame _validate_pandas_udf(func, PythonEvalType.SQL_GROUPED_MAP_ARROW_UDF) + if isinstance(schema, str): + schema = cast(StructType, self._df._session._parse_ddl(schema)) udf_obj = UserDefinedFunction( func, returnType=schema, @@ -410,6 +413,8 @@ def applyInPandas( from pyspark.sql.connect.dataframe import DataFrame _validate_pandas_udf(func, PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF) + if isinstance(schema, str): + schema = cast(StructType, self._gd1._df._session._parse_ddl(schema)) udf_obj = UserDefinedFunction( func, returnType=schema, @@ -439,6 +444,8 @@ def applyInArrow( from pyspark.sql.connect.dataframe import DataFrame _validate_pandas_udf(func, PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF) + if isinstance(schema, str): + schema = cast(StructType, self._gd1._df._session._parse_ddl(schema)) udf_obj = UserDefinedFunction( func, returnType=schema, diff --git a/python/pyspark/sql/connect/logging.py b/python/pyspark/sql/connect/logging.py index b80342cf99743..099193fd7ce45 100644 --- a/python/pyspark/sql/connect/logging.py +++ b/python/pyspark/sql/connect/logging.py @@ -21,13 +21,18 @@ import os from typing import Optional -__all__ = [ - "getLogLevel", -] +__all__ = ["configureLogging", "getLogLevel"] -def _configure_logging() -> logging.Logger: - """Configure logging for the Spark Connect clients.""" +def configureLogging(level: Optional[str] = None) -> logging.Logger: + """ + Configure log level for Spark Connect components. + When not specified as a parameter, log level will be configured based on + the SPARK_CONNECT_LOG_LEVEL environment variable. + When both are absent, logging is disabled. + + .. 
versionadded:: 4.0.0 + """ logger = PySparkLogger.getLogger(__name__) handler = logging.StreamHandler() handler.setFormatter( @@ -35,8 +40,9 @@ def _configure_logging() -> logging.Logger: ) logger.addHandler(handler) - # Check the environment variables for log levels: - if "SPARK_CONNECT_LOG_LEVEL" in os.environ: + if level is not None: + logger.setLevel(level.upper()) + elif "SPARK_CONNECT_LOG_LEVEL" in os.environ: logger.setLevel(os.environ["SPARK_CONNECT_LOG_LEVEL"].upper()) else: logger.disabled = True @@ -44,7 +50,7 @@ def _configure_logging() -> logging.Logger: # Instantiate the logger based on the environment configuration. -logger = _configure_logging() +logger = configureLogging() def getLogLevel() -> Optional[int]: diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index b387ca1d4e508..02b60381ab939 100644 --- a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -52,7 +52,7 @@ from pyspark.sql.connect.logging import logger from pyspark.sql.connect.proto import base_pb2 as spark_dot_connect_dot_base__pb2 from pyspark.sql.connect.conversion import storage_level_to_proto -from pyspark.sql.connect.expressions import Expression +from pyspark.sql.connect.expressions import Expression, SubqueryExpression from pyspark.sql.connect.types import pyspark_types_to_proto_types, UnparsedDataType from pyspark.errors import ( AnalysisException, @@ -73,9 +73,30 @@ class LogicalPlan: INDENT = 2 - def __init__(self, child: Optional["LogicalPlan"]) -> None: + def __init__( + self, child: Optional["LogicalPlan"], references: Optional[Sequence["LogicalPlan"]] = None + ) -> None: + """ + + Parameters + ---------- + child : :class:`LogicalPlan`, optional. + The child logical plan. + references : list of :class:`LogicalPlan`, optional. + The list of logical plans that are referenced as subqueries in this logical plan. 
+ """ self._child = child - self._plan_id = LogicalPlan._fresh_plan_id() + self._root_plan_id = LogicalPlan._fresh_plan_id() + + self._references: Sequence["LogicalPlan"] = references or [] + self._plan_id_with_rel: Optional[int] = None + if len(self._references) > 0: + assert all(isinstance(r, LogicalPlan) for r in self._references) + self._plan_id_with_rel = LogicalPlan._fresh_plan_id() + + @property + def _plan_id(self) -> int: + return self._plan_id_with_rel or self._root_plan_id @staticmethod def _fresh_plan_id() -> int: @@ -89,7 +110,7 @@ def _fresh_plan_id() -> int: def _create_proto_relation(self) -> proto.Relation: plan = proto.Relation() - plan.common.plan_id = self._plan_id + plan.common.plan_id = self._root_plan_id return plan def plan(self, session: "SparkConnectClient") -> proto.Relation: # type: ignore[empty-body] @@ -136,6 +157,42 @@ def observations(self) -> Dict[str, "Observation"]: else: return self._child.observations + @staticmethod + def _collect_references( + cols_or_exprs: Sequence[Union[Column, Expression]] + ) -> Sequence["LogicalPlan"]: + references: List[LogicalPlan] = [] + + def append_reference(e: Expression) -> None: + if isinstance(e, SubqueryExpression): + references.append(e._plan) + + for col_or_expr in cols_or_exprs: + if isinstance(col_or_expr, Column): + col_or_expr._expr.foreach(append_reference) + else: + col_or_expr.foreach(append_reference) + return references + + def _with_relations( + self, root: proto.Relation, session: "SparkConnectClient" + ) -> proto.Relation: + if len(self._references) == 0: + return root + else: + # When there are references to other DataFrame, e.g., subqueries, build new plan like: + # with_relations [id 10] + # root: plan [id 9] + # reference: + # refs#1: [id 8] + # refs#2: [id 5] + plan = proto.Relation() + assert isinstance(self._plan_id_with_rel, int) + plan.common.plan_id = self._plan_id_with_rel + plan.with_relations.root.CopyFrom(root) + 
plan.with_relations.references.extend([ref.plan(session) for ref in self._references]) + return plan + def _parameters_to_print(self, parameters: Mapping[str, Any]) -> Mapping[str, Any]: """ Extracts the parameters that are able to be printed. It looks up the signature @@ -192,6 +249,7 @@ def _parameters_to_print(self, parameters: Mapping[str, Any]) -> Mapping[str, An getattr(a, "__forward_arg__", "").endswith("LogicalPlan") for a in getattr(tpe.annotation, "__args__", ()) ) + if ( not is_logical_plan and not is_forwardref_logical_plan @@ -205,7 +263,7 @@ def _parameters_to_print(self, parameters: Mapping[str, Any]) -> Mapping[str, An try: params[name] = getattr(self, "_" + name) except AttributeError: - pass # Simpy ignore + pass # Simply ignore return params def print(self, indent: int = 0) -> str: @@ -473,8 +531,8 @@ def __init__( child: Optional["LogicalPlan"], columns: List[Column], ) -> None: - super().__init__(child) assert all(isinstance(c, Column) for c in columns) + super().__init__(child, self._collect_references(columns)) self._columns = columns def plan(self, session: "SparkConnectClient") -> proto.Relation: @@ -482,7 +540,8 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan = self._create_proto_relation() plan.project.input.CopyFrom(self._child.plan(session)) plan.project.expressions.extend([c.to_plan(session) for c in self._columns]) - return plan + + return self._with_relations(plan, session) class WithColumns(LogicalPlan): @@ -495,8 +554,6 @@ def __init__( columns: Sequence[Column], metadata: Optional[Sequence[str]] = None, ) -> None: - super().__init__(child) - assert isinstance(columnNames, list) assert len(columnNames) > 0 assert all(isinstance(c, str) for c in columnNames) @@ -513,6 +570,8 @@ def __init__( # validate json string assert m == "" or json.loads(m) is not None + super().__init__(child, self._collect_references(columns)) + self._columnNames = columnNames self._columns = columns self._metadata = metadata @@ 
-530,7 +589,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: alias.metadata = self._metadata[i] plan.with_columns.aliases.append(alias) - return plan + return self._with_relations(plan, session) class WithWatermark(LogicalPlan): @@ -608,16 +667,14 @@ def __init__( name: str, parameters: Sequence[Column], ) -> None: - super().__init__(child) - assert isinstance(name, str) - self._name = name - assert parameters is not None and isinstance(parameters, List) for param in parameters: assert isinstance(param, Column) + super().__init__(child, self._collect_references(parameters)) + self._name = name self._parameters = parameters def plan(self, session: "SparkConnectClient") -> proto.Relation: @@ -626,12 +683,12 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.hint.input.CopyFrom(self._child.plan(session)) plan.hint.name = self._name plan.hint.parameters.extend([param.to_plan(session) for param in self._parameters]) - return plan + return self._with_relations(plan, session) class Filter(LogicalPlan): def __init__(self, child: Optional["LogicalPlan"], filter: Column) -> None: - super().__init__(child) + super().__init__(child, self._collect_references([filter])) self.filter = filter def plan(self, session: "SparkConnectClient") -> proto.Relation: @@ -639,7 +696,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan = self._create_proto_relation() plan.filter.input.CopyFrom(self._child.plan(session)) plan.filter.condition.CopyFrom(self.filter.to_plan(session)) - return plan + return self._with_relations(plan, session) class Limit(LogicalPlan): @@ -712,11 +769,10 @@ def __init__( columns: List[Column], is_global: bool, ) -> None: - super().__init__(child) - assert all(isinstance(c, Column) for c in columns) assert isinstance(is_global, bool) + super().__init__(child, self._collect_references(columns)) self.columns = columns self.is_global = is_global @@ -726,7 +782,7 @@ def plan(self, session: 
"SparkConnectClient") -> proto.Relation: plan.sort.input.CopyFrom(self._child.plan(session)) plan.sort.order.extend([c.to_plan(session).sort_order for c in self.columns]) plan.sort.is_global = self.is_global - return plan + return self._with_relations(plan, session) class Drop(LogicalPlan): @@ -735,9 +791,12 @@ def __init__( child: Optional["LogicalPlan"], columns: List[Union[Column, str]], ) -> None: - super().__init__(child) if len(columns) > 0: assert all(isinstance(c, (Column, str)) for c in columns) + + super().__init__( + child, self._collect_references([c for c in columns if isinstance(c, Column)]) + ) self._columns = columns def plan(self, session: "SparkConnectClient") -> proto.Relation: @@ -749,7 +808,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.drop.columns.append(c.to_plan(session)) else: plan.drop.column_names.append(c) - return plan + return self._with_relations(plan, session) class Sample(LogicalPlan): @@ -792,8 +851,6 @@ def __init__( pivot_values: Optional[Sequence[Column]], grouping_sets: Optional[Sequence[Sequence[Column]]], ) -> None: - super().__init__(child) - assert isinstance(group_type, str) and group_type in [ "groupby", "rollup", @@ -801,15 +858,12 @@ def __init__( "pivot", "grouping_sets", ] - self._group_type = group_type assert isinstance(grouping_cols, list) and all(isinstance(c, Column) for c in grouping_cols) - self._grouping_cols = grouping_cols assert isinstance(aggregate_cols, list) and all( isinstance(c, Column) for c in aggregate_cols ) - self._aggregate_cols = aggregate_cols if group_type == "pivot": assert pivot_col is not None and isinstance(pivot_col, Column) @@ -821,6 +875,19 @@ def __init__( assert pivot_values is None assert grouping_sets is None + super().__init__( + child, + self._collect_references( + grouping_cols + + aggregate_cols + + ([pivot_col] if pivot_col is not None else []) + + (pivot_values if pivot_values is not None else []) + + ([g for gs in grouping_sets for g in gs] if 
grouping_sets is not None else []) + ), + ) + self._group_type = group_type + self._grouping_cols = grouping_cols + self._aggregate_cols = aggregate_cols self._pivot_col = pivot_col self._pivot_values = pivot_values self._grouping_sets = grouping_sets @@ -859,7 +926,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: grouping_set=[c.to_plan(session) for c in grouping_set] ) ) - return plan + return self._with_relations(plan, session) class Join(LogicalPlan): @@ -870,7 +937,16 @@ def __init__( on: Optional[Union[str, List[str], Column, List[Column]]], how: Optional[str], ) -> None: - super().__init__(left) + super().__init__( + left, + self._collect_references( + [] + if on is None or isinstance(on, str) + else [on] + if isinstance(on, Column) + else [c for c in on if isinstance(c, Column)] + ), + ) self.left = cast(LogicalPlan, left) self.right = right self.on = on @@ -893,7 +969,35 @@ def __init__( else: raise AnalysisException( errorClass="UNSUPPORTED_JOIN_TYPE", - messageParameters={"join_type": how}, + messageParameters={ + "typ": how, + "supported": ( + "'" + + "', '".join( + [ + "inner", + "outer", + "full", + "fullouter", + "full_outer", + "leftouter", + "left", + "left_outer", + "rightouter", + "right", + "right_outer", + "leftsemi", + "left_semi", + "semi", + "leftanti", + "left_anti", + "anti", + "cross", + ] + ) + + "'" + ), + }, ) self.how = join_type @@ -914,7 +1018,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: merge_column = functools.reduce(lambda c1, c2: c1 & c2, self.on) plan.join.join_condition.CopyFrom(cast(Column, merge_column).to_plan(session)) plan.join.join_type = self.how - return plan + return self._with_relations(plan, session) @property def observations(self) -> Dict[str, "Observation"]: @@ -954,7 +1058,20 @@ def __init__( allow_exact_matches: bool, direction: str, ) -> None: - super().__init__(left) + super().__init__( + left, + self._collect_references( + [left_as_of, right_as_of] + + ( + [] + 
if on is None or isinstance(on, str) + else [on] + if isinstance(on, Column) + else [c for c in on if isinstance(c, Column)] + ) + + ([tolerance] if tolerance is not None else []) + ), + ) self.left = left self.right = right self.left_as_of = left_as_of @@ -994,7 +1111,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.as_of_join.allow_exact_matches = self.allow_exact_matches plan.as_of_join.direction = self.direction - return plan + return self._with_relations(plan, session) @property def observations(self) -> Dict[str, "Observation"]: @@ -1028,6 +1145,74 @@ def _repr_html_(self) -> str: """ +class LateralJoin(LogicalPlan): + def __init__( + self, + left: Optional[LogicalPlan], + right: LogicalPlan, + on: Optional[Column], + how: Optional[str], + ) -> None: + super().__init__(left, self._collect_references([on] if on is not None else [])) + self.left = cast(LogicalPlan, left) + self.right = right + self.on = on + if how is None: + join_type = proto.Join.JoinType.JOIN_TYPE_INNER + elif how == "inner": + join_type = proto.Join.JoinType.JOIN_TYPE_INNER + elif how in ["leftouter", "left"]: + join_type = proto.Join.JoinType.JOIN_TYPE_LEFT_OUTER + elif how == "cross": + join_type = proto.Join.JoinType.JOIN_TYPE_CROSS + else: + raise AnalysisException( + errorClass="UNSUPPORTED_JOIN_TYPE", + messageParameters={ + "typ": how, + "supported": ( + "'" + + "', '".join(["inner", "leftouter", "left", "left_outer", "cross"]) + + "'" + ), + }, + ) + self.how = join_type + + def plan(self, session: "SparkConnectClient") -> proto.Relation: + plan = self._create_proto_relation() + plan.lateral_join.left.CopyFrom(self.left.plan(session)) + plan.lateral_join.right.CopyFrom(self.right.plan(session)) + if self.on is not None: + plan.lateral_join.join_condition.CopyFrom(self.on.to_plan(session)) + plan.lateral_join.join_type = self.how + return self._with_relations(plan, session) + + @property + def observations(self) -> Dict[str, "Observation"]: + return 
dict(**super().observations, **self.right.observations) + + def print(self, indent: int = 0) -> str: + i = " " * indent + o = " " * (indent + LogicalPlan.INDENT) + n = indent + LogicalPlan.INDENT * 2 + return ( + f"{i}\n{o}" + f"left=\n{self.left.print(n)}\n{o}right=\n{self.right.print(n)}" + ) + + def _repr_html_(self) -> str: + return f""" +
    +
  • + LateralJoin
    + Left: {self.left._repr_html_()} + Right: {self.right._repr_html_()} +
  • +
+ """ + + class SetOperation(LogicalPlan): def __init__( self, @@ -1129,9 +1314,9 @@ def __init__( num_partitions: Optional[int], columns: List[Column], ) -> None: - super().__init__(child) - self.num_partitions = num_partitions assert all(isinstance(c, Column) for c in columns) + super().__init__(child, self._collect_references(columns)) + self.num_partitions = num_partitions self.columns = columns def plan(self, session: "SparkConnectClient") -> proto.Relation: @@ -1144,7 +1329,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.repartition_by_expression.input.CopyFrom(self._child.plan(session)) if self.num_partitions is not None: plan.repartition_by_expression.num_partitions = self.num_partitions - return plan + return self._with_relations(plan, session) class SubqueryAlias(LogicalPlan): @@ -1190,8 +1375,6 @@ def __init__( named_args: Optional[Dict[str, Column]] = None, views: Optional[Sequence[SubqueryAlias]] = None, ) -> None: - super().__init__(None) - if args is not None: assert isinstance(args, List) assert all(isinstance(arg, Column) for arg in args) @@ -1205,10 +1388,8 @@ def __init__( if views is not None: assert isinstance(views, List) assert all(isinstance(v, SubqueryAlias) for v in views) - if len(views) > 0: - # reserved plan id for WithRelations - self._plan_id_with_rel = LogicalPlan._fresh_plan_id() + super().__init__(None, views) self._query = query self._args = args self._named_args = named_args @@ -1224,20 +1405,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: for k, arg in self._named_args.items(): plan.sql.named_arguments[k].CopyFrom(arg.to_plan(session)) - if self._views is not None and len(self._views) > 0: - # build new plan like - # with_relations [id 10] - # root: sql [id 9] - # reference: - # view#1: [id 8] - # view#2: [id 5] - sql_plan = plan - plan = proto.Relation() - plan.common.plan_id = self._plan_id_with_rel - plan.with_relations.root.CopyFrom(sql_plan) - 
plan.with_relations.references.extend([v.plan(session) for v in self._views]) - - return plan + return self._with_relations(plan, session) def command(self, session: "SparkConnectClient") -> proto.Command: cmd = proto.Command() @@ -1311,7 +1479,7 @@ def __init__( variable_column_name: str, value_column_name: str, ) -> None: - super().__init__(child) + super().__init__(child, self._collect_references(ids + (values or []))) self.ids = ids self.values = values self.variable_column_name = variable_column_name @@ -1326,7 +1494,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.unpivot.values.values.extend([v.to_plan(session) for v in self.values]) plan.unpivot.variable_column_name = self.variable_column_name plan.unpivot.value_column_name = self.value_column_name - return plan + return self._with_relations(plan, session) class Transpose(LogicalPlan): @@ -1337,7 +1505,7 @@ def __init__( child: Optional["LogicalPlan"], index_columns: Sequence[Column], ) -> None: - super().__init__(child) + super().__init__(child, self._collect_references(index_columns)) self.index_columns = index_columns def plan(self, session: "SparkConnectClient") -> proto.Relation: @@ -1347,12 +1515,12 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: if self.index_columns is not None and len(self.index_columns) > 0: for index_column in self.index_columns: plan.transpose.index_columns.append(index_column.to_plan(session)) - return plan + return self._with_relations(plan, session) class UnresolvedTableValuedFunction(LogicalPlan): def __init__(self, name: str, args: Sequence[Column]): - super().__init__(None) + super().__init__(None, self._collect_references(args)) self._name = name self._args = args @@ -1361,7 +1529,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.unresolved_table_valued_function.function_name = self._name for arg in self._args: plan.unresolved_table_valued_function.arguments.append(arg.to_plan(session)) - return plan + 
return self._with_relations(plan, session) class CollectMetrics(LogicalPlan): @@ -1373,9 +1541,9 @@ def __init__( observation: Union[str, "Observation"], exprs: List[Column], ) -> None: - super().__init__(child) - self._observation = observation assert all(isinstance(e, Column) for e in exprs) + super().__init__(child, self._collect_references(exprs)) + self._observation = observation self._exprs = exprs def plan(self, session: "SparkConnectClient") -> proto.Relation: @@ -1388,7 +1556,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: else str(self._observation._name) ) plan.collect_metrics.metrics.extend([e.to_plan(session) for e in self._exprs]) - return plan + return self._with_relations(plan, session) @property def observations(self) -> Dict[str, "Observation"]: @@ -1473,13 +1641,13 @@ def __init__( cols: Optional[List[str]], replacements: Sequence[Tuple[Column, Column]], ) -> None: - super().__init__(child) - self.cols = cols - assert replacements is not None and isinstance(replacements, List) for k, v in replacements: assert k is not None and isinstance(k, Column) assert v is not None and isinstance(v, Column) + + super().__init__(child, self._collect_references([e for t in replacements for e in t])) + self.cols = cols self.replacements = replacements def plan(self, session: "SparkConnectClient") -> proto.Relation: @@ -1494,7 +1662,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: replacement.old_value.CopyFrom(old_value.to_plan(session).literal) replacement.new_value.CopyFrom(new_value.to_plan(session).literal) plan.replace.replacements.append(replacement) - return plan + return self._with_relations(plan, session) class StatSummary(LogicalPlan): @@ -1604,8 +1772,6 @@ def __init__( fractions: Sequence[Tuple[Column, float]], seed: int, ) -> None: - super().__init__(child) - assert col is not None and isinstance(col, (Column, str)) assert fractions is not None and isinstance(fractions, List) @@ -1615,6 +1781,12 @@ def 
__init__( assert seed is None or isinstance(seed, int) + super().__init__( + child, + self._collect_references( + [col] if isinstance(col, Column) else [] + [c for c, _ in fractions] + ), + ) self._col = col self._fractions = fractions self._seed = seed @@ -1631,7 +1803,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: fraction.fraction = float(v) plan.sample_by.fractions.append(fraction) plan.sample_by.seed = self._seed - return plan + return self._with_relations(plan, session) class StatCorr(LogicalPlan): @@ -2279,7 +2451,7 @@ def __init__( ): assert isinstance(grouping_cols, list) and all(isinstance(c, Column) for c in grouping_cols) - super().__init__(child) + super().__init__(child, self._collect_references(grouping_cols)) self._grouping_cols = grouping_cols self._function = function._build_common_inline_user_defined_function(*cols) @@ -2291,7 +2463,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: [c.to_plan(session) for c in self._grouping_cols] ) plan.group_map.func.CopyFrom(self._function.to_plan_udf(session)) - return plan + return self._with_relations(plan, session) class CoGroupMap(LogicalPlan): @@ -2312,7 +2484,7 @@ def __init__( isinstance(c, Column) for c in other_grouping_cols ) - super().__init__(input) + super().__init__(input, self._collect_references(input_grouping_cols + other_grouping_cols)) self._input_grouping_cols = input_grouping_cols self._other_grouping_cols = other_grouping_cols self._other = cast(LogicalPlan, other) @@ -2332,7 +2504,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: [c.to_plan(session) for c in self._other_grouping_cols] ) plan.co_group_map.func.CopyFrom(self._function.to_plan_udf(session)) - return plan + return self._with_relations(plan, session) class ApplyInPandasWithState(LogicalPlan): @@ -2351,7 +2523,7 @@ def __init__( ): assert isinstance(grouping_cols, list) and all(isinstance(c, Column) for c in grouping_cols) - super().__init__(child) + 
super().__init__(child, self._collect_references(grouping_cols)) self._grouping_cols = grouping_cols self._function = function._build_common_inline_user_defined_function(*cols) self._output_schema = output_schema @@ -2371,7 +2543,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.apply_in_pandas_with_state.state_schema = self._state_schema plan.apply_in_pandas_with_state.output_mode = self._output_mode plan.apply_in_pandas_with_state.timeout_conf = self._timeout_conf - return plan + return self._with_relations(plan, session) class PythonUDTF: @@ -2435,7 +2607,7 @@ def __init__( deterministic: bool, arguments: Sequence[Expression], ) -> None: - super().__init__(None) + super().__init__(None, self._collect_references(arguments)) self._function_name = function_name self._deterministic = deterministic self._arguments = arguments @@ -2452,7 +2624,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.common_inline_user_defined_table_function.python_udtf.CopyFrom( self._function.to_plan(session) ) - return plan + return self._with_relations(plan, session) def udtf_plan( self, session: "SparkConnectClient" diff --git a/python/pyspark/sql/connect/proto/__init__.py b/python/pyspark/sql/connect/proto/__init__.py index 3e8d074d963dc..0877696c2680e 100644 --- a/python/pyspark/sql/connect/proto/__init__.py +++ b/python/pyspark/sql/connect/proto/__init__.py @@ -23,3 +23,5 @@ from pyspark.sql.connect.proto.relations_pb2 import * from pyspark.sql.connect.proto.catalog_pb2 import * from pyspark.sql.connect.proto.common_pb2 import * +from pyspark.sql.connect.proto.ml_pb2 import * +from pyspark.sql.connect.proto.ml_common_pb2 import * diff --git a/python/pyspark/sql/connect/proto/base_pb2.py b/python/pyspark/sql/connect/proto/base_pb2.py index 620f413f62c00..2fbc4287db786 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -40,10 +40,11 @@ from pyspark.sql.connect.proto import 
expressions_pb2 as spark_dot_connect_dot_expressions__pb2 from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_relations__pb2 from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__pb2 +from pyspark.sql.connect.proto import ml_pb2 as spark_dot_connect_dot_ml__pb2 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf8\x13\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x11 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t 
\x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 
\x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n 
\x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa3\x05\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x01R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 
\x01(\tH\x02R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB)\n\'_client_observed_server_side_session_idB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\xe6\x16\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x87\x01\n&streaming_query_listener_events_result\x18\x10 \x01(\x0b\x32\x31.spark.connect.StreamingQueryListenerEventsResultH\x00R"streamingQueryListenerEventsResult\x12\\\n\x0fresult_complete\x18\x0e 
\x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x87\x01\n&create_resource_profile_command_result\x18\x11 \x01(\x0b\x32\x31.spark.connect.CreateResourceProfileCommandResultH\x00R"createResourceProfileCommandResult\x12\x65\n\x12\x65xecution_progress\x18\x12 \x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x64\n\x19\x63heckpoint_command_result\x18\x13 \x01(\x0b\x32&.spark.connect.CheckpointCommandResultH\x00R\x17\x63heckpointCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 
\x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a\x8d\x01\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x12\x17\n\x07plan_id\x18\x04 \x01(\x03R\x06planId\x1a\x10\n\x0eResultComplete\x1a\xcd\x02\n\x11\x45xecutionProgress\x12V\n\x06stages\x18\x01 \x03(\x0b\x32>.spark.connect.ExecutePlanResponse.ExecutionProgress.StageInfoR\x06stages\x12,\n\x12num_inflight_tasks\x18\x02 \x01(\x03R\x10numInflightTasks\x1a\xb1\x01\n\tStageInfo\x12\x19\n\x08stage_id\x18\x01 \x01(\x03R\x07stageId\x12\x1b\n\tnum_tasks\x18\x02 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x03 \x01(\x03R\x11numCompletedTasks\x12(\n\x10input_bytes_read\x18\x04 \x01(\x03R\x0einputBytesRead\x12\x12\n\x04\x64one\x18\x05 \x01(\x08R\x04\x64oneB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x87\t\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 
\x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xea\x07\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x02R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 
\x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc6\x02\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 
\x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xdb\x04\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x96\x03\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x06 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 
\x01(\tH\x02R\x0elastResponseId\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc9\x04\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xcc\x02\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 
\x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 
\x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 \x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx"Z\n\x17\x43heckpointCommandResult\x12?\n\x08relation\x18\x01 \x01(\x0b\x32#.spark.connect.CachedRemoteRelationR\x08relation2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a 
.spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto\x1a\x16spark/connect/ml.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf5\x14\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x11 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 
\x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x12M\n\x0bjson_to_ddl\x18\x12 \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.JsonToDDLH\x00R\tjsonToDdl\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 
\x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1a,\n\tJsonToDDL\x12\x1f\n\x0bjson_string\x18\x01 \x01(\tR\njsonStringB\t\n\x07\x61nalyzeB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xca\x0e\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 
\x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x12N\n\x0bjson_to_ddl\x18\x10 \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.JsonToDDLH\x00R\tjsonToDdl\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 
\x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevel\x1a*\n\tJsonToDDL\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlStringB\x08\n\x06result"\xa3\x05\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x01R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB)\n\'_client_observed_server_side_session_idB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\xb4\x17\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t 
\x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x87\x01\n&streaming_query_listener_events_result\x18\x10 \x01(\x0b\x32\x31.spark.connect.StreamingQueryListenerEventsResultH\x00R"streamingQueryListenerEventsResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x87\x01\n&create_resource_profile_command_result\x18\x11 \x01(\x0b\x32\x31.spark.connect.CreateResourceProfileCommandResultH\x00R"createResourceProfileCommandResult\x12\x65\n\x12\x65xecution_progress\x18\x12 \x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x64\n\x19\x63heckpoint_command_result\x18\x13 \x01(\x0b\x32&.spark.connect.CheckpointCommandResultH\x00R\x17\x63heckpointCommandResult\x12L\n\x11ml_command_result\x18\x14 \x01(\x0b\x32\x1e.spark.connect.MlCommandResultH\x00R\x0fmlCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 
\x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a\x8d\x01\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x12\x17\n\x07plan_id\x18\x04 \x01(\x03R\x06planId\x1a\x10\n\x0eResultComplete\x1a\xcd\x02\n\x11\x45xecutionProgress\x12V\n\x06stages\x18\x01 \x03(\x0b\x32>.spark.connect.ExecutePlanResponse.ExecutionProgress.StageInfoR\x06stages\x12,\n\x12num_inflight_tasks\x18\x02 \x01(\x03R\x10numInflightTasks\x1a\xb1\x01\n\tStageInfo\x12\x19\n\x08stage_id\x18\x01 \x01(\x03R\x07stageId\x12\x1b\n\tnum_tasks\x18\x02 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x03 \x01(\x03R\x11numCompletedTasks\x12(\n\x10input_bytes_read\x18\x04 \x01(\x03R\x0einputBytesRead\x12\x12\n\x04\x64one\x18\x05 \x01(\x08R\x04\x64oneB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\xaf\t\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 
\x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\\\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1b\n\x06silent\x18\x02 \x01(\x08H\x00R\x06silent\x88\x01\x01\x42\t\n\x07_silent\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 
\x03(\tR\x08warnings"\xea\x07\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x02R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 
\x01(\x08R\x0fisCrcSuccessful"\xc6\x02\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xdb\x04\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x96\x03\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x06 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 \x01(\tH\x02R\x0elastResponseId\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc9\x04\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xd4\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x12\'\n\x0f\x61llow_reconnect\x18\x04 \x01(\x08R\x0e\x61llowReconnectB\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xcc\x02\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 \x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 
\x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 \x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx"Z\n\x17\x43heckpointCommandResult\x12?\n\x08relation\x18\x01 
\x01(\x0b\x32#.spark.connect.CachedRemoteRelationR\x08relation2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _globals = globals() @@ -68,186 +69,190 @@ _globals[ "_FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY" ]._serialized_options = b"8\001" - _globals["_PLAN"]._serialized_start = 219 - _globals["_PLAN"]._serialized_end = 335 - _globals["_USERCONTEXT"]._serialized_start = 337 - _globals["_USERCONTEXT"]._serialized_end = 459 - _globals["_ANALYZEPLANREQUEST"]._serialized_start = 462 - _globals["_ANALYZEPLANREQUEST"]._serialized_end = 3014 - _globals["_ANALYZEPLANREQUEST_SCHEMA"]._serialized_start = 1745 - _globals["_ANALYZEPLANREQUEST_SCHEMA"]._serialized_end = 1794 - _globals["_ANALYZEPLANREQUEST_EXPLAIN"]._serialized_start = 1797 - 
_globals["_ANALYZEPLANREQUEST_EXPLAIN"]._serialized_end = 2112 - _globals["_ANALYZEPLANREQUEST_EXPLAIN_EXPLAINMODE"]._serialized_start = 1940 - _globals["_ANALYZEPLANREQUEST_EXPLAIN_EXPLAINMODE"]._serialized_end = 2112 - _globals["_ANALYZEPLANREQUEST_TREESTRING"]._serialized_start = 2114 - _globals["_ANALYZEPLANREQUEST_TREESTRING"]._serialized_end = 2204 - _globals["_ANALYZEPLANREQUEST_ISLOCAL"]._serialized_start = 2206 - _globals["_ANALYZEPLANREQUEST_ISLOCAL"]._serialized_end = 2256 - _globals["_ANALYZEPLANREQUEST_ISSTREAMING"]._serialized_start = 2258 - _globals["_ANALYZEPLANREQUEST_ISSTREAMING"]._serialized_end = 2312 - _globals["_ANALYZEPLANREQUEST_INPUTFILES"]._serialized_start = 2314 - _globals["_ANALYZEPLANREQUEST_INPUTFILES"]._serialized_end = 2367 - _globals["_ANALYZEPLANREQUEST_SPARKVERSION"]._serialized_start = 2369 - _globals["_ANALYZEPLANREQUEST_SPARKVERSION"]._serialized_end = 2383 - _globals["_ANALYZEPLANREQUEST_DDLPARSE"]._serialized_start = 2385 - _globals["_ANALYZEPLANREQUEST_DDLPARSE"]._serialized_end = 2426 - _globals["_ANALYZEPLANREQUEST_SAMESEMANTICS"]._serialized_start = 2428 - _globals["_ANALYZEPLANREQUEST_SAMESEMANTICS"]._serialized_end = 2549 - _globals["_ANALYZEPLANREQUEST_SEMANTICHASH"]._serialized_start = 2551 - _globals["_ANALYZEPLANREQUEST_SEMANTICHASH"]._serialized_end = 2606 - _globals["_ANALYZEPLANREQUEST_PERSIST"]._serialized_start = 2609 - _globals["_ANALYZEPLANREQUEST_PERSIST"]._serialized_end = 2760 - _globals["_ANALYZEPLANREQUEST_UNPERSIST"]._serialized_start = 2762 - _globals["_ANALYZEPLANREQUEST_UNPERSIST"]._serialized_end = 2872 - _globals["_ANALYZEPLANREQUEST_GETSTORAGELEVEL"]._serialized_start = 2874 - _globals["_ANALYZEPLANREQUEST_GETSTORAGELEVEL"]._serialized_end = 2944 - _globals["_ANALYZEPLANRESPONSE"]._serialized_start = 3017 - _globals["_ANALYZEPLANRESPONSE"]._serialized_end = 4759 - _globals["_ANALYZEPLANRESPONSE_SCHEMA"]._serialized_start = 4178 - _globals["_ANALYZEPLANRESPONSE_SCHEMA"]._serialized_end = 4235 - 
_globals["_ANALYZEPLANRESPONSE_EXPLAIN"]._serialized_start = 4237 - _globals["_ANALYZEPLANRESPONSE_EXPLAIN"]._serialized_end = 4285 - _globals["_ANALYZEPLANRESPONSE_TREESTRING"]._serialized_start = 4287 - _globals["_ANALYZEPLANRESPONSE_TREESTRING"]._serialized_end = 4332 - _globals["_ANALYZEPLANRESPONSE_ISLOCAL"]._serialized_start = 4334 - _globals["_ANALYZEPLANRESPONSE_ISLOCAL"]._serialized_end = 4370 - _globals["_ANALYZEPLANRESPONSE_ISSTREAMING"]._serialized_start = 4372 - _globals["_ANALYZEPLANRESPONSE_ISSTREAMING"]._serialized_end = 4420 - _globals["_ANALYZEPLANRESPONSE_INPUTFILES"]._serialized_start = 4422 - _globals["_ANALYZEPLANRESPONSE_INPUTFILES"]._serialized_end = 4456 - _globals["_ANALYZEPLANRESPONSE_SPARKVERSION"]._serialized_start = 4458 - _globals["_ANALYZEPLANRESPONSE_SPARKVERSION"]._serialized_end = 4498 - _globals["_ANALYZEPLANRESPONSE_DDLPARSE"]._serialized_start = 4500 - _globals["_ANALYZEPLANRESPONSE_DDLPARSE"]._serialized_end = 4559 - _globals["_ANALYZEPLANRESPONSE_SAMESEMANTICS"]._serialized_start = 4561 - _globals["_ANALYZEPLANRESPONSE_SAMESEMANTICS"]._serialized_end = 4600 - _globals["_ANALYZEPLANRESPONSE_SEMANTICHASH"]._serialized_start = 4602 - _globals["_ANALYZEPLANRESPONSE_SEMANTICHASH"]._serialized_end = 4640 - _globals["_ANALYZEPLANRESPONSE_PERSIST"]._serialized_start = 2609 - _globals["_ANALYZEPLANRESPONSE_PERSIST"]._serialized_end = 2618 - _globals["_ANALYZEPLANRESPONSE_UNPERSIST"]._serialized_start = 2762 - _globals["_ANALYZEPLANRESPONSE_UNPERSIST"]._serialized_end = 2773 - _globals["_ANALYZEPLANRESPONSE_GETSTORAGELEVEL"]._serialized_start = 4666 - _globals["_ANALYZEPLANRESPONSE_GETSTORAGELEVEL"]._serialized_end = 4749 - _globals["_EXECUTEPLANREQUEST"]._serialized_start = 4762 - _globals["_EXECUTEPLANREQUEST"]._serialized_end = 5437 - _globals["_EXECUTEPLANREQUEST_REQUESTOPTION"]._serialized_start = 5196 - _globals["_EXECUTEPLANREQUEST_REQUESTOPTION"]._serialized_end = 5361 - _globals["_EXECUTEPLANRESPONSE"]._serialized_start = 5440 
- _globals["_EXECUTEPLANRESPONSE"]._serialized_end = 8358 - _globals["_EXECUTEPLANRESPONSE_SQLCOMMANDRESULT"]._serialized_start = 7132 - _globals["_EXECUTEPLANRESPONSE_SQLCOMMANDRESULT"]._serialized_end = 7203 - _globals["_EXECUTEPLANRESPONSE_ARROWBATCH"]._serialized_start = 7205 - _globals["_EXECUTEPLANRESPONSE_ARROWBATCH"]._serialized_end = 7323 - _globals["_EXECUTEPLANRESPONSE_METRICS"]._serialized_start = 7326 - _globals["_EXECUTEPLANRESPONSE_METRICS"]._serialized_end = 7843 - _globals["_EXECUTEPLANRESPONSE_METRICS_METRICOBJECT"]._serialized_start = 7421 - _globals["_EXECUTEPLANRESPONSE_METRICS_METRICOBJECT"]._serialized_end = 7753 + _globals["_PLAN"]._serialized_start = 243 + _globals["_PLAN"]._serialized_end = 359 + _globals["_USERCONTEXT"]._serialized_start = 361 + _globals["_USERCONTEXT"]._serialized_end = 483 + _globals["_ANALYZEPLANREQUEST"]._serialized_start = 486 + _globals["_ANALYZEPLANREQUEST"]._serialized_end = 3163 + _globals["_ANALYZEPLANREQUEST_SCHEMA"]._serialized_start = 1848 + _globals["_ANALYZEPLANREQUEST_SCHEMA"]._serialized_end = 1897 + _globals["_ANALYZEPLANREQUEST_EXPLAIN"]._serialized_start = 1900 + _globals["_ANALYZEPLANREQUEST_EXPLAIN"]._serialized_end = 2215 + _globals["_ANALYZEPLANREQUEST_EXPLAIN_EXPLAINMODE"]._serialized_start = 2043 + _globals["_ANALYZEPLANREQUEST_EXPLAIN_EXPLAINMODE"]._serialized_end = 2215 + _globals["_ANALYZEPLANREQUEST_TREESTRING"]._serialized_start = 2217 + _globals["_ANALYZEPLANREQUEST_TREESTRING"]._serialized_end = 2307 + _globals["_ANALYZEPLANREQUEST_ISLOCAL"]._serialized_start = 2309 + _globals["_ANALYZEPLANREQUEST_ISLOCAL"]._serialized_end = 2359 + _globals["_ANALYZEPLANREQUEST_ISSTREAMING"]._serialized_start = 2361 + _globals["_ANALYZEPLANREQUEST_ISSTREAMING"]._serialized_end = 2415 + _globals["_ANALYZEPLANREQUEST_INPUTFILES"]._serialized_start = 2417 + _globals["_ANALYZEPLANREQUEST_INPUTFILES"]._serialized_end = 2470 + _globals["_ANALYZEPLANREQUEST_SPARKVERSION"]._serialized_start = 2472 + 
_globals["_ANALYZEPLANREQUEST_SPARKVERSION"]._serialized_end = 2486 + _globals["_ANALYZEPLANREQUEST_DDLPARSE"]._serialized_start = 2488 + _globals["_ANALYZEPLANREQUEST_DDLPARSE"]._serialized_end = 2529 + _globals["_ANALYZEPLANREQUEST_SAMESEMANTICS"]._serialized_start = 2531 + _globals["_ANALYZEPLANREQUEST_SAMESEMANTICS"]._serialized_end = 2652 + _globals["_ANALYZEPLANREQUEST_SEMANTICHASH"]._serialized_start = 2654 + _globals["_ANALYZEPLANREQUEST_SEMANTICHASH"]._serialized_end = 2709 + _globals["_ANALYZEPLANREQUEST_PERSIST"]._serialized_start = 2712 + _globals["_ANALYZEPLANREQUEST_PERSIST"]._serialized_end = 2863 + _globals["_ANALYZEPLANREQUEST_UNPERSIST"]._serialized_start = 2865 + _globals["_ANALYZEPLANREQUEST_UNPERSIST"]._serialized_end = 2975 + _globals["_ANALYZEPLANREQUEST_GETSTORAGELEVEL"]._serialized_start = 2977 + _globals["_ANALYZEPLANREQUEST_GETSTORAGELEVEL"]._serialized_end = 3047 + _globals["_ANALYZEPLANREQUEST_JSONTODDL"]._serialized_start = 3049 + _globals["_ANALYZEPLANREQUEST_JSONTODDL"]._serialized_end = 3093 + _globals["_ANALYZEPLANRESPONSE"]._serialized_start = 3166 + _globals["_ANALYZEPLANRESPONSE"]._serialized_end = 5032 + _globals["_ANALYZEPLANRESPONSE_SCHEMA"]._serialized_start = 4407 + _globals["_ANALYZEPLANRESPONSE_SCHEMA"]._serialized_end = 4464 + _globals["_ANALYZEPLANRESPONSE_EXPLAIN"]._serialized_start = 4466 + _globals["_ANALYZEPLANRESPONSE_EXPLAIN"]._serialized_end = 4514 + _globals["_ANALYZEPLANRESPONSE_TREESTRING"]._serialized_start = 4516 + _globals["_ANALYZEPLANRESPONSE_TREESTRING"]._serialized_end = 4561 + _globals["_ANALYZEPLANRESPONSE_ISLOCAL"]._serialized_start = 4563 + _globals["_ANALYZEPLANRESPONSE_ISLOCAL"]._serialized_end = 4599 + _globals["_ANALYZEPLANRESPONSE_ISSTREAMING"]._serialized_start = 4601 + _globals["_ANALYZEPLANRESPONSE_ISSTREAMING"]._serialized_end = 4649 + _globals["_ANALYZEPLANRESPONSE_INPUTFILES"]._serialized_start = 4651 + _globals["_ANALYZEPLANRESPONSE_INPUTFILES"]._serialized_end = 4685 + 
_globals["_ANALYZEPLANRESPONSE_SPARKVERSION"]._serialized_start = 4687 + _globals["_ANALYZEPLANRESPONSE_SPARKVERSION"]._serialized_end = 4727 + _globals["_ANALYZEPLANRESPONSE_DDLPARSE"]._serialized_start = 4729 + _globals["_ANALYZEPLANRESPONSE_DDLPARSE"]._serialized_end = 4788 + _globals["_ANALYZEPLANRESPONSE_SAMESEMANTICS"]._serialized_start = 4790 + _globals["_ANALYZEPLANRESPONSE_SAMESEMANTICS"]._serialized_end = 4829 + _globals["_ANALYZEPLANRESPONSE_SEMANTICHASH"]._serialized_start = 4831 + _globals["_ANALYZEPLANRESPONSE_SEMANTICHASH"]._serialized_end = 4869 + _globals["_ANALYZEPLANRESPONSE_PERSIST"]._serialized_start = 2712 + _globals["_ANALYZEPLANRESPONSE_PERSIST"]._serialized_end = 2721 + _globals["_ANALYZEPLANRESPONSE_UNPERSIST"]._serialized_start = 2865 + _globals["_ANALYZEPLANRESPONSE_UNPERSIST"]._serialized_end = 2876 + _globals["_ANALYZEPLANRESPONSE_GETSTORAGELEVEL"]._serialized_start = 4895 + _globals["_ANALYZEPLANRESPONSE_GETSTORAGELEVEL"]._serialized_end = 4978 + _globals["_ANALYZEPLANRESPONSE_JSONTODDL"]._serialized_start = 4980 + _globals["_ANALYZEPLANRESPONSE_JSONTODDL"]._serialized_end = 5022 + _globals["_EXECUTEPLANREQUEST"]._serialized_start = 5035 + _globals["_EXECUTEPLANREQUEST"]._serialized_end = 5710 + _globals["_EXECUTEPLANREQUEST_REQUESTOPTION"]._serialized_start = 5469 + _globals["_EXECUTEPLANREQUEST_REQUESTOPTION"]._serialized_end = 5634 + _globals["_EXECUTEPLANRESPONSE"]._serialized_start = 5713 + _globals["_EXECUTEPLANRESPONSE"]._serialized_end = 8709 + _globals["_EXECUTEPLANRESPONSE_SQLCOMMANDRESULT"]._serialized_start = 7483 + _globals["_EXECUTEPLANRESPONSE_SQLCOMMANDRESULT"]._serialized_end = 7554 + _globals["_EXECUTEPLANRESPONSE_ARROWBATCH"]._serialized_start = 7556 + _globals["_EXECUTEPLANRESPONSE_ARROWBATCH"]._serialized_end = 7674 + _globals["_EXECUTEPLANRESPONSE_METRICS"]._serialized_start = 7677 + _globals["_EXECUTEPLANRESPONSE_METRICS"]._serialized_end = 8194 + 
_globals["_EXECUTEPLANRESPONSE_METRICS_METRICOBJECT"]._serialized_start = 7772 + _globals["_EXECUTEPLANRESPONSE_METRICS_METRICOBJECT"]._serialized_end = 8104 _globals[ "_EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY" - ]._serialized_start = 7630 + ]._serialized_start = 7981 _globals[ "_EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY" - ]._serialized_end = 7753 - _globals["_EXECUTEPLANRESPONSE_METRICS_METRICVALUE"]._serialized_start = 7755 - _globals["_EXECUTEPLANRESPONSE_METRICS_METRICVALUE"]._serialized_end = 7843 - _globals["_EXECUTEPLANRESPONSE_OBSERVEDMETRICS"]._serialized_start = 7846 - _globals["_EXECUTEPLANRESPONSE_OBSERVEDMETRICS"]._serialized_end = 7987 - _globals["_EXECUTEPLANRESPONSE_RESULTCOMPLETE"]._serialized_start = 7989 - _globals["_EXECUTEPLANRESPONSE_RESULTCOMPLETE"]._serialized_end = 8005 - _globals["_EXECUTEPLANRESPONSE_EXECUTIONPROGRESS"]._serialized_start = 8008 - _globals["_EXECUTEPLANRESPONSE_EXECUTIONPROGRESS"]._serialized_end = 8341 - _globals["_EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO"]._serialized_start = 8164 - _globals["_EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO"]._serialized_end = 8341 - _globals["_KEYVALUE"]._serialized_start = 8360 - _globals["_KEYVALUE"]._serialized_end = 8425 - _globals["_CONFIGREQUEST"]._serialized_start = 8428 - _globals["_CONFIGREQUEST"]._serialized_end = 9587 - _globals["_CONFIGREQUEST_OPERATION"]._serialized_start = 8736 - _globals["_CONFIGREQUEST_OPERATION"]._serialized_end = 9234 - _globals["_CONFIGREQUEST_SET"]._serialized_start = 9236 - _globals["_CONFIGREQUEST_SET"]._serialized_end = 9288 - _globals["_CONFIGREQUEST_GET"]._serialized_start = 9290 - _globals["_CONFIGREQUEST_GET"]._serialized_end = 9315 - _globals["_CONFIGREQUEST_GETWITHDEFAULT"]._serialized_start = 9317 - _globals["_CONFIGREQUEST_GETWITHDEFAULT"]._serialized_end = 9380 - _globals["_CONFIGREQUEST_GETOPTION"]._serialized_start = 9382 - _globals["_CONFIGREQUEST_GETOPTION"]._serialized_end = 9413 - 
_globals["_CONFIGREQUEST_GETALL"]._serialized_start = 9415 - _globals["_CONFIGREQUEST_GETALL"]._serialized_end = 9463 - _globals["_CONFIGREQUEST_UNSET"]._serialized_start = 9465 - _globals["_CONFIGREQUEST_UNSET"]._serialized_end = 9492 - _globals["_CONFIGREQUEST_ISMODIFIABLE"]._serialized_start = 9494 - _globals["_CONFIGREQUEST_ISMODIFIABLE"]._serialized_end = 9528 - _globals["_CONFIGRESPONSE"]._serialized_start = 9590 - _globals["_CONFIGRESPONSE"]._serialized_end = 9765 - _globals["_ADDARTIFACTSREQUEST"]._serialized_start = 9768 - _globals["_ADDARTIFACTSREQUEST"]._serialized_end = 10770 - _globals["_ADDARTIFACTSREQUEST_ARTIFACTCHUNK"]._serialized_start = 10243 - _globals["_ADDARTIFACTSREQUEST_ARTIFACTCHUNK"]._serialized_end = 10296 - _globals["_ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT"]._serialized_start = 10298 - _globals["_ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT"]._serialized_end = 10409 - _globals["_ADDARTIFACTSREQUEST_BATCH"]._serialized_start = 10411 - _globals["_ADDARTIFACTSREQUEST_BATCH"]._serialized_end = 10504 - _globals["_ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT"]._serialized_start = 10507 - _globals["_ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT"]._serialized_end = 10700 - _globals["_ADDARTIFACTSRESPONSE"]._serialized_start = 10773 - _globals["_ADDARTIFACTSRESPONSE"]._serialized_end = 11045 - _globals["_ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY"]._serialized_start = 10964 - _globals["_ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY"]._serialized_end = 11045 - _globals["_ARTIFACTSTATUSESREQUEST"]._serialized_start = 11048 - _globals["_ARTIFACTSTATUSESREQUEST"]._serialized_end = 11374 - _globals["_ARTIFACTSTATUSESRESPONSE"]._serialized_start = 11377 - _globals["_ARTIFACTSTATUSESRESPONSE"]._serialized_end = 11729 - _globals["_ARTIFACTSTATUSESRESPONSE_STATUSESENTRY"]._serialized_start = 11572 - _globals["_ARTIFACTSTATUSESRESPONSE_STATUSESENTRY"]._serialized_end = 11687 - _globals["_ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS"]._serialized_start = 11689 - 
_globals["_ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS"]._serialized_end = 11729 - _globals["_INTERRUPTREQUEST"]._serialized_start = 11732 - _globals["_INTERRUPTREQUEST"]._serialized_end = 12335 - _globals["_INTERRUPTREQUEST_INTERRUPTTYPE"]._serialized_start = 12135 - _globals["_INTERRUPTREQUEST_INTERRUPTTYPE"]._serialized_end = 12263 - _globals["_INTERRUPTRESPONSE"]._serialized_start = 12338 - _globals["_INTERRUPTRESPONSE"]._serialized_end = 12482 - _globals["_REATTACHOPTIONS"]._serialized_start = 12484 - _globals["_REATTACHOPTIONS"]._serialized_end = 12537 - _globals["_REATTACHEXECUTEREQUEST"]._serialized_start = 12540 - _globals["_REATTACHEXECUTEREQUEST"]._serialized_end = 12946 - _globals["_RELEASEEXECUTEREQUEST"]._serialized_start = 12949 - _globals["_RELEASEEXECUTEREQUEST"]._serialized_end = 13534 - _globals["_RELEASEEXECUTEREQUEST_RELEASEALL"]._serialized_start = 13403 - _globals["_RELEASEEXECUTEREQUEST_RELEASEALL"]._serialized_end = 13415 - _globals["_RELEASEEXECUTEREQUEST_RELEASEUNTIL"]._serialized_start = 13417 - _globals["_RELEASEEXECUTEREQUEST_RELEASEUNTIL"]._serialized_end = 13464 - _globals["_RELEASEEXECUTERESPONSE"]._serialized_start = 13537 - _globals["_RELEASEEXECUTERESPONSE"]._serialized_end = 13702 - _globals["_RELEASESESSIONREQUEST"]._serialized_start = 13705 - _globals["_RELEASESESSIONREQUEST"]._serialized_end = 13876 - _globals["_RELEASESESSIONRESPONSE"]._serialized_start = 13878 - _globals["_RELEASESESSIONRESPONSE"]._serialized_end = 13986 - _globals["_FETCHERRORDETAILSREQUEST"]._serialized_start = 13989 - _globals["_FETCHERRORDETAILSREQUEST"]._serialized_end = 14321 - _globals["_FETCHERRORDETAILSRESPONSE"]._serialized_start = 14324 - _globals["_FETCHERRORDETAILSRESPONSE"]._serialized_end = 15879 - _globals["_FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT"]._serialized_start = 14553 - _globals["_FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT"]._serialized_end = 14727 - _globals["_FETCHERRORDETAILSRESPONSE_QUERYCONTEXT"]._serialized_start = 14730 - 
_globals["_FETCHERRORDETAILSRESPONSE_QUERYCONTEXT"]._serialized_end = 15098 - _globals["_FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE"]._serialized_start = 15061 - _globals["_FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE"]._serialized_end = 15098 - _globals["_FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE"]._serialized_start = 15101 - _globals["_FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE"]._serialized_end = 15510 + ]._serialized_end = 8104 + _globals["_EXECUTEPLANRESPONSE_METRICS_METRICVALUE"]._serialized_start = 8106 + _globals["_EXECUTEPLANRESPONSE_METRICS_METRICVALUE"]._serialized_end = 8194 + _globals["_EXECUTEPLANRESPONSE_OBSERVEDMETRICS"]._serialized_start = 8197 + _globals["_EXECUTEPLANRESPONSE_OBSERVEDMETRICS"]._serialized_end = 8338 + _globals["_EXECUTEPLANRESPONSE_RESULTCOMPLETE"]._serialized_start = 8340 + _globals["_EXECUTEPLANRESPONSE_RESULTCOMPLETE"]._serialized_end = 8356 + _globals["_EXECUTEPLANRESPONSE_EXECUTIONPROGRESS"]._serialized_start = 8359 + _globals["_EXECUTEPLANRESPONSE_EXECUTIONPROGRESS"]._serialized_end = 8692 + _globals["_EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO"]._serialized_start = 8515 + _globals["_EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO"]._serialized_end = 8692 + _globals["_KEYVALUE"]._serialized_start = 8711 + _globals["_KEYVALUE"]._serialized_end = 8776 + _globals["_CONFIGREQUEST"]._serialized_start = 8779 + _globals["_CONFIGREQUEST"]._serialized_end = 9978 + _globals["_CONFIGREQUEST_OPERATION"]._serialized_start = 9087 + _globals["_CONFIGREQUEST_OPERATION"]._serialized_end = 9585 + _globals["_CONFIGREQUEST_SET"]._serialized_start = 9587 + _globals["_CONFIGREQUEST_SET"]._serialized_end = 9679 + _globals["_CONFIGREQUEST_GET"]._serialized_start = 9681 + _globals["_CONFIGREQUEST_GET"]._serialized_end = 9706 + _globals["_CONFIGREQUEST_GETWITHDEFAULT"]._serialized_start = 9708 + _globals["_CONFIGREQUEST_GETWITHDEFAULT"]._serialized_end = 9771 + _globals["_CONFIGREQUEST_GETOPTION"]._serialized_start = 9773 + 
_globals["_CONFIGREQUEST_GETOPTION"]._serialized_end = 9804 + _globals["_CONFIGREQUEST_GETALL"]._serialized_start = 9806 + _globals["_CONFIGREQUEST_GETALL"]._serialized_end = 9854 + _globals["_CONFIGREQUEST_UNSET"]._serialized_start = 9856 + _globals["_CONFIGREQUEST_UNSET"]._serialized_end = 9883 + _globals["_CONFIGREQUEST_ISMODIFIABLE"]._serialized_start = 9885 + _globals["_CONFIGREQUEST_ISMODIFIABLE"]._serialized_end = 9919 + _globals["_CONFIGRESPONSE"]._serialized_start = 9981 + _globals["_CONFIGRESPONSE"]._serialized_end = 10156 + _globals["_ADDARTIFACTSREQUEST"]._serialized_start = 10159 + _globals["_ADDARTIFACTSREQUEST"]._serialized_end = 11161 + _globals["_ADDARTIFACTSREQUEST_ARTIFACTCHUNK"]._serialized_start = 10634 + _globals["_ADDARTIFACTSREQUEST_ARTIFACTCHUNK"]._serialized_end = 10687 + _globals["_ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT"]._serialized_start = 10689 + _globals["_ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT"]._serialized_end = 10800 + _globals["_ADDARTIFACTSREQUEST_BATCH"]._serialized_start = 10802 + _globals["_ADDARTIFACTSREQUEST_BATCH"]._serialized_end = 10895 + _globals["_ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT"]._serialized_start = 10898 + _globals["_ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT"]._serialized_end = 11091 + _globals["_ADDARTIFACTSRESPONSE"]._serialized_start = 11164 + _globals["_ADDARTIFACTSRESPONSE"]._serialized_end = 11436 + _globals["_ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY"]._serialized_start = 11355 + _globals["_ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY"]._serialized_end = 11436 + _globals["_ARTIFACTSTATUSESREQUEST"]._serialized_start = 11439 + _globals["_ARTIFACTSTATUSESREQUEST"]._serialized_end = 11765 + _globals["_ARTIFACTSTATUSESRESPONSE"]._serialized_start = 11768 + _globals["_ARTIFACTSTATUSESRESPONSE"]._serialized_end = 12120 + _globals["_ARTIFACTSTATUSESRESPONSE_STATUSESENTRY"]._serialized_start = 11963 + _globals["_ARTIFACTSTATUSESRESPONSE_STATUSESENTRY"]._serialized_end = 12078 + 
_globals["_ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS"]._serialized_start = 12080 + _globals["_ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS"]._serialized_end = 12120 + _globals["_INTERRUPTREQUEST"]._serialized_start = 12123 + _globals["_INTERRUPTREQUEST"]._serialized_end = 12726 + _globals["_INTERRUPTREQUEST_INTERRUPTTYPE"]._serialized_start = 12526 + _globals["_INTERRUPTREQUEST_INTERRUPTTYPE"]._serialized_end = 12654 + _globals["_INTERRUPTRESPONSE"]._serialized_start = 12729 + _globals["_INTERRUPTRESPONSE"]._serialized_end = 12873 + _globals["_REATTACHOPTIONS"]._serialized_start = 12875 + _globals["_REATTACHOPTIONS"]._serialized_end = 12928 + _globals["_REATTACHEXECUTEREQUEST"]._serialized_start = 12931 + _globals["_REATTACHEXECUTEREQUEST"]._serialized_end = 13337 + _globals["_RELEASEEXECUTEREQUEST"]._serialized_start = 13340 + _globals["_RELEASEEXECUTEREQUEST"]._serialized_end = 13925 + _globals["_RELEASEEXECUTEREQUEST_RELEASEALL"]._serialized_start = 13794 + _globals["_RELEASEEXECUTEREQUEST_RELEASEALL"]._serialized_end = 13806 + _globals["_RELEASEEXECUTEREQUEST_RELEASEUNTIL"]._serialized_start = 13808 + _globals["_RELEASEEXECUTEREQUEST_RELEASEUNTIL"]._serialized_end = 13855 + _globals["_RELEASEEXECUTERESPONSE"]._serialized_start = 13928 + _globals["_RELEASEEXECUTERESPONSE"]._serialized_end = 14093 + _globals["_RELEASESESSIONREQUEST"]._serialized_start = 14096 + _globals["_RELEASESESSIONREQUEST"]._serialized_end = 14308 + _globals["_RELEASESESSIONRESPONSE"]._serialized_start = 14310 + _globals["_RELEASESESSIONRESPONSE"]._serialized_end = 14418 + _globals["_FETCHERRORDETAILSREQUEST"]._serialized_start = 14421 + _globals["_FETCHERRORDETAILSREQUEST"]._serialized_end = 14753 + _globals["_FETCHERRORDETAILSRESPONSE"]._serialized_start = 14756 + _globals["_FETCHERRORDETAILSRESPONSE"]._serialized_end = 16311 + _globals["_FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT"]._serialized_start = 14985 + _globals["_FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT"]._serialized_end = 15159 + 
_globals["_FETCHERRORDETAILSRESPONSE_QUERYCONTEXT"]._serialized_start = 15162 + _globals["_FETCHERRORDETAILSRESPONSE_QUERYCONTEXT"]._serialized_end = 15530 + _globals["_FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE"]._serialized_start = 15493 + _globals["_FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE"]._serialized_end = 15530 + _globals["_FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE"]._serialized_start = 15533 + _globals["_FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE"]._serialized_end = 15942 _globals[ "_FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY" - ]._serialized_start = 15412 + ]._serialized_start = 15844 _globals[ "_FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY" - ]._serialized_end = 15480 - _globals["_FETCHERRORDETAILSRESPONSE_ERROR"]._serialized_start = 15513 - _globals["_FETCHERRORDETAILSRESPONSE_ERROR"]._serialized_end = 15860 - _globals["_CHECKPOINTCOMMANDRESULT"]._serialized_start = 15881 - _globals["_CHECKPOINTCOMMANDRESULT"]._serialized_end = 15971 - _globals["_SPARKCONNECTSERVICE"]._serialized_start = 15974 - _globals["_SPARKCONNECTSERVICE"]._serialized_end = 16920 + ]._serialized_end = 15912 + _globals["_FETCHERRORDETAILSRESPONSE_ERROR"]._serialized_start = 15945 + _globals["_FETCHERRORDETAILSRESPONSE_ERROR"]._serialized_end = 16292 + _globals["_CHECKPOINTCOMMANDRESULT"]._serialized_start = 16313 + _globals["_CHECKPOINTCOMMANDRESULT"]._serialized_end = 16403 + _globals["_SPARKCONNECTSERVICE"]._serialized_start = 16406 + _globals["_SPARKCONNECTSERVICE"]._serialized_end = 17352 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index 5db25569828b7..738339fa968ec 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -43,6 +43,7 @@ import google.protobuf.message import pyspark.sql.connect.proto.commands_pb2 import pyspark.sql.connect.proto.common_pb2 import 
pyspark.sql.connect.proto.expressions_pb2 +import pyspark.sql.connect.proto.ml_pb2 import pyspark.sql.connect.proto.relations_pb2 import pyspark.sql.connect.proto.types_pb2 import sys @@ -477,6 +478,21 @@ class AnalyzePlanRequest(google.protobuf.message.Message): self, field_name: typing_extensions.Literal["relation", b"relation"] ) -> None: ... + class JsonToDDL(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + JSON_STRING_FIELD_NUMBER: builtins.int + json_string: builtins.str + """(Required) The JSON formatted string to be converted to DDL.""" + def __init__( + self, + *, + json_string: builtins.str = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["json_string", b"json_string"] + ) -> None: ... + SESSION_ID_FIELD_NUMBER: builtins.int CLIENT_OBSERVED_SERVER_SIDE_SESSION_ID_FIELD_NUMBER: builtins.int USER_CONTEXT_FIELD_NUMBER: builtins.int @@ -494,6 +510,7 @@ class AnalyzePlanRequest(google.protobuf.message.Message): PERSIST_FIELD_NUMBER: builtins.int UNPERSIST_FIELD_NUMBER: builtins.int GET_STORAGE_LEVEL_FIELD_NUMBER: builtins.int + JSON_TO_DDL_FIELD_NUMBER: builtins.int session_id: builtins.str """(Required) @@ -542,6 +559,8 @@ class AnalyzePlanRequest(google.protobuf.message.Message): def unpersist(self) -> global___AnalyzePlanRequest.Unpersist: ... @property def get_storage_level(self) -> global___AnalyzePlanRequest.GetStorageLevel: ... + @property + def json_to_ddl(self) -> global___AnalyzePlanRequest.JsonToDDL: ... def __init__( self, *, @@ -562,6 +581,7 @@ class AnalyzePlanRequest(google.protobuf.message.Message): persist: global___AnalyzePlanRequest.Persist | None = ..., unpersist: global___AnalyzePlanRequest.Unpersist | None = ..., get_storage_level: global___AnalyzePlanRequest.GetStorageLevel | None = ..., + json_to_ddl: global___AnalyzePlanRequest.JsonToDDL | None = ..., ) -> None: ... 
def HasField( self, @@ -588,6 +608,8 @@ class AnalyzePlanRequest(google.protobuf.message.Message): b"is_local", "is_streaming", b"is_streaming", + "json_to_ddl", + b"json_to_ddl", "persist", b"persist", "same_semantics", @@ -631,6 +653,8 @@ class AnalyzePlanRequest(google.protobuf.message.Message): b"is_local", "is_streaming", b"is_streaming", + "json_to_ddl", + b"json_to_ddl", "persist", b"persist", "same_semantics", @@ -680,6 +704,7 @@ class AnalyzePlanRequest(google.protobuf.message.Message): "persist", "unpersist", "get_storage_level", + "json_to_ddl", ] | None ): ... @@ -877,6 +902,20 @@ class AnalyzePlanResponse(google.protobuf.message.Message): self, field_name: typing_extensions.Literal["storage_level", b"storage_level"] ) -> None: ... + class JsonToDDL(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + DDL_STRING_FIELD_NUMBER: builtins.int + ddl_string: builtins.str + def __init__( + self, + *, + ddl_string: builtins.str = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["ddl_string", b"ddl_string"] + ) -> None: ... + SESSION_ID_FIELD_NUMBER: builtins.int SERVER_SIDE_SESSION_ID_FIELD_NUMBER: builtins.int SCHEMA_FIELD_NUMBER: builtins.int @@ -892,6 +931,7 @@ class AnalyzePlanResponse(google.protobuf.message.Message): PERSIST_FIELD_NUMBER: builtins.int UNPERSIST_FIELD_NUMBER: builtins.int GET_STORAGE_LEVEL_FIELD_NUMBER: builtins.int + JSON_TO_DDL_FIELD_NUMBER: builtins.int session_id: builtins.str server_side_session_id: builtins.str """Server-side generated idempotency key that the client can use to assert that the server side @@ -923,6 +963,8 @@ class AnalyzePlanResponse(google.protobuf.message.Message): def unpersist(self) -> global___AnalyzePlanResponse.Unpersist: ... @property def get_storage_level(self) -> global___AnalyzePlanResponse.GetStorageLevel: ... + @property + def json_to_ddl(self) -> global___AnalyzePlanResponse.JsonToDDL: ... 
def __init__( self, *, @@ -941,6 +983,7 @@ class AnalyzePlanResponse(google.protobuf.message.Message): persist: global___AnalyzePlanResponse.Persist | None = ..., unpersist: global___AnalyzePlanResponse.Unpersist | None = ..., get_storage_level: global___AnalyzePlanResponse.GetStorageLevel | None = ..., + json_to_ddl: global___AnalyzePlanResponse.JsonToDDL | None = ..., ) -> None: ... def HasField( self, @@ -957,6 +1000,8 @@ class AnalyzePlanResponse(google.protobuf.message.Message): b"is_local", "is_streaming", b"is_streaming", + "json_to_ddl", + b"json_to_ddl", "persist", b"persist", "result", @@ -990,6 +1035,8 @@ class AnalyzePlanResponse(google.protobuf.message.Message): b"is_local", "is_streaming", b"is_streaming", + "json_to_ddl", + b"json_to_ddl", "persist", b"persist", "result", @@ -1029,6 +1076,7 @@ class AnalyzePlanResponse(google.protobuf.message.Message): "persist", "unpersist", "get_storage_level", + "json_to_ddl", ] | None ): ... @@ -1534,6 +1582,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): CREATE_RESOURCE_PROFILE_COMMAND_RESULT_FIELD_NUMBER: builtins.int EXECUTION_PROGRESS_FIELD_NUMBER: builtins.int CHECKPOINT_COMMAND_RESULT_FIELD_NUMBER: builtins.int + ML_COMMAND_RESULT_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int METRICS_FIELD_NUMBER: builtins.int OBSERVED_METRICS_FIELD_NUMBER: builtins.int @@ -1598,6 +1647,9 @@ class ExecutePlanResponse(google.protobuf.message.Message): def checkpoint_command_result(self) -> global___CheckpointCommandResult: """Response for command that checkpoints a DataFrame.""" @property + def ml_command_result(self) -> pyspark.sql.connect.proto.ml_pb2.MlCommandResult: + """ML command response""" + @property def extension(self) -> google.protobuf.any_pb2.Any: """Support arbitrary result objects.""" @property @@ -1639,6 +1691,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): | None = ..., execution_progress: global___ExecutePlanResponse.ExecutionProgress | None = ..., 
checkpoint_command_result: global___CheckpointCommandResult | None = ..., + ml_command_result: pyspark.sql.connect.proto.ml_pb2.MlCommandResult | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., metrics: global___ExecutePlanResponse.Metrics | None = ..., observed_metrics: collections.abc.Iterable[global___ExecutePlanResponse.ObservedMetrics] @@ -1662,6 +1715,8 @@ class ExecutePlanResponse(google.protobuf.message.Message): b"get_resources_command_result", "metrics", b"metrics", + "ml_command_result", + b"ml_command_result", "response_type", b"response_type", "result_complete", @@ -1697,6 +1752,8 @@ class ExecutePlanResponse(google.protobuf.message.Message): b"get_resources_command_result", "metrics", b"metrics", + "ml_command_result", + b"ml_command_result", "observed_metrics", b"observed_metrics", "operation_id", @@ -1740,6 +1797,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): "create_resource_profile_command_result", "execution_progress", "checkpoint_command_result", + "ml_command_result", "extension", ] | None @@ -1874,17 +1932,32 @@ class ConfigRequest(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor PAIRS_FIELD_NUMBER: builtins.int + SILENT_FIELD_NUMBER: builtins.int @property def pairs( self, ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___KeyValue]: """(Required) The config key-value pairs to set.""" + silent: builtins.bool + """(Optional) Whether to ignore failures.""" def __init__( self, *, pairs: collections.abc.Iterable[global___KeyValue] | None = ..., + silent: builtins.bool | None = ..., ) -> None: ... - def ClearField(self, field_name: typing_extensions.Literal["pairs", b"pairs"]) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["_silent", b"_silent", "silent", b"silent"] + ) -> builtins.bool: ... 
+ def ClearField( + self, + field_name: typing_extensions.Literal[ + "_silent", b"_silent", "pairs", b"pairs", "silent", b"silent" + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["_silent", b"_silent"] + ) -> typing_extensions.Literal["silent"] | None: ... class Get(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -3169,6 +3242,7 @@ class ReleaseSessionRequest(google.protobuf.message.Message): SESSION_ID_FIELD_NUMBER: builtins.int USER_CONTEXT_FIELD_NUMBER: builtins.int CLIENT_TYPE_FIELD_NUMBER: builtins.int + ALLOW_RECONNECT_FIELD_NUMBER: builtins.int session_id: builtins.str """(Required) @@ -3187,12 +3261,27 @@ class ReleaseSessionRequest(google.protobuf.message.Message): can be used for language or version specific information and is only intended for logging purposes and will not be interpreted by the server. """ + allow_reconnect: builtins.bool + """Signals the server to allow the client to reconnect to the session after it is released. + + By default, the server tombstones the session upon release, preventing reconnections and + fully cleaning the session state. + + If this flag is set to true, the server may permit the client to reconnect to the session + post-release, even if the session state has been cleaned. This can result in missing state, + such as Temporary Views, Temporary UDFs, or the Current Catalog, in the reconnected session. + + Use this option sparingly and only when the client fully understands the implications of + reconnecting to a released session. The client must ensure that any queries executed do not + rely on the session state prior to its release. + """ def __init__( self, *, session_id: builtins.str = ..., user_context: global___UserContext | None = ..., client_type: builtins.str | None = ..., + allow_reconnect: builtins.bool = ..., ) -> None: ... 
def HasField( self, @@ -3210,6 +3299,8 @@ class ReleaseSessionRequest(google.protobuf.message.Message): field_name: typing_extensions.Literal[ "_client_type", b"_client_type", + "allow_reconnect", + b"allow_reconnect", "client_type", b"client_type", "session_id", diff --git a/python/pyspark/sql/connect/proto/commands_pb2.py b/python/pyspark/sql/connect/proto/commands_pb2.py index a7fcc1d7e0908..57a770f0226d9 100644 --- a/python/pyspark/sql/connect/proto/commands_pb2.py +++ b/python/pyspark/sql/connect/proto/commands_pb2.py @@ -38,10 +38,11 @@ from pyspark.sql.connect.proto import common_pb2 as spark_dot_connect_dot_common__pb2 from pyspark.sql.connect.proto import expressions_pb2 as spark_dot_connect_dot_expressions__pb2 from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_relations__pb2 +from pyspark.sql.connect.proto import ml_pb2 as spark_dot_connect_dot_ml__pb2 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1cspark/connect/commands.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto"\x90\r\n\x07\x43ommand\x12]\n\x11register_function\x18\x01 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x10registerFunction\x12H\n\x0fwrite_operation\x18\x02 \x01(\x0b\x32\x1d.spark.connect.WriteOperationH\x00R\x0ewriteOperation\x12_\n\x15\x63reate_dataframe_view\x18\x03 \x01(\x0b\x32).spark.connect.CreateDataFrameViewCommandH\x00R\x13\x63reateDataframeView\x12O\n\x12write_operation_v2\x18\x04 \x01(\x0b\x32\x1f.spark.connect.WriteOperationV2H\x00R\x10writeOperationV2\x12<\n\x0bsql_command\x18\x05 \x01(\x0b\x32\x19.spark.connect.SqlCommandH\x00R\nsqlCommand\x12k\n\x1cwrite_stream_operation_start\x18\x06 \x01(\x0b\x32(.spark.connect.WriteStreamOperationStartH\x00R\x19writeStreamOperationStart\x12^\n\x17streaming_query_command\x18\x07 
\x01(\x0b\x32$.spark.connect.StreamingQueryCommandH\x00R\x15streamingQueryCommand\x12X\n\x15get_resources_command\x18\x08 \x01(\x0b\x32".spark.connect.GetResourcesCommandH\x00R\x13getResourcesCommand\x12t\n\x1fstreaming_query_manager_command\x18\t \x01(\x0b\x32+.spark.connect.StreamingQueryManagerCommandH\x00R\x1cstreamingQueryManagerCommand\x12m\n\x17register_table_function\x18\n \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R\x15registerTableFunction\x12\x81\x01\n$streaming_query_listener_bus_command\x18\x0b \x01(\x0b\x32/.spark.connect.StreamingQueryListenerBusCommandH\x00R streamingQueryListenerBusCommand\x12\x64\n\x14register_data_source\x18\x0c \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R\x12registerDataSource\x12t\n\x1f\x63reate_resource_profile_command\x18\r \x01(\x0b\x32+.spark.connect.CreateResourceProfileCommandH\x00R\x1c\x63reateResourceProfileCommand\x12Q\n\x12\x63heckpoint_command\x18\x0e \x01(\x0b\x32 .spark.connect.CheckpointCommandH\x00R\x11\x63heckpointCommand\x12\x84\x01\n%remove_cached_remote_relation_command\x18\x0f \x01(\x0b\x32\x30.spark.connect.RemoveCachedRemoteRelationCommandH\x00R!removeCachedRemoteRelationCommand\x12_\n\x18merge_into_table_command\x18\x10 \x01(\x0b\x32$.spark.connect.MergeIntoTableCommandH\x00R\x15mergeIntoTableCommand\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x0e\n\x0c\x63ommand_type"\xaa\x04\n\nSqlCommand\x12\x14\n\x03sql\x18\x01 \x01(\tB\x02\x18\x01R\x03sql\x12;\n\x04\x61rgs\x18\x02 \x03(\x0b\x32#.spark.connect.SqlCommand.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12Z\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32-.spark.connect.SqlCommand.NamedArgumentsEntryB\x02\x18\x01R\x0enamedArguments\x12\x42\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionB\x02\x18\x01R\x0cposArguments\x12-\n\x05input\x18\x06 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"\x96\x01\n\x1a\x43reateDataFrameViewCommand\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x1b\n\tis_global\x18\x03 \x01(\x08R\x08isGlobal\x12\x18\n\x07replace\x18\x04 \x01(\x08R\x07replace"\xca\x08\n\x0eWriteOperation\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1b\n\x06source\x18\x02 \x01(\tH\x01R\x06source\x88\x01\x01\x12\x14\n\x04path\x18\x03 \x01(\tH\x00R\x04path\x12?\n\x05table\x18\x04 \x01(\x0b\x32\'.spark.connect.WriteOperation.SaveTableH\x00R\x05table\x12:\n\x04mode\x18\x05 \x01(\x0e\x32&.spark.connect.WriteOperation.SaveModeR\x04mode\x12*\n\x11sort_column_names\x18\x06 \x03(\tR\x0fsortColumnNames\x12\x31\n\x14partitioning_columns\x18\x07 \x03(\tR\x13partitioningColumns\x12\x43\n\tbucket_by\x18\x08 \x01(\x0b\x32&.spark.connect.WriteOperation.BucketByR\x08\x62ucketBy\x12\x44\n\x07options\x18\t \x03(\x0b\x32*.spark.connect.WriteOperation.OptionsEntryR\x07options\x12-\n\x12\x63lustering_columns\x18\n \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x82\x02\n\tSaveTable\x12\x1d\n\ntable_name\x18\x01 \x01(\tR\ttableName\x12X\n\x0bsave_method\x18\x02 \x01(\x0e\x32\x37.spark.connect.WriteOperation.SaveTable.TableSaveMethodR\nsaveMethod"|\n\x0fTableSaveMethod\x12!\n\x1dTABLE_SAVE_METHOD_UNSPECIFIED\x10\x00\x12#\n\x1fTABLE_SAVE_METHOD_SAVE_AS_TABLE\x10\x01\x12!\n\x1dTABLE_SAVE_METHOD_INSERT_INTO\x10\x02\x1a[\n\x08\x42ucketBy\x12.\n\x13\x62ucket_column_names\x18\x01 
\x03(\tR\x11\x62ucketColumnNames\x12\x1f\n\x0bnum_buckets\x18\x02 \x01(\x05R\nnumBuckets"\x89\x01\n\x08SaveMode\x12\x19\n\x15SAVE_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10SAVE_MODE_APPEND\x10\x01\x12\x17\n\x13SAVE_MODE_OVERWRITE\x10\x02\x12\x1d\n\x19SAVE_MODE_ERROR_IF_EXISTS\x10\x03\x12\x14\n\x10SAVE_MODE_IGNORE\x10\x04\x42\x0b\n\tsave_typeB\t\n\x07_source"\xdc\x06\n\x10WriteOperationV2\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\ntable_name\x18\x02 \x01(\tR\ttableName\x12\x1f\n\x08provider\x18\x03 \x01(\tH\x00R\x08provider\x88\x01\x01\x12L\n\x14partitioning_columns\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13partitioningColumns\x12\x46\n\x07options\x18\x05 \x03(\x0b\x32,.spark.connect.WriteOperationV2.OptionsEntryR\x07options\x12_\n\x10table_properties\x18\x06 \x03(\x0b\x32\x34.spark.connect.WriteOperationV2.TablePropertiesEntryR\x0ftableProperties\x12\x38\n\x04mode\x18\x07 \x01(\x0e\x32$.spark.connect.WriteOperationV2.ModeR\x04mode\x12J\n\x13overwrite_condition\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x12overwriteCondition\x12-\n\x12\x63lustering_columns\x18\t \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"\x9f\x01\n\x04Mode\x12\x14\n\x10MODE_UNSPECIFIED\x10\x00\x12\x0f\n\x0bMODE_CREATE\x10\x01\x12\x12\n\x0eMODE_OVERWRITE\x10\x02\x12\x1d\n\x19MODE_OVERWRITE_PARTITIONS\x10\x03\x12\x0f\n\x0bMODE_APPEND\x10\x04\x12\x10\n\x0cMODE_REPLACE\x10\x05\x12\x1a\n\x16MODE_CREATE_OR_REPLACE\x10\x06\x42\x0b\n\t_provider"\xd8\x06\n\x19WriteStreamOperationStart\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06\x66ormat\x18\x02 \x01(\tR\x06\x66ormat\x12O\n\x07options\x18\x03 
\x03(\x0b\x32\x35.spark.connect.WriteStreamOperationStart.OptionsEntryR\x07options\x12:\n\x19partitioning_column_names\x18\x04 \x03(\tR\x17partitioningColumnNames\x12:\n\x18processing_time_interval\x18\x05 \x01(\tH\x00R\x16processingTimeInterval\x12%\n\ravailable_now\x18\x06 \x01(\x08H\x00R\x0c\x61vailableNow\x12\x14\n\x04once\x18\x07 \x01(\x08H\x00R\x04once\x12\x46\n\x1e\x63ontinuous_checkpoint_interval\x18\x08 \x01(\tH\x00R\x1c\x63ontinuousCheckpointInterval\x12\x1f\n\x0boutput_mode\x18\t \x01(\tR\noutputMode\x12\x1d\n\nquery_name\x18\n \x01(\tR\tqueryName\x12\x14\n\x04path\x18\x0b \x01(\tH\x01R\x04path\x12\x1f\n\ntable_name\x18\x0c \x01(\tH\x01R\ttableName\x12N\n\x0e\x66oreach_writer\x18\r \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\rforeachWriter\x12L\n\rforeach_batch\x18\x0e \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\x0c\x66oreachBatch\x12\x36\n\x17\x63lustering_column_names\x18\x0f \x03(\tR\x15\x63lusteringColumnNames\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07triggerB\x12\n\x10sink_destination"\xb3\x01\n\x18StreamingForeachFunction\x12\x43\n\x0fpython_function\x18\x01 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x0epythonFunction\x12\x46\n\x0escala_function\x18\x02 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\rscalaFunctionB\n\n\x08\x66unction"\xd4\x01\n\x1fWriteStreamOperationStartResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12<\n\x18query_started_event_json\x18\x03 \x01(\tH\x00R\x15queryStartedEventJson\x88\x01\x01\x42\x1b\n\x19_query_started_event_json"A\n\x18StreamingQueryInstanceId\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x15\n\x06run_id\x18\x02 \x01(\tR\x05runId"\xf8\x04\n\x15StreamingQueryCommand\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x18\n\x06status\x18\x02 
\x01(\x08H\x00R\x06status\x12%\n\rlast_progress\x18\x03 \x01(\x08H\x00R\x0clastProgress\x12)\n\x0frecent_progress\x18\x04 \x01(\x08H\x00R\x0erecentProgress\x12\x14\n\x04stop\x18\x05 \x01(\x08H\x00R\x04stop\x12\x34\n\x15process_all_available\x18\x06 \x01(\x08H\x00R\x13processAllAvailable\x12O\n\x07\x65xplain\x18\x07 \x01(\x0b\x32\x33.spark.connect.StreamingQueryCommand.ExplainCommandH\x00R\x07\x65xplain\x12\x1e\n\texception\x18\x08 \x01(\x08H\x00R\texception\x12k\n\x11\x61wait_termination\x18\t \x01(\x0b\x32<.spark.connect.StreamingQueryCommand.AwaitTerminationCommandH\x00R\x10\x61waitTermination\x1a,\n\x0e\x45xplainCommand\x12\x1a\n\x08\x65xtended\x18\x01 \x01(\x08R\x08\x65xtended\x1aL\n\x17\x41waitTerminationCommand\x12"\n\ntimeout_ms\x18\x02 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_msB\t\n\x07\x63ommand"\xf5\x08\n\x1bStreamingQueryCommandResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12Q\n\x06status\x18\x02 \x01(\x0b\x32\x37.spark.connect.StreamingQueryCommandResult.StatusResultH\x00R\x06status\x12j\n\x0frecent_progress\x18\x03 \x01(\x0b\x32?.spark.connect.StreamingQueryCommandResult.RecentProgressResultH\x00R\x0erecentProgress\x12T\n\x07\x65xplain\x18\x04 \x01(\x0b\x32\x38.spark.connect.StreamingQueryCommandResult.ExplainResultH\x00R\x07\x65xplain\x12Z\n\texception\x18\x05 \x01(\x0b\x32:.spark.connect.StreamingQueryCommandResult.ExceptionResultH\x00R\texception\x12p\n\x11\x61wait_termination\x18\x06 \x01(\x0b\x32\x41.spark.connect.StreamingQueryCommandResult.AwaitTerminationResultH\x00R\x10\x61waitTermination\x1a\xaa\x01\n\x0cStatusResult\x12%\n\x0estatus_message\x18\x01 \x01(\tR\rstatusMessage\x12*\n\x11is_data_available\x18\x02 \x01(\x08R\x0fisDataAvailable\x12*\n\x11is_trigger_active\x18\x03 \x01(\x08R\x0fisTriggerActive\x12\x1b\n\tis_active\x18\x04 \x01(\x08R\x08isActive\x1aH\n\x14RecentProgressResult\x12\x30\n\x14recent_progress_json\x18\x05 
\x03(\tR\x12recentProgressJson\x1a\'\n\rExplainResult\x12\x16\n\x06result\x18\x01 \x01(\tR\x06result\x1a\xc5\x01\n\x0f\x45xceptionResult\x12\x30\n\x11\x65xception_message\x18\x01 \x01(\tH\x00R\x10\x65xceptionMessage\x88\x01\x01\x12$\n\x0b\x65rror_class\x18\x02 \x01(\tH\x01R\nerrorClass\x88\x01\x01\x12$\n\x0bstack_trace\x18\x03 \x01(\tH\x02R\nstackTrace\x88\x01\x01\x42\x14\n\x12_exception_messageB\x0e\n\x0c_error_classB\x0e\n\x0c_stack_trace\x1a\x38\n\x16\x41waitTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminatedB\r\n\x0bresult_type"\xbd\x06\n\x1cStreamingQueryManagerCommand\x12\x18\n\x06\x61\x63tive\x18\x01 \x01(\x08H\x00R\x06\x61\x63tive\x12\x1d\n\tget_query\x18\x02 \x01(\tH\x00R\x08getQuery\x12|\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32\x46.spark.connect.StreamingQueryManagerCommand.AwaitAnyTerminationCommandH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12n\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0b\x61\x64\x64Listener\x12t\n\x0fremove_listener\x18\x06 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0eremoveListener\x12\'\n\x0elist_listeners\x18\x07 \x01(\x08H\x00R\rlistListeners\x1aO\n\x1a\x41waitAnyTerminationCommand\x12"\n\ntimeout_ms\x18\x01 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_ms\x1a\xcd\x01\n\x1dStreamingQueryListenerCommand\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x12U\n\x17python_listener_payload\x18\x02 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x15pythonListenerPayload\x88\x01\x01\x12\x0e\n\x02id\x18\x03 \x01(\tR\x02idB\x1a\n\x18_python_listener_payloadB\t\n\x07\x63ommand"\xb4\x08\n"StreamingQueryManagerCommandResult\x12X\n\x06\x61\x63tive\x18\x01 \x01(\x0b\x32>.spark.connect.StreamingQueryManagerCommandResult.ActiveResultH\x00R\x06\x61\x63tive\x12`\n\x05query\x18\x02 
\x01(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceH\x00R\x05query\x12\x81\x01\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32K.spark.connect.StreamingQueryManagerCommandResult.AwaitAnyTerminationResultH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12#\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x08H\x00R\x0b\x61\x64\x64Listener\x12)\n\x0fremove_listener\x18\x06 \x01(\x08H\x00R\x0eremoveListener\x12{\n\x0elist_listeners\x18\x07 \x01(\x0b\x32R.spark.connect.StreamingQueryManagerCommandResult.ListStreamingQueryListenerResultH\x00R\rlistListeners\x1a\x7f\n\x0c\x41\x63tiveResult\x12o\n\x0e\x61\x63tive_queries\x18\x01 \x03(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceR\ractiveQueries\x1as\n\x16StreamingQueryInstance\x12\x37\n\x02id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x02id\x12\x17\n\x04name\x18\x02 \x01(\tH\x00R\x04name\x88\x01\x01\x42\x07\n\x05_name\x1a;\n\x19\x41waitAnyTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminated\x1aK\n\x1eStreamingQueryListenerInstance\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x1a\x45\n ListStreamingQueryListenerResult\x12!\n\x0clistener_ids\x18\x01 \x03(\tR\x0blistenerIdsB\r\n\x0bresult_type"\xad\x01\n StreamingQueryListenerBusCommand\x12;\n\x19\x61\x64\x64_listener_bus_listener\x18\x01 \x01(\x08H\x00R\x16\x61\x64\x64ListenerBusListener\x12\x41\n\x1cremove_listener_bus_listener\x18\x02 \x01(\x08H\x00R\x19removeListenerBusListenerB\t\n\x07\x63ommand"\x83\x01\n\x1bStreamingQueryListenerEvent\x12\x1d\n\nevent_json\x18\x01 \x01(\tR\teventJson\x12\x45\n\nevent_type\x18\x02 \x01(\x0e\x32&.spark.connect.StreamingQueryEventTypeR\teventType"\xcc\x01\n"StreamingQueryListenerEventsResult\x12\x42\n\x06\x65vents\x18\x01 \x03(\x0b\x32*.spark.connect.StreamingQueryListenerEventR\x06\x65vents\x12\x42\n\x1blistener_bus_listener_added\x18\x02 
\x01(\x08H\x00R\x18listenerBusListenerAdded\x88\x01\x01\x42\x1e\n\x1c_listener_bus_listener_added"\x15\n\x13GetResourcesCommand"\xd4\x01\n\x19GetResourcesCommandResult\x12U\n\tresources\x18\x01 \x03(\x0b\x32\x37.spark.connect.GetResourcesCommandResult.ResourcesEntryR\tresources\x1a`\n\x0eResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32".spark.connect.ResourceInformationR\x05value:\x02\x38\x01"X\n\x1c\x43reateResourceProfileCommand\x12\x38\n\x07profile\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ResourceProfileR\x07profile"C\n"CreateResourceProfileCommandResult\x12\x1d\n\nprofile_id\x18\x01 \x01(\x05R\tprofileId"d\n!RemoveCachedRemoteRelationCommand\x12?\n\x08relation\x18\x01 \x01(\x0b\x32#.spark.connect.CachedRemoteRelationR\x08relation"\xcd\x01\n\x11\x43heckpointCommand\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x14\n\x05local\x18\x02 \x01(\x08R\x05local\x12\x14\n\x05\x65\x61ger\x18\x03 \x01(\x08R\x05\x65\x61ger\x12\x45\n\rstorage_level\x18\x04 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level"\xe8\x03\n\x15MergeIntoTableCommand\x12*\n\x11target_table_name\x18\x01 \x01(\tR\x0ftargetTableName\x12\x43\n\x11source_table_plan\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x0fsourceTablePlan\x12\x42\n\x0fmerge_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0emergeCondition\x12>\n\rmatch_actions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cmatchActions\x12I\n\x13not_matched_actions\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x11notMatchedActions\x12[\n\x1dnot_matched_by_source_actions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x19notMatchedBySourceActions\x12\x32\n\x15with_schema_evolution\x18\x07 
\x01(\x08R\x13withSchemaEvolution*\x85\x01\n\x17StreamingQueryEventType\x12\x1e\n\x1aQUERY_PROGRESS_UNSPECIFIED\x10\x00\x12\x18\n\x14QUERY_PROGRESS_EVENT\x10\x01\x12\x1a\n\x16QUERY_TERMINATED_EVENT\x10\x02\x12\x14\n\x10QUERY_IDLE_EVENT\x10\x03\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1cspark/connect/commands.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x16spark/connect/ml.proto"\xcb\r\n\x07\x43ommand\x12]\n\x11register_function\x18\x01 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x10registerFunction\x12H\n\x0fwrite_operation\x18\x02 \x01(\x0b\x32\x1d.spark.connect.WriteOperationH\x00R\x0ewriteOperation\x12_\n\x15\x63reate_dataframe_view\x18\x03 \x01(\x0b\x32).spark.connect.CreateDataFrameViewCommandH\x00R\x13\x63reateDataframeView\x12O\n\x12write_operation_v2\x18\x04 \x01(\x0b\x32\x1f.spark.connect.WriteOperationV2H\x00R\x10writeOperationV2\x12<\n\x0bsql_command\x18\x05 \x01(\x0b\x32\x19.spark.connect.SqlCommandH\x00R\nsqlCommand\x12k\n\x1cwrite_stream_operation_start\x18\x06 \x01(\x0b\x32(.spark.connect.WriteStreamOperationStartH\x00R\x19writeStreamOperationStart\x12^\n\x17streaming_query_command\x18\x07 \x01(\x0b\x32$.spark.connect.StreamingQueryCommandH\x00R\x15streamingQueryCommand\x12X\n\x15get_resources_command\x18\x08 \x01(\x0b\x32".spark.connect.GetResourcesCommandH\x00R\x13getResourcesCommand\x12t\n\x1fstreaming_query_manager_command\x18\t \x01(\x0b\x32+.spark.connect.StreamingQueryManagerCommandH\x00R\x1cstreamingQueryManagerCommand\x12m\n\x17register_table_function\x18\n \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R\x15registerTableFunction\x12\x81\x01\n$streaming_query_listener_bus_command\x18\x0b \x01(\x0b\x32/.spark.connect.StreamingQueryListenerBusCommandH\x00R 
streamingQueryListenerBusCommand\x12\x64\n\x14register_data_source\x18\x0c \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R\x12registerDataSource\x12t\n\x1f\x63reate_resource_profile_command\x18\r \x01(\x0b\x32+.spark.connect.CreateResourceProfileCommandH\x00R\x1c\x63reateResourceProfileCommand\x12Q\n\x12\x63heckpoint_command\x18\x0e \x01(\x0b\x32 .spark.connect.CheckpointCommandH\x00R\x11\x63heckpointCommand\x12\x84\x01\n%remove_cached_remote_relation_command\x18\x0f \x01(\x0b\x32\x30.spark.connect.RemoveCachedRemoteRelationCommandH\x00R!removeCachedRemoteRelationCommand\x12_\n\x18merge_into_table_command\x18\x10 \x01(\x0b\x32$.spark.connect.MergeIntoTableCommandH\x00R\x15mergeIntoTableCommand\x12\x39\n\nml_command\x18\x11 \x01(\x0b\x32\x18.spark.connect.MlCommandH\x00R\tmlCommand\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x0e\n\x0c\x63ommand_type"\xaa\x04\n\nSqlCommand\x12\x14\n\x03sql\x18\x01 \x01(\tB\x02\x18\x01R\x03sql\x12;\n\x04\x61rgs\x18\x02 \x03(\x0b\x32#.spark.connect.SqlCommand.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12Z\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32-.spark.connect.SqlCommand.NamedArgumentsEntryB\x02\x18\x01R\x0enamedArguments\x12\x42\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionB\x02\x18\x01R\x0cposArguments\x12-\n\x05input\x18\x06 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"\x96\x01\n\x1a\x43reateDataFrameViewCommand\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 
\x01(\tR\x04name\x12\x1b\n\tis_global\x18\x03 \x01(\x08R\x08isGlobal\x12\x18\n\x07replace\x18\x04 \x01(\x08R\x07replace"\xca\x08\n\x0eWriteOperation\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1b\n\x06source\x18\x02 \x01(\tH\x01R\x06source\x88\x01\x01\x12\x14\n\x04path\x18\x03 \x01(\tH\x00R\x04path\x12?\n\x05table\x18\x04 \x01(\x0b\x32\'.spark.connect.WriteOperation.SaveTableH\x00R\x05table\x12:\n\x04mode\x18\x05 \x01(\x0e\x32&.spark.connect.WriteOperation.SaveModeR\x04mode\x12*\n\x11sort_column_names\x18\x06 \x03(\tR\x0fsortColumnNames\x12\x31\n\x14partitioning_columns\x18\x07 \x03(\tR\x13partitioningColumns\x12\x43\n\tbucket_by\x18\x08 \x01(\x0b\x32&.spark.connect.WriteOperation.BucketByR\x08\x62ucketBy\x12\x44\n\x07options\x18\t \x03(\x0b\x32*.spark.connect.WriteOperation.OptionsEntryR\x07options\x12-\n\x12\x63lustering_columns\x18\n \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x82\x02\n\tSaveTable\x12\x1d\n\ntable_name\x18\x01 \x01(\tR\ttableName\x12X\n\x0bsave_method\x18\x02 \x01(\x0e\x32\x37.spark.connect.WriteOperation.SaveTable.TableSaveMethodR\nsaveMethod"|\n\x0fTableSaveMethod\x12!\n\x1dTABLE_SAVE_METHOD_UNSPECIFIED\x10\x00\x12#\n\x1fTABLE_SAVE_METHOD_SAVE_AS_TABLE\x10\x01\x12!\n\x1dTABLE_SAVE_METHOD_INSERT_INTO\x10\x02\x1a[\n\x08\x42ucketBy\x12.\n\x13\x62ucket_column_names\x18\x01 \x03(\tR\x11\x62ucketColumnNames\x12\x1f\n\x0bnum_buckets\x18\x02 \x01(\x05R\nnumBuckets"\x89\x01\n\x08SaveMode\x12\x19\n\x15SAVE_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10SAVE_MODE_APPEND\x10\x01\x12\x17\n\x13SAVE_MODE_OVERWRITE\x10\x02\x12\x1d\n\x19SAVE_MODE_ERROR_IF_EXISTS\x10\x03\x12\x14\n\x10SAVE_MODE_IGNORE\x10\x04\x42\x0b\n\tsave_typeB\t\n\x07_source"\xdc\x06\n\x10WriteOperationV2\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\ntable_name\x18\x02 
\x01(\tR\ttableName\x12\x1f\n\x08provider\x18\x03 \x01(\tH\x00R\x08provider\x88\x01\x01\x12L\n\x14partitioning_columns\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13partitioningColumns\x12\x46\n\x07options\x18\x05 \x03(\x0b\x32,.spark.connect.WriteOperationV2.OptionsEntryR\x07options\x12_\n\x10table_properties\x18\x06 \x03(\x0b\x32\x34.spark.connect.WriteOperationV2.TablePropertiesEntryR\x0ftableProperties\x12\x38\n\x04mode\x18\x07 \x01(\x0e\x32$.spark.connect.WriteOperationV2.ModeR\x04mode\x12J\n\x13overwrite_condition\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x12overwriteCondition\x12-\n\x12\x63lustering_columns\x18\t \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"\x9f\x01\n\x04Mode\x12\x14\n\x10MODE_UNSPECIFIED\x10\x00\x12\x0f\n\x0bMODE_CREATE\x10\x01\x12\x12\n\x0eMODE_OVERWRITE\x10\x02\x12\x1d\n\x19MODE_OVERWRITE_PARTITIONS\x10\x03\x12\x0f\n\x0bMODE_APPEND\x10\x04\x12\x10\n\x0cMODE_REPLACE\x10\x05\x12\x1a\n\x16MODE_CREATE_OR_REPLACE\x10\x06\x42\x0b\n\t_provider"\xd8\x06\n\x19WriteStreamOperationStart\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06\x66ormat\x18\x02 \x01(\tR\x06\x66ormat\x12O\n\x07options\x18\x03 \x03(\x0b\x32\x35.spark.connect.WriteStreamOperationStart.OptionsEntryR\x07options\x12:\n\x19partitioning_column_names\x18\x04 \x03(\tR\x17partitioningColumnNames\x12:\n\x18processing_time_interval\x18\x05 \x01(\tH\x00R\x16processingTimeInterval\x12%\n\ravailable_now\x18\x06 \x01(\x08H\x00R\x0c\x61vailableNow\x12\x14\n\x04once\x18\x07 \x01(\x08H\x00R\x04once\x12\x46\n\x1e\x63ontinuous_checkpoint_interval\x18\x08 \x01(\tH\x00R\x1c\x63ontinuousCheckpointInterval\x12\x1f\n\x0boutput_mode\x18\t \x01(\tR\noutputMode\x12\x1d\n\nquery_name\x18\n 
\x01(\tR\tqueryName\x12\x14\n\x04path\x18\x0b \x01(\tH\x01R\x04path\x12\x1f\n\ntable_name\x18\x0c \x01(\tH\x01R\ttableName\x12N\n\x0e\x66oreach_writer\x18\r \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\rforeachWriter\x12L\n\rforeach_batch\x18\x0e \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\x0c\x66oreachBatch\x12\x36\n\x17\x63lustering_column_names\x18\x0f \x03(\tR\x15\x63lusteringColumnNames\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07triggerB\x12\n\x10sink_destination"\xb3\x01\n\x18StreamingForeachFunction\x12\x43\n\x0fpython_function\x18\x01 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x0epythonFunction\x12\x46\n\x0escala_function\x18\x02 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\rscalaFunctionB\n\n\x08\x66unction"\xd4\x01\n\x1fWriteStreamOperationStartResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12<\n\x18query_started_event_json\x18\x03 \x01(\tH\x00R\x15queryStartedEventJson\x88\x01\x01\x42\x1b\n\x19_query_started_event_json"A\n\x18StreamingQueryInstanceId\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x15\n\x06run_id\x18\x02 \x01(\tR\x05runId"\xf8\x04\n\x15StreamingQueryCommand\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x18\n\x06status\x18\x02 \x01(\x08H\x00R\x06status\x12%\n\rlast_progress\x18\x03 \x01(\x08H\x00R\x0clastProgress\x12)\n\x0frecent_progress\x18\x04 \x01(\x08H\x00R\x0erecentProgress\x12\x14\n\x04stop\x18\x05 \x01(\x08H\x00R\x04stop\x12\x34\n\x15process_all_available\x18\x06 \x01(\x08H\x00R\x13processAllAvailable\x12O\n\x07\x65xplain\x18\x07 \x01(\x0b\x32\x33.spark.connect.StreamingQueryCommand.ExplainCommandH\x00R\x07\x65xplain\x12\x1e\n\texception\x18\x08 \x01(\x08H\x00R\texception\x12k\n\x11\x61wait_termination\x18\t 
\x01(\x0b\x32<.spark.connect.StreamingQueryCommand.AwaitTerminationCommandH\x00R\x10\x61waitTermination\x1a,\n\x0e\x45xplainCommand\x12\x1a\n\x08\x65xtended\x18\x01 \x01(\x08R\x08\x65xtended\x1aL\n\x17\x41waitTerminationCommand\x12"\n\ntimeout_ms\x18\x02 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_msB\t\n\x07\x63ommand"\xf5\x08\n\x1bStreamingQueryCommandResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12Q\n\x06status\x18\x02 \x01(\x0b\x32\x37.spark.connect.StreamingQueryCommandResult.StatusResultH\x00R\x06status\x12j\n\x0frecent_progress\x18\x03 \x01(\x0b\x32?.spark.connect.StreamingQueryCommandResult.RecentProgressResultH\x00R\x0erecentProgress\x12T\n\x07\x65xplain\x18\x04 \x01(\x0b\x32\x38.spark.connect.StreamingQueryCommandResult.ExplainResultH\x00R\x07\x65xplain\x12Z\n\texception\x18\x05 \x01(\x0b\x32:.spark.connect.StreamingQueryCommandResult.ExceptionResultH\x00R\texception\x12p\n\x11\x61wait_termination\x18\x06 \x01(\x0b\x32\x41.spark.connect.StreamingQueryCommandResult.AwaitTerminationResultH\x00R\x10\x61waitTermination\x1a\xaa\x01\n\x0cStatusResult\x12%\n\x0estatus_message\x18\x01 \x01(\tR\rstatusMessage\x12*\n\x11is_data_available\x18\x02 \x01(\x08R\x0fisDataAvailable\x12*\n\x11is_trigger_active\x18\x03 \x01(\x08R\x0fisTriggerActive\x12\x1b\n\tis_active\x18\x04 \x01(\x08R\x08isActive\x1aH\n\x14RecentProgressResult\x12\x30\n\x14recent_progress_json\x18\x05 \x03(\tR\x12recentProgressJson\x1a\'\n\rExplainResult\x12\x16\n\x06result\x18\x01 \x01(\tR\x06result\x1a\xc5\x01\n\x0f\x45xceptionResult\x12\x30\n\x11\x65xception_message\x18\x01 \x01(\tH\x00R\x10\x65xceptionMessage\x88\x01\x01\x12$\n\x0b\x65rror_class\x18\x02 \x01(\tH\x01R\nerrorClass\x88\x01\x01\x12$\n\x0bstack_trace\x18\x03 \x01(\tH\x02R\nstackTrace\x88\x01\x01\x42\x14\n\x12_exception_messageB\x0e\n\x0c_error_classB\x0e\n\x0c_stack_trace\x1a\x38\n\x16\x41waitTerminationResult\x12\x1e\n\nterminated\x18\x01 
\x01(\x08R\nterminatedB\r\n\x0bresult_type"\xbd\x06\n\x1cStreamingQueryManagerCommand\x12\x18\n\x06\x61\x63tive\x18\x01 \x01(\x08H\x00R\x06\x61\x63tive\x12\x1d\n\tget_query\x18\x02 \x01(\tH\x00R\x08getQuery\x12|\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32\x46.spark.connect.StreamingQueryManagerCommand.AwaitAnyTerminationCommandH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12n\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0b\x61\x64\x64Listener\x12t\n\x0fremove_listener\x18\x06 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0eremoveListener\x12\'\n\x0elist_listeners\x18\x07 \x01(\x08H\x00R\rlistListeners\x1aO\n\x1a\x41waitAnyTerminationCommand\x12"\n\ntimeout_ms\x18\x01 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_ms\x1a\xcd\x01\n\x1dStreamingQueryListenerCommand\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x12U\n\x17python_listener_payload\x18\x02 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x15pythonListenerPayload\x88\x01\x01\x12\x0e\n\x02id\x18\x03 \x01(\tR\x02idB\x1a\n\x18_python_listener_payloadB\t\n\x07\x63ommand"\xb4\x08\n"StreamingQueryManagerCommandResult\x12X\n\x06\x61\x63tive\x18\x01 \x01(\x0b\x32>.spark.connect.StreamingQueryManagerCommandResult.ActiveResultH\x00R\x06\x61\x63tive\x12`\n\x05query\x18\x02 \x01(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceH\x00R\x05query\x12\x81\x01\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32K.spark.connect.StreamingQueryManagerCommandResult.AwaitAnyTerminationResultH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12#\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x08H\x00R\x0b\x61\x64\x64Listener\x12)\n\x0fremove_listener\x18\x06 \x01(\x08H\x00R\x0eremoveListener\x12{\n\x0elist_listeners\x18\x07 
\x01(\x0b\x32R.spark.connect.StreamingQueryManagerCommandResult.ListStreamingQueryListenerResultH\x00R\rlistListeners\x1a\x7f\n\x0c\x41\x63tiveResult\x12o\n\x0e\x61\x63tive_queries\x18\x01 \x03(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceR\ractiveQueries\x1as\n\x16StreamingQueryInstance\x12\x37\n\x02id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x02id\x12\x17\n\x04name\x18\x02 \x01(\tH\x00R\x04name\x88\x01\x01\x42\x07\n\x05_name\x1a;\n\x19\x41waitAnyTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminated\x1aK\n\x1eStreamingQueryListenerInstance\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x1a\x45\n ListStreamingQueryListenerResult\x12!\n\x0clistener_ids\x18\x01 \x03(\tR\x0blistenerIdsB\r\n\x0bresult_type"\xad\x01\n StreamingQueryListenerBusCommand\x12;\n\x19\x61\x64\x64_listener_bus_listener\x18\x01 \x01(\x08H\x00R\x16\x61\x64\x64ListenerBusListener\x12\x41\n\x1cremove_listener_bus_listener\x18\x02 \x01(\x08H\x00R\x19removeListenerBusListenerB\t\n\x07\x63ommand"\x83\x01\n\x1bStreamingQueryListenerEvent\x12\x1d\n\nevent_json\x18\x01 \x01(\tR\teventJson\x12\x45\n\nevent_type\x18\x02 \x01(\x0e\x32&.spark.connect.StreamingQueryEventTypeR\teventType"\xcc\x01\n"StreamingQueryListenerEventsResult\x12\x42\n\x06\x65vents\x18\x01 \x03(\x0b\x32*.spark.connect.StreamingQueryListenerEventR\x06\x65vents\x12\x42\n\x1blistener_bus_listener_added\x18\x02 \x01(\x08H\x00R\x18listenerBusListenerAdded\x88\x01\x01\x42\x1e\n\x1c_listener_bus_listener_added"\x15\n\x13GetResourcesCommand"\xd4\x01\n\x19GetResourcesCommandResult\x12U\n\tresources\x18\x01 \x03(\x0b\x32\x37.spark.connect.GetResourcesCommandResult.ResourcesEntryR\tresources\x1a`\n\x0eResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32".spark.connect.ResourceInformationR\x05value:\x02\x38\x01"X\n\x1c\x43reateResourceProfileCommand\x12\x38\n\x07profile\x18\x01 
\x01(\x0b\x32\x1e.spark.connect.ResourceProfileR\x07profile"C\n"CreateResourceProfileCommandResult\x12\x1d\n\nprofile_id\x18\x01 \x01(\x05R\tprofileId"d\n!RemoveCachedRemoteRelationCommand\x12?\n\x08relation\x18\x01 \x01(\x0b\x32#.spark.connect.CachedRemoteRelationR\x08relation"\xcd\x01\n\x11\x43heckpointCommand\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x14\n\x05local\x18\x02 \x01(\x08R\x05local\x12\x14\n\x05\x65\x61ger\x18\x03 \x01(\x08R\x05\x65\x61ger\x12\x45\n\rstorage_level\x18\x04 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level"\xe8\x03\n\x15MergeIntoTableCommand\x12*\n\x11target_table_name\x18\x01 \x01(\tR\x0ftargetTableName\x12\x43\n\x11source_table_plan\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x0fsourceTablePlan\x12\x42\n\x0fmerge_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0emergeCondition\x12>\n\rmatch_actions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cmatchActions\x12I\n\x13not_matched_actions\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x11notMatchedActions\x12[\n\x1dnot_matched_by_source_actions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x19notMatchedBySourceActions\x12\x32\n\x15with_schema_evolution\x18\x07 \x01(\x08R\x13withSchemaEvolution*\x85\x01\n\x17StreamingQueryEventType\x12\x1e\n\x1aQUERY_PROGRESS_UNSPECIFIED\x10\x00\x12\x18\n\x14QUERY_PROGRESS_EVENT\x10\x01\x12\x1a\n\x16QUERY_TERMINATED_EVENT\x10\x02\x12\x14\n\x10QUERY_IDLE_EVENT\x10\x03\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _globals = globals() @@ -78,114 +79,114 @@ _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_options = b"8\001" _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._loaded_options = None _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_options = b"8\001" - _globals["_STREAMINGQUERYEVENTTYPE"]._serialized_start = 11252 - 
_globals["_STREAMINGQUERYEVENTTYPE"]._serialized_end = 11385 - _globals["_COMMAND"]._serialized_start = 167 - _globals["_COMMAND"]._serialized_end = 1847 - _globals["_SQLCOMMAND"]._serialized_start = 1850 - _globals["_SQLCOMMAND"]._serialized_end = 2404 - _globals["_SQLCOMMAND_ARGSENTRY"]._serialized_start = 2220 - _globals["_SQLCOMMAND_ARGSENTRY"]._serialized_end = 2310 - _globals["_SQLCOMMAND_NAMEDARGUMENTSENTRY"]._serialized_start = 2312 - _globals["_SQLCOMMAND_NAMEDARGUMENTSENTRY"]._serialized_end = 2404 - _globals["_CREATEDATAFRAMEVIEWCOMMAND"]._serialized_start = 2407 - _globals["_CREATEDATAFRAMEVIEWCOMMAND"]._serialized_end = 2557 - _globals["_WRITEOPERATION"]._serialized_start = 2560 - _globals["_WRITEOPERATION"]._serialized_end = 3658 - _globals["_WRITEOPERATION_OPTIONSENTRY"]._serialized_start = 3082 - _globals["_WRITEOPERATION_OPTIONSENTRY"]._serialized_end = 3140 - _globals["_WRITEOPERATION_SAVETABLE"]._serialized_start = 3143 - _globals["_WRITEOPERATION_SAVETABLE"]._serialized_end = 3401 - _globals["_WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD"]._serialized_start = 3277 - _globals["_WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD"]._serialized_end = 3401 - _globals["_WRITEOPERATION_BUCKETBY"]._serialized_start = 3403 - _globals["_WRITEOPERATION_BUCKETBY"]._serialized_end = 3494 - _globals["_WRITEOPERATION_SAVEMODE"]._serialized_start = 3497 - _globals["_WRITEOPERATION_SAVEMODE"]._serialized_end = 3634 - _globals["_WRITEOPERATIONV2"]._serialized_start = 3661 - _globals["_WRITEOPERATIONV2"]._serialized_end = 4521 - _globals["_WRITEOPERATIONV2_OPTIONSENTRY"]._serialized_start = 3082 - _globals["_WRITEOPERATIONV2_OPTIONSENTRY"]._serialized_end = 3140 - _globals["_WRITEOPERATIONV2_TABLEPROPERTIESENTRY"]._serialized_start = 4280 - _globals["_WRITEOPERATIONV2_TABLEPROPERTIESENTRY"]._serialized_end = 4346 - _globals["_WRITEOPERATIONV2_MODE"]._serialized_start = 4349 - _globals["_WRITEOPERATIONV2_MODE"]._serialized_end = 4508 - 
_globals["_WRITESTREAMOPERATIONSTART"]._serialized_start = 4524 - _globals["_WRITESTREAMOPERATIONSTART"]._serialized_end = 5380 - _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_start = 3082 - _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_end = 3140 - _globals["_STREAMINGFOREACHFUNCTION"]._serialized_start = 5383 - _globals["_STREAMINGFOREACHFUNCTION"]._serialized_end = 5562 - _globals["_WRITESTREAMOPERATIONSTARTRESULT"]._serialized_start = 5565 - _globals["_WRITESTREAMOPERATIONSTARTRESULT"]._serialized_end = 5777 - _globals["_STREAMINGQUERYINSTANCEID"]._serialized_start = 5779 - _globals["_STREAMINGQUERYINSTANCEID"]._serialized_end = 5844 - _globals["_STREAMINGQUERYCOMMAND"]._serialized_start = 5847 - _globals["_STREAMINGQUERYCOMMAND"]._serialized_end = 6479 - _globals["_STREAMINGQUERYCOMMAND_EXPLAINCOMMAND"]._serialized_start = 6346 - _globals["_STREAMINGQUERYCOMMAND_EXPLAINCOMMAND"]._serialized_end = 6390 - _globals["_STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND"]._serialized_start = 6392 - _globals["_STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND"]._serialized_end = 6468 - _globals["_STREAMINGQUERYCOMMANDRESULT"]._serialized_start = 6482 - _globals["_STREAMINGQUERYCOMMANDRESULT"]._serialized_end = 7623 - _globals["_STREAMINGQUERYCOMMANDRESULT_STATUSRESULT"]._serialized_start = 7065 - _globals["_STREAMINGQUERYCOMMANDRESULT_STATUSRESULT"]._serialized_end = 7235 - _globals["_STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT"]._serialized_start = 7237 - _globals["_STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT"]._serialized_end = 7309 - _globals["_STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT"]._serialized_start = 7311 - _globals["_STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT"]._serialized_end = 7350 - _globals["_STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT"]._serialized_start = 7353 - _globals["_STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT"]._serialized_end = 7550 - 
_globals["_STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT"]._serialized_start = 7552 - _globals["_STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT"]._serialized_end = 7608 - _globals["_STREAMINGQUERYMANAGERCOMMAND"]._serialized_start = 7626 - _globals["_STREAMINGQUERYMANAGERCOMMAND"]._serialized_end = 8455 - _globals["_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND"]._serialized_start = 8157 - _globals["_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND"]._serialized_end = 8236 - _globals["_STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND"]._serialized_start = 8239 - _globals["_STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND"]._serialized_end = 8444 - _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT"]._serialized_start = 8458 - _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT"]._serialized_end = 9534 - _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT"]._serialized_start = 9066 - _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT"]._serialized_end = 9193 - _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE"]._serialized_start = 9195 - _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE"]._serialized_end = 9310 + _globals["_STREAMINGQUERYEVENTTYPE"]._serialized_start = 11335 + _globals["_STREAMINGQUERYEVENTTYPE"]._serialized_end = 11468 + _globals["_COMMAND"]._serialized_start = 191 + _globals["_COMMAND"]._serialized_end = 1930 + _globals["_SQLCOMMAND"]._serialized_start = 1933 + _globals["_SQLCOMMAND"]._serialized_end = 2487 + _globals["_SQLCOMMAND_ARGSENTRY"]._serialized_start = 2303 + _globals["_SQLCOMMAND_ARGSENTRY"]._serialized_end = 2393 + _globals["_SQLCOMMAND_NAMEDARGUMENTSENTRY"]._serialized_start = 2395 + _globals["_SQLCOMMAND_NAMEDARGUMENTSENTRY"]._serialized_end = 2487 + _globals["_CREATEDATAFRAMEVIEWCOMMAND"]._serialized_start = 2490 + _globals["_CREATEDATAFRAMEVIEWCOMMAND"]._serialized_end = 2640 + _globals["_WRITEOPERATION"]._serialized_start = 2643 + 
_globals["_WRITEOPERATION"]._serialized_end = 3741 + _globals["_WRITEOPERATION_OPTIONSENTRY"]._serialized_start = 3165 + _globals["_WRITEOPERATION_OPTIONSENTRY"]._serialized_end = 3223 + _globals["_WRITEOPERATION_SAVETABLE"]._serialized_start = 3226 + _globals["_WRITEOPERATION_SAVETABLE"]._serialized_end = 3484 + _globals["_WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD"]._serialized_start = 3360 + _globals["_WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD"]._serialized_end = 3484 + _globals["_WRITEOPERATION_BUCKETBY"]._serialized_start = 3486 + _globals["_WRITEOPERATION_BUCKETBY"]._serialized_end = 3577 + _globals["_WRITEOPERATION_SAVEMODE"]._serialized_start = 3580 + _globals["_WRITEOPERATION_SAVEMODE"]._serialized_end = 3717 + _globals["_WRITEOPERATIONV2"]._serialized_start = 3744 + _globals["_WRITEOPERATIONV2"]._serialized_end = 4604 + _globals["_WRITEOPERATIONV2_OPTIONSENTRY"]._serialized_start = 3165 + _globals["_WRITEOPERATIONV2_OPTIONSENTRY"]._serialized_end = 3223 + _globals["_WRITEOPERATIONV2_TABLEPROPERTIESENTRY"]._serialized_start = 4363 + _globals["_WRITEOPERATIONV2_TABLEPROPERTIESENTRY"]._serialized_end = 4429 + _globals["_WRITEOPERATIONV2_MODE"]._serialized_start = 4432 + _globals["_WRITEOPERATIONV2_MODE"]._serialized_end = 4591 + _globals["_WRITESTREAMOPERATIONSTART"]._serialized_start = 4607 + _globals["_WRITESTREAMOPERATIONSTART"]._serialized_end = 5463 + _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_start = 3165 + _globals["_WRITESTREAMOPERATIONSTART_OPTIONSENTRY"]._serialized_end = 3223 + _globals["_STREAMINGFOREACHFUNCTION"]._serialized_start = 5466 + _globals["_STREAMINGFOREACHFUNCTION"]._serialized_end = 5645 + _globals["_WRITESTREAMOPERATIONSTARTRESULT"]._serialized_start = 5648 + _globals["_WRITESTREAMOPERATIONSTARTRESULT"]._serialized_end = 5860 + _globals["_STREAMINGQUERYINSTANCEID"]._serialized_start = 5862 + _globals["_STREAMINGQUERYINSTANCEID"]._serialized_end = 5927 + _globals["_STREAMINGQUERYCOMMAND"]._serialized_start = 5930 + 
_globals["_STREAMINGQUERYCOMMAND"]._serialized_end = 6562 + _globals["_STREAMINGQUERYCOMMAND_EXPLAINCOMMAND"]._serialized_start = 6429 + _globals["_STREAMINGQUERYCOMMAND_EXPLAINCOMMAND"]._serialized_end = 6473 + _globals["_STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND"]._serialized_start = 6475 + _globals["_STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND"]._serialized_end = 6551 + _globals["_STREAMINGQUERYCOMMANDRESULT"]._serialized_start = 6565 + _globals["_STREAMINGQUERYCOMMANDRESULT"]._serialized_end = 7706 + _globals["_STREAMINGQUERYCOMMANDRESULT_STATUSRESULT"]._serialized_start = 7148 + _globals["_STREAMINGQUERYCOMMANDRESULT_STATUSRESULT"]._serialized_end = 7318 + _globals["_STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT"]._serialized_start = 7320 + _globals["_STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT"]._serialized_end = 7392 + _globals["_STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT"]._serialized_start = 7394 + _globals["_STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT"]._serialized_end = 7433 + _globals["_STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT"]._serialized_start = 7436 + _globals["_STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT"]._serialized_end = 7633 + _globals["_STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT"]._serialized_start = 7635 + _globals["_STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT"]._serialized_end = 7691 + _globals["_STREAMINGQUERYMANAGERCOMMAND"]._serialized_start = 7709 + _globals["_STREAMINGQUERYMANAGERCOMMAND"]._serialized_end = 8538 + _globals["_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND"]._serialized_start = 8240 + _globals["_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND"]._serialized_end = 8319 + _globals["_STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND"]._serialized_start = 8322 + _globals["_STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND"]._serialized_end = 8527 + _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT"]._serialized_start = 8541 + 
_globals["_STREAMINGQUERYMANAGERCOMMANDRESULT"]._serialized_end = 9617 + _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT"]._serialized_start = 9149 + _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT"]._serialized_end = 9276 + _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE"]._serialized_start = 9278 + _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE"]._serialized_end = 9393 _globals[ "_STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT" - ]._serialized_start = 9312 - _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT"]._serialized_end = 9371 + ]._serialized_start = 9395 + _globals["_STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT"]._serialized_end = 9454 _globals[ "_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYLISTENERINSTANCE" - ]._serialized_start = 9373 + ]._serialized_start = 9456 _globals[ "_STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYLISTENERINSTANCE" - ]._serialized_end = 9448 + ]._serialized_end = 9531 _globals[ "_STREAMINGQUERYMANAGERCOMMANDRESULT_LISTSTREAMINGQUERYLISTENERRESULT" - ]._serialized_start = 9450 + ]._serialized_start = 9533 _globals[ "_STREAMINGQUERYMANAGERCOMMANDRESULT_LISTSTREAMINGQUERYLISTENERRESULT" - ]._serialized_end = 9519 - _globals["_STREAMINGQUERYLISTENERBUSCOMMAND"]._serialized_start = 9537 - _globals["_STREAMINGQUERYLISTENERBUSCOMMAND"]._serialized_end = 9710 - _globals["_STREAMINGQUERYLISTENEREVENT"]._serialized_start = 9713 - _globals["_STREAMINGQUERYLISTENEREVENT"]._serialized_end = 9844 - _globals["_STREAMINGQUERYLISTENEREVENTSRESULT"]._serialized_start = 9847 - _globals["_STREAMINGQUERYLISTENEREVENTSRESULT"]._serialized_end = 10051 - _globals["_GETRESOURCESCOMMAND"]._serialized_start = 10053 - _globals["_GETRESOURCESCOMMAND"]._serialized_end = 10074 - _globals["_GETRESOURCESCOMMANDRESULT"]._serialized_start = 10077 - _globals["_GETRESOURCESCOMMANDRESULT"]._serialized_end = 10289 - 
_globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_start = 10193 - _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_end = 10289 - _globals["_CREATERESOURCEPROFILECOMMAND"]._serialized_start = 10291 - _globals["_CREATERESOURCEPROFILECOMMAND"]._serialized_end = 10379 - _globals["_CREATERESOURCEPROFILECOMMANDRESULT"]._serialized_start = 10381 - _globals["_CREATERESOURCEPROFILECOMMANDRESULT"]._serialized_end = 10448 - _globals["_REMOVECACHEDREMOTERELATIONCOMMAND"]._serialized_start = 10450 - _globals["_REMOVECACHEDREMOTERELATIONCOMMAND"]._serialized_end = 10550 - _globals["_CHECKPOINTCOMMAND"]._serialized_start = 10553 - _globals["_CHECKPOINTCOMMAND"]._serialized_end = 10758 - _globals["_MERGEINTOTABLECOMMAND"]._serialized_start = 10761 - _globals["_MERGEINTOTABLECOMMAND"]._serialized_end = 11249 + ]._serialized_end = 9602 + _globals["_STREAMINGQUERYLISTENERBUSCOMMAND"]._serialized_start = 9620 + _globals["_STREAMINGQUERYLISTENERBUSCOMMAND"]._serialized_end = 9793 + _globals["_STREAMINGQUERYLISTENEREVENT"]._serialized_start = 9796 + _globals["_STREAMINGQUERYLISTENEREVENT"]._serialized_end = 9927 + _globals["_STREAMINGQUERYLISTENEREVENTSRESULT"]._serialized_start = 9930 + _globals["_STREAMINGQUERYLISTENEREVENTSRESULT"]._serialized_end = 10134 + _globals["_GETRESOURCESCOMMAND"]._serialized_start = 10136 + _globals["_GETRESOURCESCOMMAND"]._serialized_end = 10157 + _globals["_GETRESOURCESCOMMANDRESULT"]._serialized_start = 10160 + _globals["_GETRESOURCESCOMMANDRESULT"]._serialized_end = 10372 + _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_start = 10276 + _globals["_GETRESOURCESCOMMANDRESULT_RESOURCESENTRY"]._serialized_end = 10372 + _globals["_CREATERESOURCEPROFILECOMMAND"]._serialized_start = 10374 + _globals["_CREATERESOURCEPROFILECOMMAND"]._serialized_end = 10462 + _globals["_CREATERESOURCEPROFILECOMMANDRESULT"]._serialized_start = 10464 + _globals["_CREATERESOURCEPROFILECOMMANDRESULT"]._serialized_end = 10531 + 
_globals["_REMOVECACHEDREMOTERELATIONCOMMAND"]._serialized_start = 10533 + _globals["_REMOVECACHEDREMOTERELATIONCOMMAND"]._serialized_end = 10633 + _globals["_CHECKPOINTCOMMAND"]._serialized_start = 10636 + _globals["_CHECKPOINTCOMMAND"]._serialized_end = 10841 + _globals["_MERGEINTOTABLECOMMAND"]._serialized_start = 10844 + _globals["_MERGEINTOTABLECOMMAND"]._serialized_end = 11332 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/commands_pb2.pyi b/python/pyspark/sql/connect/proto/commands_pb2.pyi index 6192a29607cbf..906f1aad10574 100644 --- a/python/pyspark/sql/connect/proto/commands_pb2.pyi +++ b/python/pyspark/sql/connect/proto/commands_pb2.pyi @@ -42,6 +42,7 @@ import google.protobuf.internal.enum_type_wrapper import google.protobuf.message import pyspark.sql.connect.proto.common_pb2 import pyspark.sql.connect.proto.expressions_pb2 +import pyspark.sql.connect.proto.ml_pb2 import pyspark.sql.connect.proto.relations_pb2 import sys import typing @@ -104,6 +105,7 @@ class Command(google.protobuf.message.Message): CHECKPOINT_COMMAND_FIELD_NUMBER: builtins.int REMOVE_CACHED_REMOTE_RELATION_COMMAND_FIELD_NUMBER: builtins.int MERGE_INTO_TABLE_COMMAND_FIELD_NUMBER: builtins.int + ML_COMMAND_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int @property def register_function( @@ -146,6 +148,8 @@ class Command(google.protobuf.message.Message): @property def merge_into_table_command(self) -> global___MergeIntoTableCommand: ... @property + def ml_command(self) -> pyspark.sql.connect.proto.ml_pb2.MlCommand: ... + @property def extension(self) -> google.protobuf.any_pb2.Any: """This field is used to mark extensions to the protocol. When plugins generate arbitrary Commands they can add them here. During the planning the correct resolution is done. 
@@ -174,6 +178,7 @@ class Command(google.protobuf.message.Message): remove_cached_remote_relation_command: global___RemoveCachedRemoteRelationCommand | None = ..., merge_into_table_command: global___MergeIntoTableCommand | None = ..., + ml_command: pyspark.sql.connect.proto.ml_pb2.MlCommand | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., ) -> None: ... def HasField( @@ -193,6 +198,8 @@ class Command(google.protobuf.message.Message): b"get_resources_command", "merge_into_table_command", b"merge_into_table_command", + "ml_command", + b"ml_command", "register_data_source", b"register_data_source", "register_function", @@ -234,6 +241,8 @@ class Command(google.protobuf.message.Message): b"get_resources_command", "merge_into_table_command", b"merge_into_table_command", + "ml_command", + b"ml_command", "register_data_source", b"register_data_source", "register_function", @@ -278,6 +287,7 @@ class Command(google.protobuf.message.Message): "checkpoint_command", "remove_cached_remote_relation_command", "merge_into_table_command", + "ml_command", "extension", ] | None diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.py b/python/pyspark/sql/connect/proto/expressions_pb2.py index 0d4730ac736e3..7edcbcac15c73 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.py +++ b/python/pyspark/sql/connect/proto/expressions_pb2.py @@ -40,7 +40,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\xc1\x30\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 
\x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 
\x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 
\x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9b\x0c\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 
\x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lementsB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 
\x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\xcc\x01\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 
\x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\xec\x02\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdfB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_conditionB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\x97\x32\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 
\x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12?\n\x0cmerge_action\x18\x13 \x01(\x0b\x32\x1a.spark.connect.MergeActionH\x00R\x0bmergeAction\x12g\n\x1atyped_aggregate_expression\x18\x14 \x01(\x0b\x32\'.spark.connect.TypedAggregateExpressionH\x00R\x18typedAggregateExpression\x12H\n\x0flazy_expression\x18\x15 \x01(\x0b\x32\x1d.spark.connect.LazyExpressionH\x00R\x0elazyExpression\x12T\n\x13subquery_expression\x18\x16 \x01(\x0b\x32!.spark.connect.SubqueryExpressionH\x00R\x12subqueryExpression\x12\x35\n\textension\x18\xe7\x07 
\x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 
\x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9b\x0c\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 
\x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lementsB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\x82\x02\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x12$\n\x0bis_internal\x18\x05 \x01(\x08H\x00R\nisInternal\x88\x01\x01\x42\x0e\n\x0c_is_internal\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 
\x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\xec\x02\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 
\x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdfB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"c\n\x18TypedAggregateExpression\x12G\n\x10scalar_scala_udf\x18\x01 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFR\x0escalarScalaUdf"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\x80\x04\n\x0bMergeAction\x12\x46\n\x0b\x61\x63tion_type\x18\x01 \x01(\x0e\x32%.spark.connect.MergeAction.ActionTypeR\nactionType\x12<\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\tcondition\x88\x01\x01\x12G\n\x0b\x61ssignments\x18\x03 \x03(\x0b\x32%.spark.connect.MergeAction.AssignmentR\x0b\x61ssignments\x1aj\n\nAssignment\x12+\n\x03key\x18\x01 
\x01(\x0b\x32\x19.spark.connect.ExpressionR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value"\xa7\x01\n\nActionType\x12\x17\n\x13\x41\x43TION_TYPE_INVALID\x10\x00\x12\x16\n\x12\x41\x43TION_TYPE_DELETE\x10\x01\x12\x16\n\x12\x41\x43TION_TYPE_INSERT\x10\x02\x12\x1b\n\x17\x41\x43TION_TYPE_INSERT_STAR\x10\x03\x12\x16\n\x12\x41\x43TION_TYPE_UPDATE\x10\x04\x12\x1b\n\x17\x41\x43TION_TYPE_UPDATE_STAR\x10\x05\x42\x0c\n\n_condition"A\n\x0eLazyExpression\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild"\xe1\x01\n\x12SubqueryExpression\x12\x17\n\x07plan_id\x18\x01 \x01(\x03R\x06planId\x12S\n\rsubquery_type\x18\x02 \x01(\x0e\x32..spark.connect.SubqueryExpression.SubqueryTypeR\x0csubqueryType"]\n\x0cSubqueryType\x12\x19\n\x15SUBQUERY_TYPE_UNKNOWN\x10\x00\x12\x18\n\x14SUBQUERY_TYPE_SCALAR\x10\x01\x12\x18\n\x14SUBQUERY_TYPE_EXISTS\x10\x02\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _globals = globals() @@ -54,77 +54,83 @@ "DESCRIPTOR" ]._serialized_options = b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" _globals["_EXPRESSION"]._serialized_start = 133 - _globals["_EXPRESSION"]._serialized_end = 6342 - _globals["_EXPRESSION_WINDOW"]._serialized_start = 1900 - _globals["_EXPRESSION_WINDOW"]._serialized_end = 2683 - _globals["_EXPRESSION_WINDOW_WINDOWFRAME"]._serialized_start = 2190 - _globals["_EXPRESSION_WINDOW_WINDOWFRAME"]._serialized_end = 2683 - _globals["_EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY"]._serialized_start = 2457 - _globals["_EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY"]._serialized_end = 2602 - _globals["_EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE"]._serialized_start = 2604 - _globals["_EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE"]._serialized_end = 2683 - _globals["_EXPRESSION_SORTORDER"]._serialized_start = 2686 - _globals["_EXPRESSION_SORTORDER"]._serialized_end = 3111 - _globals["_EXPRESSION_SORTORDER_SORTDIRECTION"]._serialized_start = 
2916 - _globals["_EXPRESSION_SORTORDER_SORTDIRECTION"]._serialized_end = 3024 - _globals["_EXPRESSION_SORTORDER_NULLORDERING"]._serialized_start = 3026 - _globals["_EXPRESSION_SORTORDER_NULLORDERING"]._serialized_end = 3111 - _globals["_EXPRESSION_CAST"]._serialized_start = 3114 - _globals["_EXPRESSION_CAST"]._serialized_end = 3429 - _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_start = 3315 - _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_end = 3413 - _globals["_EXPRESSION_LITERAL"]._serialized_start = 3432 - _globals["_EXPRESSION_LITERAL"]._serialized_end = 4995 - _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_start = 4267 - _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_end = 4384 - _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_start = 4386 - _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_end = 4484 - _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_start = 4487 - _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_end = 4617 - _globals["_EXPRESSION_LITERAL_MAP"]._serialized_start = 4620 - _globals["_EXPRESSION_LITERAL_MAP"]._serialized_end = 4847 - _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_start = 4850 - _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_end = 4979 - _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_start = 4998 - _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_end = 5184 - _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_start = 5187 - _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_end = 5391 - _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_start = 5393 - _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_end = 5443 - _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_start = 5445 - _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_end = 5569 - _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_start = 5571 - _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_end = 5657 - _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_start = 5660 - 
_globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_end = 5792 - _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_start = 5795 - _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_end = 5982 - _globals["_EXPRESSION_ALIAS"]._serialized_start = 5984 - _globals["_EXPRESSION_ALIAS"]._serialized_end = 6104 - _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_start = 6107 - _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_end = 6265 - _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_start = 6267 - _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_end = 6329 - _globals["_EXPRESSIONCOMMON"]._serialized_start = 6344 - _globals["_EXPRESSIONCOMMON"]._serialized_end = 6409 - _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_start = 6412 - _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_end = 6776 - _globals["_PYTHONUDF"]._serialized_start = 6779 - _globals["_PYTHONUDF"]._serialized_end = 6983 - _globals["_SCALARSCALAUDF"]._serialized_start = 6986 - _globals["_SCALARSCALAUDF"]._serialized_end = 7200 - _globals["_JAVAUDF"]._serialized_start = 7203 - _globals["_JAVAUDF"]._serialized_end = 7352 - _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_start = 7354 - _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_end = 7453 - _globals["_CALLFUNCTION"]._serialized_start = 7455 - _globals["_CALLFUNCTION"]._serialized_end = 7563 - _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_start = 7565 - _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_end = 7657 - _globals["_MERGEACTION"]._serialized_start = 7660 - _globals["_MERGEACTION"]._serialized_end = 8172 - _globals["_MERGEACTION_ASSIGNMENT"]._serialized_start = 7882 - _globals["_MERGEACTION_ASSIGNMENT"]._serialized_end = 7988 - _globals["_MERGEACTION_ACTIONTYPE"]._serialized_start = 7991 - _globals["_MERGEACTION_ACTIONTYPE"]._serialized_end = 8158 + _globals["_EXPRESSION"]._serialized_end = 6556 + _globals["_EXPRESSION_WINDOW"]._serialized_start = 2060 + 
_globals["_EXPRESSION_WINDOW"]._serialized_end = 2843 + _globals["_EXPRESSION_WINDOW_WINDOWFRAME"]._serialized_start = 2350 + _globals["_EXPRESSION_WINDOW_WINDOWFRAME"]._serialized_end = 2843 + _globals["_EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY"]._serialized_start = 2617 + _globals["_EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY"]._serialized_end = 2762 + _globals["_EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE"]._serialized_start = 2764 + _globals["_EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE"]._serialized_end = 2843 + _globals["_EXPRESSION_SORTORDER"]._serialized_start = 2846 + _globals["_EXPRESSION_SORTORDER"]._serialized_end = 3271 + _globals["_EXPRESSION_SORTORDER_SORTDIRECTION"]._serialized_start = 3076 + _globals["_EXPRESSION_SORTORDER_SORTDIRECTION"]._serialized_end = 3184 + _globals["_EXPRESSION_SORTORDER_NULLORDERING"]._serialized_start = 3186 + _globals["_EXPRESSION_SORTORDER_NULLORDERING"]._serialized_end = 3271 + _globals["_EXPRESSION_CAST"]._serialized_start = 3274 + _globals["_EXPRESSION_CAST"]._serialized_end = 3589 + _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_start = 3475 + _globals["_EXPRESSION_CAST_EVALMODE"]._serialized_end = 3573 + _globals["_EXPRESSION_LITERAL"]._serialized_start = 3592 + _globals["_EXPRESSION_LITERAL"]._serialized_end = 5155 + _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_start = 4427 + _globals["_EXPRESSION_LITERAL_DECIMAL"]._serialized_end = 4544 + _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_start = 4546 + _globals["_EXPRESSION_LITERAL_CALENDARINTERVAL"]._serialized_end = 4644 + _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_start = 4647 + _globals["_EXPRESSION_LITERAL_ARRAY"]._serialized_end = 4777 + _globals["_EXPRESSION_LITERAL_MAP"]._serialized_start = 4780 + _globals["_EXPRESSION_LITERAL_MAP"]._serialized_end = 5007 + _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_start = 5010 + _globals["_EXPRESSION_LITERAL_STRUCT"]._serialized_end = 5139 + 
_globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_start = 5158 + _globals["_EXPRESSION_UNRESOLVEDATTRIBUTE"]._serialized_end = 5344 + _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_start = 5347 + _globals["_EXPRESSION_UNRESOLVEDFUNCTION"]._serialized_end = 5605 + _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_start = 5607 + _globals["_EXPRESSION_EXPRESSIONSTRING"]._serialized_end = 5657 + _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_start = 5659 + _globals["_EXPRESSION_UNRESOLVEDSTAR"]._serialized_end = 5783 + _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_start = 5785 + _globals["_EXPRESSION_UNRESOLVEDREGEX"]._serialized_end = 5871 + _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_start = 5874 + _globals["_EXPRESSION_UNRESOLVEDEXTRACTVALUE"]._serialized_end = 6006 + _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_start = 6009 + _globals["_EXPRESSION_UPDATEFIELDS"]._serialized_end = 6196 + _globals["_EXPRESSION_ALIAS"]._serialized_start = 6198 + _globals["_EXPRESSION_ALIAS"]._serialized_end = 6318 + _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_start = 6321 + _globals["_EXPRESSION_LAMBDAFUNCTION"]._serialized_end = 6479 + _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_start = 6481 + _globals["_EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE"]._serialized_end = 6543 + _globals["_EXPRESSIONCOMMON"]._serialized_start = 6558 + _globals["_EXPRESSIONCOMMON"]._serialized_end = 6623 + _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_start = 6626 + _globals["_COMMONINLINEUSERDEFINEDFUNCTION"]._serialized_end = 6990 + _globals["_PYTHONUDF"]._serialized_start = 6993 + _globals["_PYTHONUDF"]._serialized_end = 7197 + _globals["_SCALARSCALAUDF"]._serialized_start = 7200 + _globals["_SCALARSCALAUDF"]._serialized_end = 7414 + _globals["_JAVAUDF"]._serialized_start = 7417 + _globals["_JAVAUDF"]._serialized_end = 7566 + _globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_start = 7568 + 
_globals["_TYPEDAGGREGATEEXPRESSION"]._serialized_end = 7667 + _globals["_CALLFUNCTION"]._serialized_start = 7669 + _globals["_CALLFUNCTION"]._serialized_end = 7777 + _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_start = 7779 + _globals["_NAMEDARGUMENTEXPRESSION"]._serialized_end = 7871 + _globals["_MERGEACTION"]._serialized_start = 7874 + _globals["_MERGEACTION"]._serialized_end = 8386 + _globals["_MERGEACTION_ASSIGNMENT"]._serialized_start = 8096 + _globals["_MERGEACTION_ASSIGNMENT"]._serialized_end = 8202 + _globals["_MERGEACTION_ACTIONTYPE"]._serialized_start = 8205 + _globals["_MERGEACTION_ACTIONTYPE"]._serialized_end = 8372 + _globals["_LAZYEXPRESSION"]._serialized_start = 8388 + _globals["_LAZYEXPRESSION"]._serialized_end = 8453 + _globals["_SUBQUERYEXPRESSION"]._serialized_start = 8456 + _globals["_SUBQUERYEXPRESSION"]._serialized_end = 8681 + _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_start = 8588 + _globals["_SUBQUERYEXPRESSION_SUBQUERYTYPE"]._serialized_end = 8681 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.pyi b/python/pyspark/sql/connect/proto/expressions_pb2.pyi index 1566eb1b1e9e2..1a8c60f673054 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.pyi +++ b/python/pyspark/sql/connect/proto/expressions_pb2.pyi @@ -847,6 +847,7 @@ class Expression(google.protobuf.message.Message): ARGUMENTS_FIELD_NUMBER: builtins.int IS_DISTINCT_FIELD_NUMBER: builtins.int IS_USER_DEFINED_FUNCTION_FIELD_NUMBER: builtins.int + IS_INTERNAL_FIELD_NUMBER: builtins.int function_name: builtins.str """(Required) name (or unparsed name for user defined function) for the unresolved function.""" @property @@ -864,6 +865,11 @@ class Expression(google.protobuf.message.Message): When it is not a user defined function, Connect will use the function name directly. When it is a user defined function, Connect will parse the function name first. 
""" + is_internal: builtins.bool + """(Optional) Indicate if this function is defined in the internal function registry. + If not set, the server will try to look up the function in the internal function registry + and decide appropriately. + """ def __init__( self, *, @@ -871,20 +877,34 @@ class Expression(google.protobuf.message.Message): arguments: collections.abc.Iterable[global___Expression] | None = ..., is_distinct: builtins.bool = ..., is_user_defined_function: builtins.bool = ..., + is_internal: builtins.bool | None = ..., ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "_is_internal", b"_is_internal", "is_internal", b"is_internal" + ], + ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ + "_is_internal", + b"_is_internal", "arguments", b"arguments", "function_name", b"function_name", "is_distinct", b"is_distinct", + "is_internal", + b"is_internal", "is_user_defined_function", b"is_user_defined_function", ], ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["_is_internal", b"_is_internal"] + ) -> typing_extensions.Literal["is_internal"] | None: ... class ExpressionString(google.protobuf.message.Message): """Expression as string.""" @@ -1184,6 +1204,8 @@ class Expression(google.protobuf.message.Message): NAMED_ARGUMENT_EXPRESSION_FIELD_NUMBER: builtins.int MERGE_ACTION_FIELD_NUMBER: builtins.int TYPED_AGGREGATE_EXPRESSION_FIELD_NUMBER: builtins.int + LAZY_EXPRESSION_FIELD_NUMBER: builtins.int + SUBQUERY_EXPRESSION_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int @property def common(self) -> global___ExpressionCommon: ... @@ -1228,6 +1250,10 @@ class Expression(google.protobuf.message.Message): @property def typed_aggregate_expression(self) -> global___TypedAggregateExpression: ... @property + def lazy_expression(self) -> global___LazyExpression: ... + @property + def subquery_expression(self) -> global___SubqueryExpression: ... 
+ @property def extension(self) -> google.protobuf.any_pb2.Any: """This field is used to mark extensions to the protocol. When plugins generate arbitrary relations they can add them here. During the planning the correct resolution is done. @@ -1256,6 +1282,8 @@ class Expression(google.protobuf.message.Message): named_argument_expression: global___NamedArgumentExpression | None = ..., merge_action: global___MergeAction | None = ..., typed_aggregate_expression: global___TypedAggregateExpression | None = ..., + lazy_expression: global___LazyExpression | None = ..., + subquery_expression: global___SubqueryExpression | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., ) -> None: ... def HasField( @@ -1279,6 +1307,8 @@ class Expression(google.protobuf.message.Message): b"extension", "lambda_function", b"lambda_function", + "lazy_expression", + b"lazy_expression", "literal", b"literal", "merge_action", @@ -1287,6 +1317,8 @@ class Expression(google.protobuf.message.Message): b"named_argument_expression", "sort_order", b"sort_order", + "subquery_expression", + b"subquery_expression", "typed_aggregate_expression", b"typed_aggregate_expression", "unresolved_attribute", @@ -1328,6 +1360,8 @@ class Expression(google.protobuf.message.Message): b"extension", "lambda_function", b"lambda_function", + "lazy_expression", + b"lazy_expression", "literal", b"literal", "merge_action", @@ -1336,6 +1370,8 @@ class Expression(google.protobuf.message.Message): b"named_argument_expression", "sort_order", b"sort_order", + "subquery_expression", + b"subquery_expression", "typed_aggregate_expression", b"typed_aggregate_expression", "unresolved_attribute", @@ -1379,6 +1415,8 @@ class Expression(google.protobuf.message.Message): "named_argument_expression", "merge_action", "typed_aggregate_expression", + "lazy_expression", + "subquery_expression", "extension", ] | None @@ -1801,3 +1839,66 @@ class MergeAction(google.protobuf.message.Message): ) -> 
typing_extensions.Literal["condition"] | None: ... global___MergeAction = MergeAction + +class LazyExpression(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + CHILD_FIELD_NUMBER: builtins.int + @property + def child(self) -> global___Expression: + """(Required) The expression to be marked as lazy.""" + def __init__( + self, + *, + child: global___Expression | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["child", b"child"] + ) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["child", b"child"]) -> None: ... + +global___LazyExpression = LazyExpression + +class SubqueryExpression(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class _SubqueryType: + ValueType = typing.NewType("ValueType", builtins.int) + V: typing_extensions.TypeAlias = ValueType + + class _SubqueryTypeEnumTypeWrapper( + google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[ + SubqueryExpression._SubqueryType.ValueType + ], + builtins.type, + ): # noqa: F821 + DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor + SUBQUERY_TYPE_UNKNOWN: SubqueryExpression._SubqueryType.ValueType # 0 + SUBQUERY_TYPE_SCALAR: SubqueryExpression._SubqueryType.ValueType # 1 + SUBQUERY_TYPE_EXISTS: SubqueryExpression._SubqueryType.ValueType # 2 + + class SubqueryType(_SubqueryType, metaclass=_SubqueryTypeEnumTypeWrapper): ... 
+ SUBQUERY_TYPE_UNKNOWN: SubqueryExpression.SubqueryType.ValueType # 0 + SUBQUERY_TYPE_SCALAR: SubqueryExpression.SubqueryType.ValueType # 1 + SUBQUERY_TYPE_EXISTS: SubqueryExpression.SubqueryType.ValueType # 2 + + PLAN_ID_FIELD_NUMBER: builtins.int + SUBQUERY_TYPE_FIELD_NUMBER: builtins.int + plan_id: builtins.int + """(Required) The id of corresponding connect plan.""" + subquery_type: global___SubqueryExpression.SubqueryType.ValueType + """(Required) The type of the subquery.""" + def __init__( + self, + *, + plan_id: builtins.int = ..., + subquery_type: global___SubqueryExpression.SubqueryType.ValueType = ..., + ) -> None: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "plan_id", b"plan_id", "subquery_type", b"subquery_type" + ], + ) -> None: ... + +global___SubqueryExpression = SubqueryExpression diff --git a/python/pyspark/sql/connect/proto/ml_common_pb2.py b/python/pyspark/sql/connect/proto/ml_common_pb2.py new file mode 100644 index 0000000000000..70e0e91652892 --- /dev/null +++ b/python/pyspark/sql/connect/proto/ml_common_pb2.py @@ -0,0 +1,80 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: spark/connect/ml_common.proto +# Protobuf Python Version: 5.28.3 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, 5, 28, 3, "", "spark/connect/ml_common.proto" +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from pyspark.sql.connect.proto import expressions_pb2 as spark_dot_connect_dot_expressions__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x1dspark/connect/ml_common.proto\x12\rspark.connect\x1a\x1fspark/connect/expressions.proto"\x98\x01\n\x08MlParams\x12;\n\x06params\x18\x01 \x03(\x0b\x32#.spark.connect.MlParams.ParamsEntryR\x06params\x1aO\n\x0bParamsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x14.spark.connect.ParamR\x05value:\x02\x38\x01"\xb6\x01\n\x05Param\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12/\n\x06vector\x18\x02 \x01(\x0b\x32\x15.spark.connect.VectorH\x00R\x06vector\x12/\n\x06matrix\x18\x03 \x01(\x0b\x32\x15.spark.connect.MatrixH\x00R\x06matrixB\x0c\n\nparam_type"\xc9\x01\n\nMlOperator\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x10\n\x03uid\x18\x02 \x01(\tR\x03uid\x12:\n\x04type\x18\x03 \x01(\x0e\x32&.spark.connect.MlOperator.OperatorTypeR\x04type"Y\n\x0cOperatorType\x12\x0f\n\x0bUNSPECIFIED\x10\x00\x12\r\n\tESTIMATOR\x10\x01\x12\x0f\n\x0bTRANSFORMER\x10\x02\x12\r\n\tEVALUATOR\x10\x03\x12\t\n\x05MODEL\x10\x04"\x1b\n\tObjectRef\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id"\xed\x01\n\x06Vector\x12\x33\n\x05\x64\x65nse\x18\x01 
\x01(\x0b\x32\x1b.spark.connect.Vector.DenseH\x00R\x05\x64\x65nse\x12\x36\n\x06sparse\x18\x02 \x01(\x0b\x32\x1c.spark.connect.Vector.SparseH\x00R\x06sparse\x1a\x1d\n\x05\x44\x65nse\x12\x14\n\x05value\x18\x01 \x03(\x01R\x05value\x1aH\n\x06Sparse\x12\x12\n\x04size\x18\x01 \x01(\x05R\x04size\x12\x14\n\x05index\x18\x02 \x03(\x05R\x05index\x12\x14\n\x05value\x18\x03 \x03(\x01R\x05valueB\r\n\x0bvector_type"\xaf\x03\n\x06Matrix\x12\x33\n\x05\x64\x65nse\x18\x01 \x01(\x0b\x32\x1b.spark.connect.Matrix.DenseH\x00R\x05\x64\x65nse\x12\x36\n\x06sparse\x18\x02 \x01(\x0b\x32\x1c.spark.connect.Matrix.SparseH\x00R\x06sparse\x1ax\n\x05\x44\x65nse\x12\x19\n\x08num_rows\x18\x01 \x01(\x05R\x07numRows\x12\x19\n\x08num_cols\x18\x02 \x01(\x05R\x07numCols\x12\x14\n\x05value\x18\x03 \x03(\x01R\x05value\x12#\n\ris_transposed\x18\x04 \x01(\x08R\x0cisTransposed\x1a\xae\x01\n\x06Sparse\x12\x19\n\x08num_rows\x18\x01 \x01(\x05R\x07numRows\x12\x19\n\x08num_cols\x18\x02 \x01(\x05R\x07numCols\x12\x16\n\x06\x63olptr\x18\x03 \x03(\x05R\x06\x63olptr\x12\x1b\n\trow_index\x18\x04 \x03(\x05R\x08rowIndex\x12\x14\n\x05value\x18\x05 \x03(\x01R\x05value\x12#\n\ris_transposed\x18\x06 \x01(\x08R\x0cisTransposedB\r\n\x0bmatrix_typeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages( + DESCRIPTOR, "pyspark.sql.connect.proto.ml_common_pb2", _globals +) +if not _descriptor._USE_C_DESCRIPTORS: + _globals["DESCRIPTOR"]._loaded_options = None + _globals[ + "DESCRIPTOR" + ]._serialized_options = b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" + _globals["_MLPARAMS_PARAMSENTRY"]._loaded_options = None + _globals["_MLPARAMS_PARAMSENTRY"]._serialized_options = b"8\001" + _globals["_MLPARAMS"]._serialized_start = 82 + _globals["_MLPARAMS"]._serialized_end = 234 + _globals["_MLPARAMS_PARAMSENTRY"]._serialized_start = 155 + 
_globals["_MLPARAMS_PARAMSENTRY"]._serialized_end = 234 + _globals["_PARAM"]._serialized_start = 237 + _globals["_PARAM"]._serialized_end = 419 + _globals["_MLOPERATOR"]._serialized_start = 422 + _globals["_MLOPERATOR"]._serialized_end = 623 + _globals["_MLOPERATOR_OPERATORTYPE"]._serialized_start = 534 + _globals["_MLOPERATOR_OPERATORTYPE"]._serialized_end = 623 + _globals["_OBJECTREF"]._serialized_start = 625 + _globals["_OBJECTREF"]._serialized_end = 652 + _globals["_VECTOR"]._serialized_start = 655 + _globals["_VECTOR"]._serialized_end = 892 + _globals["_VECTOR_DENSE"]._serialized_start = 774 + _globals["_VECTOR_DENSE"]._serialized_end = 803 + _globals["_VECTOR_SPARSE"]._serialized_start = 805 + _globals["_VECTOR_SPARSE"]._serialized_end = 877 + _globals["_MATRIX"]._serialized_start = 895 + _globals["_MATRIX"]._serialized_end = 1326 + _globals["_MATRIX_DENSE"]._serialized_start = 1014 + _globals["_MATRIX_DENSE"]._serialized_end = 1134 + _globals["_MATRIX_SPARSE"]._serialized_start = 1137 + _globals["_MATRIX_SPARSE"]._serialized_end = 1311 +# @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/ml_common_pb2.pyi b/python/pyspark/sql/connect/proto/ml_common_pb2.pyi new file mode 100644 index 0000000000000..64029b6679f19 --- /dev/null +++ b/python/pyspark/sql/connect/proto/ml_common_pb2.pyi @@ -0,0 +1,427 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" +@generated by mypy-protobuf. Do not edit manually! +isort:skip_file + +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import builtins +import collections.abc +import google.protobuf.descriptor +import google.protobuf.internal.containers +import google.protobuf.internal.enum_type_wrapper +import google.protobuf.message +import pyspark.sql.connect.proto.expressions_pb2 +import sys +import typing + +if sys.version_info >= (3, 10): + import typing as typing_extensions +else: + import typing_extensions + +DESCRIPTOR: google.protobuf.descriptor.FileDescriptor + +class MlParams(google.protobuf.message.Message): + """MlParams stores param settings for ML Estimator / Transformer / Evaluator""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class ParamsEntry(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + KEY_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + key: builtins.str + @property + def value(self) -> global___Param: ... + def __init__( + self, + *, + key: builtins.str = ..., + value: global___Param | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["value", b"value"] + ) -> builtins.bool: ... + def ClearField( + self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"] + ) -> None: ... + + PARAMS_FIELD_NUMBER: builtins.int + @property + def params( + self, + ) -> google.protobuf.internal.containers.MessageMap[builtins.str, global___Param]: + """User-supplied params""" + def __init__( + self, + *, + params: collections.abc.Mapping[builtins.str, global___Param] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["params", b"params"]) -> None: ... 
+ +global___MlParams = MlParams + +class Param(google.protobuf.message.Message): + """Represents the parameter type of the ML instance, or the returned value + of the attribute + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + LITERAL_FIELD_NUMBER: builtins.int + VECTOR_FIELD_NUMBER: builtins.int + MATRIX_FIELD_NUMBER: builtins.int + @property + def literal(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression.Literal: ... + @property + def vector(self) -> global___Vector: ... + @property + def matrix(self) -> global___Matrix: ... + def __init__( + self, + *, + literal: pyspark.sql.connect.proto.expressions_pb2.Expression.Literal | None = ..., + vector: global___Vector | None = ..., + matrix: global___Matrix | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "literal", + b"literal", + "matrix", + b"matrix", + "param_type", + b"param_type", + "vector", + b"vector", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "literal", + b"literal", + "matrix", + b"matrix", + "param_type", + b"param_type", + "vector", + b"vector", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["param_type", b"param_type"] + ) -> typing_extensions.Literal["literal", "vector", "matrix"] | None: ... 
+ +global___Param = Param + +class MlOperator(google.protobuf.message.Message): + """MLOperator represents the ML operators like (Estimator, Transformer or Evaluator)""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class _OperatorType: + ValueType = typing.NewType("ValueType", builtins.int) + V: typing_extensions.TypeAlias = ValueType + + class _OperatorTypeEnumTypeWrapper( + google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[ + MlOperator._OperatorType.ValueType + ], + builtins.type, + ): # noqa: F821 + DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor + UNSPECIFIED: MlOperator._OperatorType.ValueType # 0 + ESTIMATOR: MlOperator._OperatorType.ValueType # 1 + TRANSFORMER: MlOperator._OperatorType.ValueType # 2 + EVALUATOR: MlOperator._OperatorType.ValueType # 3 + MODEL: MlOperator._OperatorType.ValueType # 4 + + class OperatorType(_OperatorType, metaclass=_OperatorTypeEnumTypeWrapper): ... + UNSPECIFIED: MlOperator.OperatorType.ValueType # 0 + ESTIMATOR: MlOperator.OperatorType.ValueType # 1 + TRANSFORMER: MlOperator.OperatorType.ValueType # 2 + EVALUATOR: MlOperator.OperatorType.ValueType # 3 + MODEL: MlOperator.OperatorType.ValueType # 4 + + NAME_FIELD_NUMBER: builtins.int + UID_FIELD_NUMBER: builtins.int + TYPE_FIELD_NUMBER: builtins.int + name: builtins.str + """The qualified name of the ML operator.""" + uid: builtins.str + """Unique id of the ML operator""" + type: global___MlOperator.OperatorType.ValueType + """Represents what the ML operator is""" + def __init__( + self, + *, + name: builtins.str = ..., + uid: builtins.str = ..., + type: global___MlOperator.OperatorType.ValueType = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["name", b"name", "type", b"type", "uid", b"uid"] + ) -> None: ... 
+ +global___MlOperator = MlOperator + +class ObjectRef(google.protobuf.message.Message): + """Represents a reference to the cached object which could be a model + or summary evaluated by a model + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + ID_FIELD_NUMBER: builtins.int + id: builtins.str + """The ID is used to lookup the object on the server side.""" + def __init__( + self, + *, + id: builtins.str = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["id", b"id"]) -> None: ... + +global___ObjectRef = ObjectRef + +class Vector(google.protobuf.message.Message): + """See pyspark.ml.linalg.Vector""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class Dense(google.protobuf.message.Message): + """See pyspark.ml.linalg.DenseVector""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + VALUE_FIELD_NUMBER: builtins.int + @property + def value( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: ... + def __init__( + self, + *, + value: collections.abc.Iterable[builtins.float] | None = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["value", b"value"]) -> None: ... + + class Sparse(google.protobuf.message.Message): + """See pyspark.ml.linalg.SparseVector""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + SIZE_FIELD_NUMBER: builtins.int + INDEX_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + size: builtins.int + @property + def index( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: ... + @property + def value( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: ... + def __init__( + self, + *, + size: builtins.int = ..., + index: collections.abc.Iterable[builtins.int] | None = ..., + value: collections.abc.Iterable[builtins.float] | None = ..., + ) -> None: ... 
+ def ClearField( + self, + field_name: typing_extensions.Literal[ + "index", b"index", "size", b"size", "value", b"value" + ], + ) -> None: ... + + DENSE_FIELD_NUMBER: builtins.int + SPARSE_FIELD_NUMBER: builtins.int + @property + def dense(self) -> global___Vector.Dense: ... + @property + def sparse(self) -> global___Vector.Sparse: ... + def __init__( + self, + *, + dense: global___Vector.Dense | None = ..., + sparse: global___Vector.Sparse | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "dense", b"dense", "sparse", b"sparse", "vector_type", b"vector_type" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "dense", b"dense", "sparse", b"sparse", "vector_type", b"vector_type" + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["vector_type", b"vector_type"] + ) -> typing_extensions.Literal["dense", "sparse"] | None: ... + +global___Vector = Vector + +class Matrix(google.protobuf.message.Message): + """See pyspark.ml.linalg.Matrix""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class Dense(google.protobuf.message.Message): + """See pyspark.ml.linalg.DenseMatrix""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NUM_ROWS_FIELD_NUMBER: builtins.int + NUM_COLS_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + IS_TRANSPOSED_FIELD_NUMBER: builtins.int + num_rows: builtins.int + num_cols: builtins.int + @property + def value( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: ... + is_transposed: builtins.bool + def __init__( + self, + *, + num_rows: builtins.int = ..., + num_cols: builtins.int = ..., + value: collections.abc.Iterable[builtins.float] | None = ..., + is_transposed: builtins.bool = ..., + ) -> None: ... 
+ def ClearField( + self, + field_name: typing_extensions.Literal[ + "is_transposed", + b"is_transposed", + "num_cols", + b"num_cols", + "num_rows", + b"num_rows", + "value", + b"value", + ], + ) -> None: ... + + class Sparse(google.protobuf.message.Message): + """See pyspark.ml.linalg.SparseMatrix""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + NUM_ROWS_FIELD_NUMBER: builtins.int + NUM_COLS_FIELD_NUMBER: builtins.int + COLPTR_FIELD_NUMBER: builtins.int + ROW_INDEX_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + IS_TRANSPOSED_FIELD_NUMBER: builtins.int + num_rows: builtins.int + num_cols: builtins.int + @property + def colptr( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: ... + @property + def row_index( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.int]: ... + @property + def value( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.float]: ... + is_transposed: builtins.bool + def __init__( + self, + *, + num_rows: builtins.int = ..., + num_cols: builtins.int = ..., + colptr: collections.abc.Iterable[builtins.int] | None = ..., + row_index: collections.abc.Iterable[builtins.int] | None = ..., + value: collections.abc.Iterable[builtins.float] | None = ..., + is_transposed: builtins.bool = ..., + ) -> None: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "colptr", + b"colptr", + "is_transposed", + b"is_transposed", + "num_cols", + b"num_cols", + "num_rows", + b"num_rows", + "row_index", + b"row_index", + "value", + b"value", + ], + ) -> None: ... + + DENSE_FIELD_NUMBER: builtins.int + SPARSE_FIELD_NUMBER: builtins.int + @property + def dense(self) -> global___Matrix.Dense: ... + @property + def sparse(self) -> global___Matrix.Sparse: ... + def __init__( + self, + *, + dense: global___Matrix.Dense | None = ..., + sparse: global___Matrix.Sparse | None = ..., + ) -> None: ... 
+ def HasField( + self, + field_name: typing_extensions.Literal[ + "dense", b"dense", "matrix_type", b"matrix_type", "sparse", b"sparse" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "dense", b"dense", "matrix_type", b"matrix_type", "sparse", b"sparse" + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["matrix_type", b"matrix_type"] + ) -> typing_extensions.Literal["dense", "sparse"] | None: ... + +global___Matrix = Matrix diff --git a/python/pyspark/sql/connect/proto/ml_pb2.py b/python/pyspark/sql/connect/proto/ml_pb2.py new file mode 100644 index 0000000000000..5005f82d5d533 --- /dev/null +++ b/python/pyspark/sql/connect/proto/ml_pb2.py @@ -0,0 +1,71 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# NO CHECKED-IN PROTOBUF GENCODE +# source: spark/connect/ml.proto +# Protobuf Python Version: 5.28.3 +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import runtime_version as _runtime_version +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +_runtime_version.ValidateProtobufRuntimeVersion( + _runtime_version.Domain.PUBLIC, 5, 28, 3, "", "spark/connect/ml.proto" +) +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from pyspark.sql.connect.proto import relations_pb2 as spark_dot_connect_dot_relations__pb2 +from pyspark.sql.connect.proto import ml_common_pb2 as spark_dot_connect_dot_ml__common__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x16spark/connect/ml.proto\x12\rspark.connect\x1a\x1dspark/connect/relations.proto\x1a\x1dspark/connect/ml_common.proto"\xc6\x07\n\tMlCommand\x12\x30\n\x03\x66it\x18\x01 \x01(\x0b\x32\x1c.spark.connect.MlCommand.FitH\x00R\x03\x66it\x12,\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00R\x05\x66\x65tch\x12\x39\n\x06\x64\x65lete\x18\x03 \x01(\x0b\x32\x1f.spark.connect.MlCommand.DeleteH\x00R\x06\x64\x65lete\x12\x36\n\x05write\x18\x04 \x01(\x0b\x32\x1e.spark.connect.MlCommand.WriteH\x00R\x05write\x12\x33\n\x04read\x18\x05 \x01(\x0b\x32\x1d.spark.connect.MlCommand.ReadH\x00R\x04read\x1a\xa2\x01\n\x03\x46it\x12\x37\n\testimator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperatorR\testimator\x12/\n\x06params\x18\x02 \x01(\x0b\x32\x17.spark.connect.MlParamsR\x06params\x12\x31\n\x07\x64\x61taset\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x07\x64\x61taset\x1a;\n\x06\x44\x65lete\x12\x31\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefR\x06objRef\x1a\xf0\x02\n\x05Write\x12\x37\n\x08operator\x18\x01 
\x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00R\x08operator\x12\x33\n\x07obj_ref\x18\x02 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00R\x06objRef\x12/\n\x06params\x18\x03 \x01(\x0b\x32\x17.spark.connect.MlParamsR\x06params\x12\x12\n\x04path\x18\x04 \x01(\tR\x04path\x12)\n\x10should_overwrite\x18\x05 \x01(\x08R\x0fshouldOverwrite\x12\x45\n\x07options\x18\x06 \x03(\x0b\x32+.spark.connect.MlCommand.Write.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x06\n\x04type\x1aQ\n\x04Read\x12\x35\n\x08operator\x18\x01 \x01(\x0b\x32\x19.spark.connect.MlOperatorR\x08operator\x12\x12\n\x04path\x18\x02 \x01(\tR\x04pathB\t\n\x07\x63ommand"\xe9\x02\n\x0fMlCommandResult\x12,\n\x05param\x18\x01 \x01(\x0b\x32\x14.spark.connect.ParamH\x00R\x05param\x12\x1a\n\x07summary\x18\x02 \x01(\tH\x00R\x07summary\x12T\n\roperator_info\x18\x03 \x01(\x0b\x32-.spark.connect.MlCommandResult.MlOperatorInfoH\x00R\x0coperatorInfo\x1a\xa6\x01\n\x0eMlOperatorInfo\x12\x33\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00R\x06objRef\x12\x14\n\x04name\x18\x02 \x01(\tH\x00R\x04name\x12\x10\n\x03uid\x18\x03 \x01(\tR\x03uid\x12/\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsR\x06paramsB\x06\n\x04typeB\r\n\x0bresult_typeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "pyspark.sql.connect.proto.ml_pb2", _globals) +if not _descriptor._USE_C_DESCRIPTORS: + _globals["DESCRIPTOR"]._loaded_options = None + _globals[ + "DESCRIPTOR" + ]._serialized_options = b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" + _globals["_MLCOMMAND_WRITE_OPTIONSENTRY"]._loaded_options = None + _globals["_MLCOMMAND_WRITE_OPTIONSENTRY"]._serialized_options = b"8\001" + _globals["_MLCOMMAND"]._serialized_start = 104 + 
_globals["_MLCOMMAND"]._serialized_end = 1070 + _globals["_MLCOMMAND_FIT"]._serialized_start = 382 + _globals["_MLCOMMAND_FIT"]._serialized_end = 544 + _globals["_MLCOMMAND_DELETE"]._serialized_start = 546 + _globals["_MLCOMMAND_DELETE"]._serialized_end = 605 + _globals["_MLCOMMAND_WRITE"]._serialized_start = 608 + _globals["_MLCOMMAND_WRITE"]._serialized_end = 976 + _globals["_MLCOMMAND_WRITE_OPTIONSENTRY"]._serialized_start = 910 + _globals["_MLCOMMAND_WRITE_OPTIONSENTRY"]._serialized_end = 968 + _globals["_MLCOMMAND_READ"]._serialized_start = 978 + _globals["_MLCOMMAND_READ"]._serialized_end = 1059 + _globals["_MLCOMMANDRESULT"]._serialized_start = 1073 + _globals["_MLCOMMANDRESULT"]._serialized_end = 1434 + _globals["_MLCOMMANDRESULT_MLOPERATORINFO"]._serialized_start = 1253 + _globals["_MLCOMMANDRESULT_MLOPERATORINFO"]._serialized_end = 1419 +# @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/ml_pb2.pyi b/python/pyspark/sql/connect/proto/ml_pb2.pyi new file mode 100644 index 0000000000000..95bfefb524e2a --- /dev/null +++ b/python/pyspark/sql/connect/proto/ml_pb2.pyi @@ -0,0 +1,393 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +""" +@generated by mypy-protobuf. Do not edit manually! 
+isort:skip_file + +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import builtins +import collections.abc +import google.protobuf.descriptor +import google.protobuf.internal.containers +import google.protobuf.message +import pyspark.sql.connect.proto.ml_common_pb2 +import pyspark.sql.connect.proto.relations_pb2 +import sys + +if sys.version_info >= (3, 8): + import typing as typing_extensions +else: + import typing_extensions + +DESCRIPTOR: google.protobuf.descriptor.FileDescriptor + +class MlCommand(google.protobuf.message.Message): + """Command for ML""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class Fit(google.protobuf.message.Message): + """Command for estimator.fit(dataset)""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + ESTIMATOR_FIELD_NUMBER: builtins.int + PARAMS_FIELD_NUMBER: builtins.int + DATASET_FIELD_NUMBER: builtins.int + @property + def estimator(self) -> pyspark.sql.connect.proto.ml_common_pb2.MlOperator: + """Estimator information""" + @property + def params(self) -> pyspark.sql.connect.proto.ml_common_pb2.MlParams: + """parameters of the Estimator""" + @property + def dataset(self) -> pyspark.sql.connect.proto.relations_pb2.Relation: + """the training dataset""" + def __init__( + self, + *, + estimator: 
pyspark.sql.connect.proto.ml_common_pb2.MlOperator | None = ..., + params: pyspark.sql.connect.proto.ml_common_pb2.MlParams | None = ..., + dataset: pyspark.sql.connect.proto.relations_pb2.Relation | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "dataset", b"dataset", "estimator", b"estimator", "params", b"params" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "dataset", b"dataset", "estimator", b"estimator", "params", b"params" + ], + ) -> None: ... + + class Delete(google.protobuf.message.Message): + """Command to delete the cached object which could be a model + or summary evaluated by a model + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + OBJ_REF_FIELD_NUMBER: builtins.int + @property + def obj_ref(self) -> pyspark.sql.connect.proto.ml_common_pb2.ObjectRef: ... + def __init__( + self, + *, + obj_ref: pyspark.sql.connect.proto.ml_common_pb2.ObjectRef | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["obj_ref", b"obj_ref"] + ) -> builtins.bool: ... + def ClearField( + self, field_name: typing_extensions.Literal["obj_ref", b"obj_ref"] + ) -> None: ... + + class Write(google.protobuf.message.Message): + """Command to write ML operator""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class OptionsEntry(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + KEY_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + key: builtins.str + value: builtins.str + def __init__( + self, + *, + key: builtins.str = ..., + value: builtins.str = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["key", b"key", "value", b"value"] + ) -> None: ... 
+ + OPERATOR_FIELD_NUMBER: builtins.int + OBJ_REF_FIELD_NUMBER: builtins.int + PARAMS_FIELD_NUMBER: builtins.int + PATH_FIELD_NUMBER: builtins.int + SHOULD_OVERWRITE_FIELD_NUMBER: builtins.int + OPTIONS_FIELD_NUMBER: builtins.int + @property + def operator(self) -> pyspark.sql.connect.proto.ml_common_pb2.MlOperator: + """Estimator or evaluator""" + @property + def obj_ref(self) -> pyspark.sql.connect.proto.ml_common_pb2.ObjectRef: + """The cached model""" + @property + def params(self) -> pyspark.sql.connect.proto.ml_common_pb2.MlParams: + """The parameters of operator which could be estimator/evaluator or a cached model""" + path: builtins.str + """Save the ML instance to the path""" + should_overwrite: builtins.bool + """Overwrites if the output path already exists.""" + @property + def options( + self, + ) -> google.protobuf.internal.containers.ScalarMap[builtins.str, builtins.str]: + """The options of the writer""" + def __init__( + self, + *, + operator: pyspark.sql.connect.proto.ml_common_pb2.MlOperator | None = ..., + obj_ref: pyspark.sql.connect.proto.ml_common_pb2.ObjectRef | None = ..., + params: pyspark.sql.connect.proto.ml_common_pb2.MlParams | None = ..., + path: builtins.str = ..., + should_overwrite: builtins.bool = ..., + options: collections.abc.Mapping[builtins.str, builtins.str] | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "obj_ref", b"obj_ref", "operator", b"operator", "params", b"params", "type", b"type" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "obj_ref", + b"obj_ref", + "operator", + b"operator", + "options", + b"options", + "params", + b"params", + "path", + b"path", + "should_overwrite", + b"should_overwrite", + "type", + b"type", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["type", b"type"] + ) -> typing_extensions.Literal["operator", "obj_ref"] | None: ... 
+ + class Read(google.protobuf.message.Message): + """Command to load ML operator.""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + OPERATOR_FIELD_NUMBER: builtins.int + PATH_FIELD_NUMBER: builtins.int + @property + def operator(self) -> pyspark.sql.connect.proto.ml_common_pb2.MlOperator: + """ML operator information""" + path: builtins.str + """Load the ML instance from the input path""" + def __init__( + self, + *, + operator: pyspark.sql.connect.proto.ml_common_pb2.MlOperator | None = ..., + path: builtins.str = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["operator", b"operator"] + ) -> builtins.bool: ... + def ClearField( + self, field_name: typing_extensions.Literal["operator", b"operator", "path", b"path"] + ) -> None: ... + + FIT_FIELD_NUMBER: builtins.int + FETCH_FIELD_NUMBER: builtins.int + DELETE_FIELD_NUMBER: builtins.int + WRITE_FIELD_NUMBER: builtins.int + READ_FIELD_NUMBER: builtins.int + @property + def fit(self) -> global___MlCommand.Fit: ... + @property + def fetch(self) -> pyspark.sql.connect.proto.relations_pb2.Fetch: ... + @property + def delete(self) -> global___MlCommand.Delete: ... + @property + def write(self) -> global___MlCommand.Write: ... + @property + def read(self) -> global___MlCommand.Read: ... + def __init__( + self, + *, + fit: global___MlCommand.Fit | None = ..., + fetch: pyspark.sql.connect.proto.relations_pb2.Fetch | None = ..., + delete: global___MlCommand.Delete | None = ..., + write: global___MlCommand.Write | None = ..., + read: global___MlCommand.Read | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "command", + b"command", + "delete", + b"delete", + "fetch", + b"fetch", + "fit", + b"fit", + "read", + b"read", + "write", + b"write", + ], + ) -> builtins.bool: ... 
+ def ClearField( + self, + field_name: typing_extensions.Literal[ + "command", + b"command", + "delete", + b"delete", + "fetch", + b"fetch", + "fit", + b"fit", + "read", + b"read", + "write", + b"write", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["command", b"command"] + ) -> typing_extensions.Literal["fit", "fetch", "delete", "write", "read"] | None: ... + +global___MlCommand = MlCommand + +class MlCommandResult(google.protobuf.message.Message): + """The result of MlCommand""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class MlOperatorInfo(google.protobuf.message.Message): + """Represents an operator info""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + OBJ_REF_FIELD_NUMBER: builtins.int + NAME_FIELD_NUMBER: builtins.int + UID_FIELD_NUMBER: builtins.int + PARAMS_FIELD_NUMBER: builtins.int + @property + def obj_ref(self) -> pyspark.sql.connect.proto.ml_common_pb2.ObjectRef: + """The cached object which could be a model or summary evaluated by a model""" + name: builtins.str + """Operator name""" + uid: builtins.str + @property + def params(self) -> pyspark.sql.connect.proto.ml_common_pb2.MlParams: ... + def __init__( + self, + *, + obj_ref: pyspark.sql.connect.proto.ml_common_pb2.ObjectRef | None = ..., + name: builtins.str = ..., + uid: builtins.str = ..., + params: pyspark.sql.connect.proto.ml_common_pb2.MlParams | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "name", b"name", "obj_ref", b"obj_ref", "params", b"params", "type", b"type" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "name", + b"name", + "obj_ref", + b"obj_ref", + "params", + b"params", + "type", + b"type", + "uid", + b"uid", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["type", b"type"] + ) -> typing_extensions.Literal["obj_ref", "name"] | None: ... 
+ + PARAM_FIELD_NUMBER: builtins.int + SUMMARY_FIELD_NUMBER: builtins.int + OPERATOR_INFO_FIELD_NUMBER: builtins.int + @property + def param(self) -> pyspark.sql.connect.proto.ml_common_pb2.Param: + """The result of the attribute""" + summary: builtins.str + """Evaluate a Dataset in a model and return the cached ID of summary""" + @property + def operator_info(self) -> global___MlCommandResult.MlOperatorInfo: + """Operator information""" + def __init__( + self, + *, + param: pyspark.sql.connect.proto.ml_common_pb2.Param | None = ..., + summary: builtins.str = ..., + operator_info: global___MlCommandResult.MlOperatorInfo | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "operator_info", + b"operator_info", + "param", + b"param", + "result_type", + b"result_type", + "summary", + b"summary", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "operator_info", + b"operator_info", + "param", + b"param", + "result_type", + b"result_type", + "summary", + b"summary", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["result_type", b"result_type"] + ) -> typing_extensions.Literal["param", "summary", "operator_info"] | None: ... 
+ +global___MlCommandResult = MlCommandResult diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py index 479abcfb597a1..4327d0240b355 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.py +++ b/python/pyspark/sql/connect/proto/relations_pb2.py @@ -39,10 +39,11 @@ from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__pb2 from pyspark.sql.connect.proto import catalog_pb2 as spark_dot_connect_dot_catalog__pb2 from pyspark.sql.connect.proto import common_pb2 as spark_dot_connect_dot_common__pb2 +from pyspark.sql.connect.proto import ml_common_pb2 as spark_dot_connect_dot_ml__common__pb2 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto"\x9c\x1c\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c 
\x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! 
\x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x37\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00R\x08\x61sOfJoin\x12\x85\x01\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R!commonInlineUserDefinedDataSource\x12\x45\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00R\rwithRelations\x12\x38\n\ttranspose\x18* \x01(\x0b\x32\x18.spark.connect.TransposeH\x00R\ttranspose\x12w\n unresolved_table_valued_function\x18+ \x01(\x0b\x32,.spark.connect.UnresolvedTableValuedFunctionH\x00R\x1dunresolvedTableValuedFunction\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h 
\x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"\x8e\x01\n\x0eRelationCommon\x12#\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01R\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12-\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginR\x06originB\n\n\x08_plan_id"\xde\x03\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12O\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntryR\x0enamedArguments\x12>\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cposArguments\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"u\n\rWithRelations\x12+\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04root\x12\x37\n\nreferences\x18\x02 \x03(\x0b\x32\x17.spark.connect.RelationR\nreferences"\x97\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 
\x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x95\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 
.spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xfe\x05\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 
\x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x12J\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSetsR\x0cgroupingSets\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1aL\n\x0cGroupingSets\x12<\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0bgroupingSet"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 
\x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 
\x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 
\x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xfe\x02\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12i\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01R\x10renameColumnsMap\x12\x42\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.RenameR\x07renames\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x45\n\x06Rename\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12 \n\x0cnew_col_name\x18\x02 \x01(\tR\nnewColName"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"z\n\tTranspose\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12>\n\rindex_columns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cindexColumns"}\n\x1dUnresolvedTableValuedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 
\x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xe8\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x12"\n\nprofile_id\x18\x04 \x01(\x05H\x01R\tprofileId\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id"\xfb\x04\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_conf"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x97\x01\n!CommonInlineUserDefinedDataSource\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12O\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00R\x10pythonDataSourceB\r\n\x0b\x64\x61ta_source"K\n\x10PythonDataSource\x12\x18\n\x07\x63ommand\x18\x01 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x02 \x01(\tR\tpythonVer"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x84\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schema"\xdb\x03\n\x08\x41sOfJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12\x37\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08leftAsOf\x12\x39\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.ExpressionR\trightAsOf\x12\x36\n\tjoin_expr\x18\x05 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08joinExpr\x12#\n\rusing_columns\x18\x06 \x03(\tR\x0cusingColumns\x12\x1b\n\tjoin_type\x18\x07 \x01(\tR\x08joinType\x12\x37\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\ttolerance\x12.\n\x13\x61llow_exact_matches\x18\t \x01(\x08R\x11\x61llowExactMatches\x12\x1c\n\tdirection\x18\n \x01(\tR\tdirectionB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto\x1a\x1dspark/connect/ml_common.proto"\x9c\x1d\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 
\x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 
\x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! \x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x37\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00R\x08\x61sOfJoin\x12\x85\x01\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R!commonInlineUserDefinedDataSource\x12\x45\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00R\rwithRelations\x12\x38\n\ttranspose\x18* \x01(\x0b\x32\x18.spark.connect.TransposeH\x00R\ttranspose\x12w\n unresolved_table_valued_function\x18+ 
\x01(\x0b\x32,.spark.connect.UnresolvedTableValuedFunctionH\x00R\x1dunresolvedTableValuedFunction\x12?\n\x0clateral_join\x18, \x01(\x0b\x32\x1a.spark.connect.LateralJoinH\x00R\x0blateralJoin\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12=\n\x0bml_relation\x18\xac\x02 \x01(\x0b\x32\x19.spark.connect.MlRelationH\x00R\nmlRelation\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\xf8\x02\n\nMlRelation\x12\x43\n\ttransform\x18\x01 \x01(\x0b\x32#.spark.connect.MlRelation.TransformH\x00R\ttransform\x12,\n\x05\x66\x65tch\x18\x02 \x01(\x0b\x32\x14.spark.connect.FetchH\x00R\x05\x66\x65tch\x1a\xeb\x01\n\tTransform\x12\x33\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefH\x00R\x06objRef\x12=\n\x0btransformer\x18\x02 
\x01(\x0b\x32\x19.spark.connect.MlOperatorH\x00R\x0btransformer\x12-\n\x05input\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06params\x18\x04 \x01(\x0b\x32\x17.spark.connect.MlParamsR\x06paramsB\n\n\x08operatorB\t\n\x07ml_type"\xbe\x02\n\x05\x46\x65tch\x12\x31\n\x07obj_ref\x18\x01 \x01(\x0b\x32\x18.spark.connect.ObjectRefR\x06objRef\x12\x35\n\x07methods\x18\x02 \x03(\x0b\x32\x1b.spark.connect.Fetch.MethodR\x07methods\x1a\xca\x01\n\x06Method\x12\x16\n\x06method\x18\x01 \x01(\tR\x06method\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32 .spark.connect.Fetch.Method.ArgsR\x04\x61rgs\x1ar\n\x04\x41rgs\x12,\n\x05param\x18\x01 \x01(\x0b\x32\x14.spark.connect.ParamH\x00R\x05param\x12/\n\x05input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x05inputB\x0b\n\targs_type"\t\n\x07Unknown"\x8e\x01\n\x0eRelationCommon\x12#\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01R\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12-\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginR\x06originB\n\n\x08_plan_id"\xde\x03\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12O\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntryR\x0enamedArguments\x12>\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cposArguments\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"u\n\rWithRelations\x12+\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04root\x12\x37\n\nreferences\x18\x02 
\x03(\x0b\x32\x17.spark.connect.RelationR\nreferences"\x97\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x95\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 
.spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xfe\x05\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 
\x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x12J\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSetsR\x0cgroupingSets\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1aL\n\x0cGroupingSets\x12<\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0bgroupingSet"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 
\x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 
\x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 
\x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xfe\x02\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12i\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01R\x10renameColumnsMap\x12\x42\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.RenameR\x07renames\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x45\n\x06Rename\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12 \n\x0cnew_col_name\x18\x02 \x01(\tR\nnewColName"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"z\n\tTranspose\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12>\n\rindex_columns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cindexColumns"}\n\x1dUnresolvedTableValuedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 
\x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xe8\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x12"\n\nprofile_id\x18\x04 \x01(\x05H\x01R\tprofileId\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id"\xcd\x05\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x12?\n\x0cstate_schema\x18\n \x01(\x0b\x32\x17.spark.connect.DataTypeH\x03R\x0bstateSchema\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_confB\x0f\n\r_state_schema"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 
\x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x97\x01\n!CommonInlineUserDefinedDataSource\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12O\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00R\x10pythonDataSourceB\r\n\x0b\x64\x61ta_source"K\n\x10PythonDataSource\x12\x18\n\x07\x63ommand\x18\x01 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x02 \x01(\tR\tpythonVer"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x84\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schema"\xdb\x03\n\x08\x41sOfJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12\x37\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08leftAsOf\x12\x39\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.ExpressionR\trightAsOf\x12\x36\n\tjoin_expr\x18\x05 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08joinExpr\x12#\n\rusing_columns\x18\x06 \x03(\tR\x0cusingColumns\x12\x1b\n\tjoin_type\x18\x07 \x01(\tR\x08joinType\x12\x37\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\ttolerance\x12.\n\x13\x61llow_exact_matches\x18\t \x01(\x08R\x11\x61llowExactMatches\x12\x1c\n\tdirection\x18\n \x01(\tR\tdirection"\xe6\x01\n\x0bLateralJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 
\x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinTypeB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _globals = globals() @@ -77,158 +78,170 @@ ]._serialized_options = b"\030\001" _globals["_PARSE_OPTIONSENTRY"]._loaded_options = None _globals["_PARSE_OPTIONSENTRY"]._serialized_options = b"8\001" - _globals["_RELATION"]._serialized_start = 193 - _globals["_RELATION"]._serialized_end = 3805 - _globals["_UNKNOWN"]._serialized_start = 3807 - _globals["_UNKNOWN"]._serialized_end = 3816 - _globals["_RELATIONCOMMON"]._serialized_start = 3819 - _globals["_RELATIONCOMMON"]._serialized_end = 3961 - _globals["_SQL"]._serialized_start = 3964 - _globals["_SQL"]._serialized_end = 4442 - _globals["_SQL_ARGSENTRY"]._serialized_start = 4258 - _globals["_SQL_ARGSENTRY"]._serialized_end = 4348 - _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_start = 4350 - _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_end = 4442 - _globals["_WITHRELATIONS"]._serialized_start = 4444 - _globals["_WITHRELATIONS"]._serialized_end = 4561 - _globals["_READ"]._serialized_start = 4564 - _globals["_READ"]._serialized_end = 5227 - _globals["_READ_NAMEDTABLE"]._serialized_start = 4742 - _globals["_READ_NAMEDTABLE"]._serialized_end = 4934 - _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_start = 4876 - _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_end = 4934 - _globals["_READ_DATASOURCE"]._serialized_start = 4937 - _globals["_READ_DATASOURCE"]._serialized_end = 5214 - _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_start = 4876 - _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_end = 4934 - _globals["_PROJECT"]._serialized_start = 5229 - _globals["_PROJECT"]._serialized_end = 5346 - _globals["_FILTER"]._serialized_start = 5348 - _globals["_FILTER"]._serialized_end = 5460 - _globals["_JOIN"]._serialized_start = 5463 - _globals["_JOIN"]._serialized_end = 6124 - _globals["_JOIN_JOINDATATYPE"]._serialized_start = 5802 - 
_globals["_JOIN_JOINDATATYPE"]._serialized_end = 5894 - _globals["_JOIN_JOINTYPE"]._serialized_start = 5897 - _globals["_JOIN_JOINTYPE"]._serialized_end = 6105 - _globals["_SETOPERATION"]._serialized_start = 6127 - _globals["_SETOPERATION"]._serialized_end = 6606 - _globals["_SETOPERATION_SETOPTYPE"]._serialized_start = 6443 - _globals["_SETOPERATION_SETOPTYPE"]._serialized_end = 6557 - _globals["_LIMIT"]._serialized_start = 6608 - _globals["_LIMIT"]._serialized_end = 6684 - _globals["_OFFSET"]._serialized_start = 6686 - _globals["_OFFSET"]._serialized_end = 6765 - _globals["_TAIL"]._serialized_start = 6767 - _globals["_TAIL"]._serialized_end = 6842 - _globals["_AGGREGATE"]._serialized_start = 6845 - _globals["_AGGREGATE"]._serialized_end = 7611 - _globals["_AGGREGATE_PIVOT"]._serialized_start = 7260 - _globals["_AGGREGATE_PIVOT"]._serialized_end = 7371 - _globals["_AGGREGATE_GROUPINGSETS"]._serialized_start = 7373 - _globals["_AGGREGATE_GROUPINGSETS"]._serialized_end = 7449 - _globals["_AGGREGATE_GROUPTYPE"]._serialized_start = 7452 - _globals["_AGGREGATE_GROUPTYPE"]._serialized_end = 7611 - _globals["_SORT"]._serialized_start = 7614 - _globals["_SORT"]._serialized_end = 7774 - _globals["_DROP"]._serialized_start = 7777 - _globals["_DROP"]._serialized_end = 7918 - _globals["_DEDUPLICATE"]._serialized_start = 7921 - _globals["_DEDUPLICATE"]._serialized_end = 8161 - _globals["_LOCALRELATION"]._serialized_start = 8163 - _globals["_LOCALRELATION"]._serialized_end = 8252 - _globals["_CACHEDLOCALRELATION"]._serialized_start = 8254 - _globals["_CACHEDLOCALRELATION"]._serialized_end = 8326 - _globals["_CACHEDREMOTERELATION"]._serialized_start = 8328 - _globals["_CACHEDREMOTERELATION"]._serialized_end = 8383 - _globals["_SAMPLE"]._serialized_start = 8386 - _globals["_SAMPLE"]._serialized_end = 8659 - _globals["_RANGE"]._serialized_start = 8662 - _globals["_RANGE"]._serialized_end = 8807 - _globals["_SUBQUERYALIAS"]._serialized_start = 8809 - 
_globals["_SUBQUERYALIAS"]._serialized_end = 8923 - _globals["_REPARTITION"]._serialized_start = 8926 - _globals["_REPARTITION"]._serialized_end = 9068 - _globals["_SHOWSTRING"]._serialized_start = 9071 - _globals["_SHOWSTRING"]._serialized_end = 9213 - _globals["_HTMLSTRING"]._serialized_start = 9215 - _globals["_HTMLSTRING"]._serialized_end = 9329 - _globals["_STATSUMMARY"]._serialized_start = 9331 - _globals["_STATSUMMARY"]._serialized_end = 9423 - _globals["_STATDESCRIBE"]._serialized_start = 9425 - _globals["_STATDESCRIBE"]._serialized_end = 9506 - _globals["_STATCROSSTAB"]._serialized_start = 9508 - _globals["_STATCROSSTAB"]._serialized_end = 9609 - _globals["_STATCOV"]._serialized_start = 9611 - _globals["_STATCOV"]._serialized_end = 9707 - _globals["_STATCORR"]._serialized_start = 9710 - _globals["_STATCORR"]._serialized_end = 9847 - _globals["_STATAPPROXQUANTILE"]._serialized_start = 9850 - _globals["_STATAPPROXQUANTILE"]._serialized_end = 10014 - _globals["_STATFREQITEMS"]._serialized_start = 10016 - _globals["_STATFREQITEMS"]._serialized_end = 10141 - _globals["_STATSAMPLEBY"]._serialized_start = 10144 - _globals["_STATSAMPLEBY"]._serialized_end = 10453 - _globals["_STATSAMPLEBY_FRACTION"]._serialized_start = 10345 - _globals["_STATSAMPLEBY_FRACTION"]._serialized_end = 10444 - _globals["_NAFILL"]._serialized_start = 10456 - _globals["_NAFILL"]._serialized_end = 10590 - _globals["_NADROP"]._serialized_start = 10593 - _globals["_NADROP"]._serialized_end = 10727 - _globals["_NAREPLACE"]._serialized_start = 10730 - _globals["_NAREPLACE"]._serialized_end = 11026 - _globals["_NAREPLACE_REPLACEMENT"]._serialized_start = 10885 - _globals["_NAREPLACE_REPLACEMENT"]._serialized_end = 11026 - _globals["_TODF"]._serialized_start = 11028 - _globals["_TODF"]._serialized_end = 11116 - _globals["_WITHCOLUMNSRENAMED"]._serialized_start = 11119 - _globals["_WITHCOLUMNSRENAMED"]._serialized_end = 11501 - 
_globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_start = 11363 - _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_end = 11430 - _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_start = 11432 - _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_end = 11501 - _globals["_WITHCOLUMNS"]._serialized_start = 11503 - _globals["_WITHCOLUMNS"]._serialized_end = 11622 - _globals["_WITHWATERMARK"]._serialized_start = 11625 - _globals["_WITHWATERMARK"]._serialized_end = 11759 - _globals["_HINT"]._serialized_start = 11762 - _globals["_HINT"]._serialized_end = 11894 - _globals["_UNPIVOT"]._serialized_start = 11897 - _globals["_UNPIVOT"]._serialized_end = 12224 - _globals["_UNPIVOT_VALUES"]._serialized_start = 12154 - _globals["_UNPIVOT_VALUES"]._serialized_end = 12213 - _globals["_TRANSPOSE"]._serialized_start = 12226 - _globals["_TRANSPOSE"]._serialized_end = 12348 - _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_start = 12350 - _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_end = 12475 - _globals["_TOSCHEMA"]._serialized_start = 12477 - _globals["_TOSCHEMA"]._serialized_end = 12583 - _globals["_REPARTITIONBYEXPRESSION"]._serialized_start = 12586 - _globals["_REPARTITIONBYEXPRESSION"]._serialized_end = 12789 - _globals["_MAPPARTITIONS"]._serialized_start = 12792 - _globals["_MAPPARTITIONS"]._serialized_end = 13024 - _globals["_GROUPMAP"]._serialized_start = 13027 - _globals["_GROUPMAP"]._serialized_end = 13662 - _globals["_COGROUPMAP"]._serialized_start = 13665 - _globals["_COGROUPMAP"]._serialized_end = 14191 - _globals["_APPLYINPANDASWITHSTATE"]._serialized_start = 14194 - _globals["_APPLYINPANDASWITHSTATE"]._serialized_end = 14551 - _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_start = 14554 - _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_end = 14798 - _globals["_PYTHONUDTF"]._serialized_start = 14801 - _globals["_PYTHONUDTF"]._serialized_end = 14978 - 
_globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_start = 14981 - _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_end = 15132 - _globals["_PYTHONDATASOURCE"]._serialized_start = 15134 - _globals["_PYTHONDATASOURCE"]._serialized_end = 15209 - _globals["_COLLECTMETRICS"]._serialized_start = 15212 - _globals["_COLLECTMETRICS"]._serialized_end = 15348 - _globals["_PARSE"]._serialized_start = 15351 - _globals["_PARSE"]._serialized_end = 15739 - _globals["_PARSE_OPTIONSENTRY"]._serialized_start = 4876 - _globals["_PARSE_OPTIONSENTRY"]._serialized_end = 4934 - _globals["_PARSE_PARSEFORMAT"]._serialized_start = 15640 - _globals["_PARSE_PARSEFORMAT"]._serialized_end = 15728 - _globals["_ASOFJOIN"]._serialized_start = 15742 - _globals["_ASOFJOIN"]._serialized_end = 16217 + _globals["_RELATION"]._serialized_start = 224 + _globals["_RELATION"]._serialized_end = 3964 + _globals["_MLRELATION"]._serialized_start = 3967 + _globals["_MLRELATION"]._serialized_end = 4343 + _globals["_MLRELATION_TRANSFORM"]._serialized_start = 4097 + _globals["_MLRELATION_TRANSFORM"]._serialized_end = 4332 + _globals["_FETCH"]._serialized_start = 4346 + _globals["_FETCH"]._serialized_end = 4664 + _globals["_FETCH_METHOD"]._serialized_start = 4462 + _globals["_FETCH_METHOD"]._serialized_end = 4664 + _globals["_FETCH_METHOD_ARGS"]._serialized_start = 4550 + _globals["_FETCH_METHOD_ARGS"]._serialized_end = 4664 + _globals["_UNKNOWN"]._serialized_start = 4666 + _globals["_UNKNOWN"]._serialized_end = 4675 + _globals["_RELATIONCOMMON"]._serialized_start = 4678 + _globals["_RELATIONCOMMON"]._serialized_end = 4820 + _globals["_SQL"]._serialized_start = 4823 + _globals["_SQL"]._serialized_end = 5301 + _globals["_SQL_ARGSENTRY"]._serialized_start = 5117 + _globals["_SQL_ARGSENTRY"]._serialized_end = 5207 + _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_start = 5209 + _globals["_SQL_NAMEDARGUMENTSENTRY"]._serialized_end = 5301 + _globals["_WITHRELATIONS"]._serialized_start = 5303 + 
_globals["_WITHRELATIONS"]._serialized_end = 5420 + _globals["_READ"]._serialized_start = 5423 + _globals["_READ"]._serialized_end = 6086 + _globals["_READ_NAMEDTABLE"]._serialized_start = 5601 + _globals["_READ_NAMEDTABLE"]._serialized_end = 5793 + _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_start = 5735 + _globals["_READ_NAMEDTABLE_OPTIONSENTRY"]._serialized_end = 5793 + _globals["_READ_DATASOURCE"]._serialized_start = 5796 + _globals["_READ_DATASOURCE"]._serialized_end = 6073 + _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_start = 5735 + _globals["_READ_DATASOURCE_OPTIONSENTRY"]._serialized_end = 5793 + _globals["_PROJECT"]._serialized_start = 6088 + _globals["_PROJECT"]._serialized_end = 6205 + _globals["_FILTER"]._serialized_start = 6207 + _globals["_FILTER"]._serialized_end = 6319 + _globals["_JOIN"]._serialized_start = 6322 + _globals["_JOIN"]._serialized_end = 6983 + _globals["_JOIN_JOINDATATYPE"]._serialized_start = 6661 + _globals["_JOIN_JOINDATATYPE"]._serialized_end = 6753 + _globals["_JOIN_JOINTYPE"]._serialized_start = 6756 + _globals["_JOIN_JOINTYPE"]._serialized_end = 6964 + _globals["_SETOPERATION"]._serialized_start = 6986 + _globals["_SETOPERATION"]._serialized_end = 7465 + _globals["_SETOPERATION_SETOPTYPE"]._serialized_start = 7302 + _globals["_SETOPERATION_SETOPTYPE"]._serialized_end = 7416 + _globals["_LIMIT"]._serialized_start = 7467 + _globals["_LIMIT"]._serialized_end = 7543 + _globals["_OFFSET"]._serialized_start = 7545 + _globals["_OFFSET"]._serialized_end = 7624 + _globals["_TAIL"]._serialized_start = 7626 + _globals["_TAIL"]._serialized_end = 7701 + _globals["_AGGREGATE"]._serialized_start = 7704 + _globals["_AGGREGATE"]._serialized_end = 8470 + _globals["_AGGREGATE_PIVOT"]._serialized_start = 8119 + _globals["_AGGREGATE_PIVOT"]._serialized_end = 8230 + _globals["_AGGREGATE_GROUPINGSETS"]._serialized_start = 8232 + _globals["_AGGREGATE_GROUPINGSETS"]._serialized_end = 8308 + 
_globals["_AGGREGATE_GROUPTYPE"]._serialized_start = 8311 + _globals["_AGGREGATE_GROUPTYPE"]._serialized_end = 8470 + _globals["_SORT"]._serialized_start = 8473 + _globals["_SORT"]._serialized_end = 8633 + _globals["_DROP"]._serialized_start = 8636 + _globals["_DROP"]._serialized_end = 8777 + _globals["_DEDUPLICATE"]._serialized_start = 8780 + _globals["_DEDUPLICATE"]._serialized_end = 9020 + _globals["_LOCALRELATION"]._serialized_start = 9022 + _globals["_LOCALRELATION"]._serialized_end = 9111 + _globals["_CACHEDLOCALRELATION"]._serialized_start = 9113 + _globals["_CACHEDLOCALRELATION"]._serialized_end = 9185 + _globals["_CACHEDREMOTERELATION"]._serialized_start = 9187 + _globals["_CACHEDREMOTERELATION"]._serialized_end = 9242 + _globals["_SAMPLE"]._serialized_start = 9245 + _globals["_SAMPLE"]._serialized_end = 9518 + _globals["_RANGE"]._serialized_start = 9521 + _globals["_RANGE"]._serialized_end = 9666 + _globals["_SUBQUERYALIAS"]._serialized_start = 9668 + _globals["_SUBQUERYALIAS"]._serialized_end = 9782 + _globals["_REPARTITION"]._serialized_start = 9785 + _globals["_REPARTITION"]._serialized_end = 9927 + _globals["_SHOWSTRING"]._serialized_start = 9930 + _globals["_SHOWSTRING"]._serialized_end = 10072 + _globals["_HTMLSTRING"]._serialized_start = 10074 + _globals["_HTMLSTRING"]._serialized_end = 10188 + _globals["_STATSUMMARY"]._serialized_start = 10190 + _globals["_STATSUMMARY"]._serialized_end = 10282 + _globals["_STATDESCRIBE"]._serialized_start = 10284 + _globals["_STATDESCRIBE"]._serialized_end = 10365 + _globals["_STATCROSSTAB"]._serialized_start = 10367 + _globals["_STATCROSSTAB"]._serialized_end = 10468 + _globals["_STATCOV"]._serialized_start = 10470 + _globals["_STATCOV"]._serialized_end = 10566 + _globals["_STATCORR"]._serialized_start = 10569 + _globals["_STATCORR"]._serialized_end = 10706 + _globals["_STATAPPROXQUANTILE"]._serialized_start = 10709 + _globals["_STATAPPROXQUANTILE"]._serialized_end = 10873 + 
_globals["_STATFREQITEMS"]._serialized_start = 10875 + _globals["_STATFREQITEMS"]._serialized_end = 11000 + _globals["_STATSAMPLEBY"]._serialized_start = 11003 + _globals["_STATSAMPLEBY"]._serialized_end = 11312 + _globals["_STATSAMPLEBY_FRACTION"]._serialized_start = 11204 + _globals["_STATSAMPLEBY_FRACTION"]._serialized_end = 11303 + _globals["_NAFILL"]._serialized_start = 11315 + _globals["_NAFILL"]._serialized_end = 11449 + _globals["_NADROP"]._serialized_start = 11452 + _globals["_NADROP"]._serialized_end = 11586 + _globals["_NAREPLACE"]._serialized_start = 11589 + _globals["_NAREPLACE"]._serialized_end = 11885 + _globals["_NAREPLACE_REPLACEMENT"]._serialized_start = 11744 + _globals["_NAREPLACE_REPLACEMENT"]._serialized_end = 11885 + _globals["_TODF"]._serialized_start = 11887 + _globals["_TODF"]._serialized_end = 11975 + _globals["_WITHCOLUMNSRENAMED"]._serialized_start = 11978 + _globals["_WITHCOLUMNSRENAMED"]._serialized_end = 12360 + _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_start = 12222 + _globals["_WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY"]._serialized_end = 12289 + _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_start = 12291 + _globals["_WITHCOLUMNSRENAMED_RENAME"]._serialized_end = 12360 + _globals["_WITHCOLUMNS"]._serialized_start = 12362 + _globals["_WITHCOLUMNS"]._serialized_end = 12481 + _globals["_WITHWATERMARK"]._serialized_start = 12484 + _globals["_WITHWATERMARK"]._serialized_end = 12618 + _globals["_HINT"]._serialized_start = 12621 + _globals["_HINT"]._serialized_end = 12753 + _globals["_UNPIVOT"]._serialized_start = 12756 + _globals["_UNPIVOT"]._serialized_end = 13083 + _globals["_UNPIVOT_VALUES"]._serialized_start = 13013 + _globals["_UNPIVOT_VALUES"]._serialized_end = 13072 + _globals["_TRANSPOSE"]._serialized_start = 13085 + _globals["_TRANSPOSE"]._serialized_end = 13207 + _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_start = 13209 + _globals["_UNRESOLVEDTABLEVALUEDFUNCTION"]._serialized_end = 13334 + 
_globals["_TOSCHEMA"]._serialized_start = 13336 + _globals["_TOSCHEMA"]._serialized_end = 13442 + _globals["_REPARTITIONBYEXPRESSION"]._serialized_start = 13445 + _globals["_REPARTITIONBYEXPRESSION"]._serialized_end = 13648 + _globals["_MAPPARTITIONS"]._serialized_start = 13651 + _globals["_MAPPARTITIONS"]._serialized_end = 13883 + _globals["_GROUPMAP"]._serialized_start = 13886 + _globals["_GROUPMAP"]._serialized_end = 14603 + _globals["_COGROUPMAP"]._serialized_start = 14606 + _globals["_COGROUPMAP"]._serialized_end = 15132 + _globals["_APPLYINPANDASWITHSTATE"]._serialized_start = 15135 + _globals["_APPLYINPANDASWITHSTATE"]._serialized_end = 15492 + _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_start = 15495 + _globals["_COMMONINLINEUSERDEFINEDTABLEFUNCTION"]._serialized_end = 15739 + _globals["_PYTHONUDTF"]._serialized_start = 15742 + _globals["_PYTHONUDTF"]._serialized_end = 15919 + _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_start = 15922 + _globals["_COMMONINLINEUSERDEFINEDDATASOURCE"]._serialized_end = 16073 + _globals["_PYTHONDATASOURCE"]._serialized_start = 16075 + _globals["_PYTHONDATASOURCE"]._serialized_end = 16150 + _globals["_COLLECTMETRICS"]._serialized_start = 16153 + _globals["_COLLECTMETRICS"]._serialized_end = 16289 + _globals["_PARSE"]._serialized_start = 16292 + _globals["_PARSE"]._serialized_end = 16680 + _globals["_PARSE_OPTIONSENTRY"]._serialized_start = 5735 + _globals["_PARSE_OPTIONSENTRY"]._serialized_end = 5793 + _globals["_PARSE_PARSEFORMAT"]._serialized_start = 16581 + _globals["_PARSE_PARSEFORMAT"]._serialized_end = 16669 + _globals["_ASOFJOIN"]._serialized_start = 16683 + _globals["_ASOFJOIN"]._serialized_end = 17158 + _globals["_LATERALJOIN"]._serialized_start = 17161 + _globals["_LATERALJOIN"]._serialized_end = 17391 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi index 
03753056c6bf1..0c8cf8dd3eda8 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.pyi +++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi @@ -43,6 +43,7 @@ import google.protobuf.message import pyspark.sql.connect.proto.catalog_pb2 import pyspark.sql.connect.proto.common_pb2 import pyspark.sql.connect.proto.expressions_pb2 +import pyspark.sql.connect.proto.ml_common_pb2 import pyspark.sql.connect.proto.types_pb2 import sys import typing @@ -106,6 +107,7 @@ class Relation(google.protobuf.message.Message): WITH_RELATIONS_FIELD_NUMBER: builtins.int TRANSPOSE_FIELD_NUMBER: builtins.int UNRESOLVED_TABLE_VALUED_FUNCTION_FIELD_NUMBER: builtins.int + LATERAL_JOIN_FIELD_NUMBER: builtins.int FILL_NA_FIELD_NUMBER: builtins.int DROP_NA_FIELD_NUMBER: builtins.int REPLACE_FIELD_NUMBER: builtins.int @@ -118,6 +120,7 @@ class Relation(google.protobuf.message.Message): FREQ_ITEMS_FIELD_NUMBER: builtins.int SAMPLE_BY_FIELD_NUMBER: builtins.int CATALOG_FIELD_NUMBER: builtins.int + ML_RELATION_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int UNKNOWN_FIELD_NUMBER: builtins.int @property @@ -211,6 +214,8 @@ class Relation(google.protobuf.message.Message): @property def unresolved_table_valued_function(self) -> global___UnresolvedTableValuedFunction: ... @property + def lateral_join(self) -> global___LateralJoin: ... + @property def fill_na(self) -> global___NAFill: """NA functions""" @property @@ -238,6 +243,9 @@ class Relation(google.protobuf.message.Message): def catalog(self) -> pyspark.sql.connect.proto.catalog_pb2.Catalog: """Catalog API (experimental / unstable)""" @property + def ml_relation(self) -> global___MlRelation: + """ML relation""" + @property def extension(self) -> google.protobuf.any_pb2.Any: """This field is used to mark extensions to the protocol. When plugins generate arbitrary relations they can add them here. During the planning the correct resolution is done. 
@@ -292,6 +300,7 @@ class Relation(google.protobuf.message.Message): with_relations: global___WithRelations | None = ..., transpose: global___Transpose | None = ..., unresolved_table_valued_function: global___UnresolvedTableValuedFunction | None = ..., + lateral_join: global___LateralJoin | None = ..., fill_na: global___NAFill | None = ..., drop_na: global___NADrop | None = ..., replace: global___NAReplace | None = ..., @@ -304,6 +313,7 @@ class Relation(google.protobuf.message.Message): freq_items: global___StatFreqItems | None = ..., sample_by: global___StatSampleBy | None = ..., catalog: pyspark.sql.connect.proto.catalog_pb2.Catalog | None = ..., + ml_relation: global___MlRelation | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., unknown: global___Unknown | None = ..., ) -> None: ... @@ -364,12 +374,16 @@ class Relation(google.protobuf.message.Message): b"html_string", "join", b"join", + "lateral_join", + b"lateral_join", "limit", b"limit", "local_relation", b"local_relation", "map_partitions", b"map_partitions", + "ml_relation", + b"ml_relation", "offset", b"offset", "parse", @@ -485,12 +499,16 @@ class Relation(google.protobuf.message.Message): b"html_string", "join", b"join", + "lateral_join", + b"lateral_join", "limit", b"limit", "local_relation", b"local_relation", "map_partitions", b"map_partitions", + "ml_relation", + b"ml_relation", "offset", b"offset", "parse", @@ -595,6 +613,7 @@ class Relation(google.protobuf.message.Message): "with_relations", "transpose", "unresolved_table_valued_function", + "lateral_join", "fill_na", "drop_na", "replace", @@ -607,6 +626,7 @@ class Relation(google.protobuf.message.Message): "freq_items", "sample_by", "catalog", + "ml_relation", "extension", "unknown", ] @@ -615,6 +635,198 @@ class Relation(google.protobuf.message.Message): global___Relation = Relation +class MlRelation(google.protobuf.message.Message): + """Relation to represent ML world""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + 
class Transform(google.protobuf.message.Message): + """Relation to represent transform(input) of the operator + which could be a cached model or a new transformer + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + OBJ_REF_FIELD_NUMBER: builtins.int + TRANSFORMER_FIELD_NUMBER: builtins.int + INPUT_FIELD_NUMBER: builtins.int + PARAMS_FIELD_NUMBER: builtins.int + @property + def obj_ref(self) -> pyspark.sql.connect.proto.ml_common_pb2.ObjectRef: + """Object reference""" + @property + def transformer(self) -> pyspark.sql.connect.proto.ml_common_pb2.MlOperator: + """Could be an ML transformer like VectorAssembler""" + @property + def input(self) -> global___Relation: + """the input dataframe""" + @property + def params(self) -> pyspark.sql.connect.proto.ml_common_pb2.MlParams: + """the operator specific parameters""" + def __init__( + self, + *, + obj_ref: pyspark.sql.connect.proto.ml_common_pb2.ObjectRef | None = ..., + transformer: pyspark.sql.connect.proto.ml_common_pb2.MlOperator | None = ..., + input: global___Relation | None = ..., + params: pyspark.sql.connect.proto.ml_common_pb2.MlParams | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "input", + b"input", + "obj_ref", + b"obj_ref", + "operator", + b"operator", + "params", + b"params", + "transformer", + b"transformer", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "input", + b"input", + "obj_ref", + b"obj_ref", + "operator", + b"operator", + "params", + b"params", + "transformer", + b"transformer", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["operator", b"operator"] + ) -> typing_extensions.Literal["obj_ref", "transformer"] | None: ... + + TRANSFORM_FIELD_NUMBER: builtins.int + FETCH_FIELD_NUMBER: builtins.int + @property + def transform(self) -> global___MlRelation.Transform: ... + @property + def fetch(self) -> global___Fetch: ... 
+ def __init__( + self, + *, + transform: global___MlRelation.Transform | None = ..., + fetch: global___Fetch | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "fetch", b"fetch", "ml_type", b"ml_type", "transform", b"transform" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "fetch", b"fetch", "ml_type", b"ml_type", "transform", b"transform" + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["ml_type", b"ml_type"] + ) -> typing_extensions.Literal["transform", "fetch"] | None: ... + +global___MlRelation = MlRelation + +class Fetch(google.protobuf.message.Message): + """Message for fetching attribute from object on the server side. + Fetch can be represented as a Relation or a ML command + Command: model.coefficients, model.summary.weightedPrecision which + returns the final literal result + Relation: model.summary.roc which returns a DataFrame (Relation) + """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class Method(google.protobuf.message.Message): + """Represents a method with inclusion of method name and its arguments""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + class Args(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + PARAM_FIELD_NUMBER: builtins.int + INPUT_FIELD_NUMBER: builtins.int + @property + def param(self) -> pyspark.sql.connect.proto.ml_common_pb2.Param: ... + @property + def input(self) -> global___Relation: ... + def __init__( + self, + *, + param: pyspark.sql.connect.proto.ml_common_pb2.Param | None = ..., + input: global___Relation | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "args_type", b"args_type", "input", b"input", "param", b"param" + ], + ) -> builtins.bool: ... 
+ def ClearField( + self, + field_name: typing_extensions.Literal[ + "args_type", b"args_type", "input", b"input", "param", b"param" + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["args_type", b"args_type"] + ) -> typing_extensions.Literal["param", "input"] | None: ... + + METHOD_FIELD_NUMBER: builtins.int + ARGS_FIELD_NUMBER: builtins.int + method: builtins.str + """(Required) the method name""" + @property + def args( + self, + ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[ + global___Fetch.Method.Args + ]: + """(Optional) the arguments of the method""" + def __init__( + self, + *, + method: builtins.str = ..., + args: collections.abc.Iterable[global___Fetch.Method.Args] | None = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["args", b"args", "method", b"method"] + ) -> None: ... + + OBJ_REF_FIELD_NUMBER: builtins.int + METHODS_FIELD_NUMBER: builtins.int + @property + def obj_ref(self) -> pyspark.sql.connect.proto.ml_common_pb2.ObjectRef: + """(Required) reference to the object on the server side""" + @property + def methods( + self, + ) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[global___Fetch.Method]: + """(Required) the calling method chains""" + def __init__( + self, + *, + obj_ref: pyspark.sql.connect.proto.ml_common_pb2.ObjectRef | None = ..., + methods: collections.abc.Iterable[global___Fetch.Method] | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["obj_ref", b"obj_ref"] + ) -> builtins.bool: ... + def ClearField( + self, field_name: typing_extensions.Literal["methods", b"methods", "obj_ref", b"obj_ref"] + ) -> None: ... 
+ +global___Fetch = Fetch + class Unknown(google.protobuf.message.Message): """Used for testing purposes only.""" @@ -3400,6 +3612,7 @@ class GroupMap(google.protobuf.message.Message): IS_MAP_GROUPS_WITH_STATE_FIELD_NUMBER: builtins.int OUTPUT_MODE_FIELD_NUMBER: builtins.int TIMEOUT_CONF_FIELD_NUMBER: builtins.int + STATE_SCHEMA_FIELD_NUMBER: builtins.int @property def input(self) -> global___Relation: """(Required) Input relation for Group Map API: apply, applyInPandas.""" @@ -3438,6 +3651,9 @@ class GroupMap(google.protobuf.message.Message): """(Optional) The output mode of the function.""" timeout_conf: builtins.str """(Optional) Timeout configuration for groups that do not receive data for a while.""" + @property + def state_schema(self) -> pyspark.sql.connect.proto.types_pb2.DataType: + """(Optional) The schema for the grouped state.""" def __init__( self, *, @@ -3460,6 +3676,7 @@ class GroupMap(google.protobuf.message.Message): is_map_groups_with_state: builtins.bool | None = ..., output_mode: builtins.str | None = ..., timeout_conf: builtins.str | None = ..., + state_schema: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., ) -> None: ... 
def HasField( self, @@ -3468,6 +3685,8 @@ class GroupMap(google.protobuf.message.Message): b"_is_map_groups_with_state", "_output_mode", b"_output_mode", + "_state_schema", + b"_state_schema", "_timeout_conf", b"_timeout_conf", "func", @@ -3480,6 +3699,8 @@ class GroupMap(google.protobuf.message.Message): b"is_map_groups_with_state", "output_mode", b"output_mode", + "state_schema", + b"state_schema", "timeout_conf", b"timeout_conf", ], @@ -3491,6 +3712,8 @@ class GroupMap(google.protobuf.message.Message): b"_is_map_groups_with_state", "_output_mode", b"_output_mode", + "_state_schema", + b"_state_schema", "_timeout_conf", b"_timeout_conf", "func", @@ -3509,6 +3732,8 @@ class GroupMap(google.protobuf.message.Message): b"output_mode", "sorting_expressions", b"sorting_expressions", + "state_schema", + b"state_schema", "timeout_conf", b"timeout_conf", ], @@ -3525,6 +3750,10 @@ class GroupMap(google.protobuf.message.Message): self, oneof_group: typing_extensions.Literal["_output_mode", b"_output_mode"] ) -> typing_extensions.Literal["output_mode"] | None: ... @typing.overload + def WhichOneof( + self, oneof_group: typing_extensions.Literal["_state_schema", b"_state_schema"] + ) -> typing_extensions.Literal["state_schema"] | None: ... + @typing.overload def WhichOneof( self, oneof_group: typing_extensions.Literal["_timeout_conf", b"_timeout_conf"] ) -> typing_extensions.Literal["timeout_conf"] | None: ... @@ -4109,3 +4338,56 @@ class AsOfJoin(google.protobuf.message.Message): ) -> None: ... global___AsOfJoin = AsOfJoin + +class LateralJoin(google.protobuf.message.Message): + """Relation of type [[LateralJoin]]. + + `left` and `right` must be present. 
+ """ + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + LEFT_FIELD_NUMBER: builtins.int + RIGHT_FIELD_NUMBER: builtins.int + JOIN_CONDITION_FIELD_NUMBER: builtins.int + JOIN_TYPE_FIELD_NUMBER: builtins.int + @property + def left(self) -> global___Relation: + """(Required) Left input relation for a Join.""" + @property + def right(self) -> global___Relation: + """(Required) Right input relation for a Join.""" + @property + def join_condition(self) -> pyspark.sql.connect.proto.expressions_pb2.Expression: + """(Optional) The join condition.""" + join_type: global___Join.JoinType.ValueType + """(Required) The join type.""" + def __init__( + self, + *, + left: global___Relation | None = ..., + right: global___Relation | None = ..., + join_condition: pyspark.sql.connect.proto.expressions_pb2.Expression | None = ..., + join_type: global___Join.JoinType.ValueType = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "join_condition", b"join_condition", "left", b"left", "right", b"right" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "join_condition", + b"join_condition", + "join_type", + b"join_type", + "left", + b"left", + "right", + b"right", + ], + ) -> None: ... 
+ +global___LateralJoin = LateralJoin diff --git a/python/pyspark/sql/connect/readwriter.py b/python/pyspark/sql/connect/readwriter.py index aeb0f98d71076..6cc38aca4fc4c 100644 --- a/python/pyspark/sql/connect/readwriter.py +++ b/python/pyspark/sql/connect/readwriter.py @@ -751,7 +751,7 @@ def parquet( self.mode(mode) if partitionBy is not None: self.partitionBy(partitionBy) - self.option("compression", compression) + self._set_opts(compression=compression) self.format("parquet").save(path) parquet.__doc__ = PySparkDataFrameWriter.parquet.__doc__ diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index bfd79092ccf4d..59349a17886bb 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -113,13 +113,6 @@ from pyspark.sql.connect.shell.progress import ProgressHandler from pyspark.sql.connect.datasource import DataSourceRegistration -try: - import memory_profiler # noqa: F401 - - has_memory_profiler = True -except Exception: - has_memory_profiler = False - class SparkSession: # The active SparkSession for the current thread @@ -207,34 +200,26 @@ def _apply_options(self, session: "SparkSession") -> None: for i in range(int(os.environ.get("PYSPARK_REMOTE_INIT_CONF_LEN", "0"))): init_opts = json.loads(os.environ[f"PYSPARK_REMOTE_INIT_CONF_{i}"]) + # The options are applied after session creation, + # so options ["spark.remote", "spark.master"] always take no effect. + invalid_opts = ["spark.remote", "spark.master"] + with self._lock: + opts = {} + + # Only attempts to set Spark SQL configurations. + # If the configurations are static, it might throw an exception so + # simply ignore it for now. for k, v in init_opts.items(): - # the options are applied after session creation, - # so following options always take no effect - if k not in [ - "spark.remote", - "spark.master", - ] and k.startswith("spark.sql."): - # Only attempts to set Spark SQL configurations. 
- # If the configurations are static, it might throw an exception so - # simply ignore it for now. - try: - session.conf.set(k, v) - except Exception as e: - logger.warn(f"Failed to set configuration {k} due to {e}") + if k not in invalid_opts and k.startswith("spark.sql."): + opts[k] = v - with self._lock: for k, v in self._options.items(): - # the options are applied after session creation, - # so following options always take no effect - if k not in [ - "spark.remote", - "spark.master", - ]: - try: - session.conf.set(k, v) - except Exception as e: - logger.warn(f"Failed to set configuration {k} due to {e}") + if k not in invalid_opts: + opts[k] = v + + if len(opts) > 0: + session.conf._set_all(configs=opts, silent=True) def create(self) -> "SparkSession": has_channel_builder = self._channel_builder is not None @@ -797,13 +782,11 @@ def range( range.__doc__ = PySparkSession.range.__doc__ - @property + @functools.cached_property def catalog(self) -> "Catalog": from pyspark.sql.connect.catalog import Catalog - if not hasattr(self, "_catalog"): - self._catalog = Catalog(self) - return self._catalog + return Catalog(self) catalog.__doc__ = PySparkSession.catalog.__doc__ @@ -1051,7 +1034,7 @@ def _start_connect_server(master: str, opts: Dict[str, Any]) -> None: default_conf = { "spark.plugins": "org.apache.spark.sql.connect.SparkConnectPlugin", "spark.sql.artifact.isolation.enabled": "true", - "spark.sql.artifact.isolation.always.apply.classloader": "true", + "spark.sql.artifact.isolation.alwaysApplyClassloader": "true", } if "SPARK_TESTING" in os.environ: @@ -1120,6 +1103,16 @@ def creator(old_session_id: str) -> "SparkSession": return creator, (self._session_id,) + def _to_ddl(self, struct: StructType) -> str: + ddl = self._client._analyze(method="json_to_ddl", json_string=struct.json()).ddl_string + assert ddl is not None + return ddl + + def _parse_ddl(self, ddl: str) -> DataType: + dt = self._client._analyze(method="ddl_parse", ddl_string=ddl).parsed + assert dt 
is not None + return dt + SparkSession.__doc__ = PySparkSession.__doc__ diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 8a5b982bc7f23..2d12704485ad2 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -42,6 +42,7 @@ from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2 from pyspark.sql.merge import MergeIntoWriter from pyspark.sql.streaming import DataStreamWriter +from pyspark.sql.table_arg import TableArg from pyspark.sql.types import StructType, Row from pyspark.sql.utils import dispatch_df_method @@ -2276,6 +2277,28 @@ def columns(self) -> List[str]: """ ... + @dispatch_df_method + def metadataColumn(self, colName: str) -> Column: + """ + Selects a metadata column based on its logical column name and returns it as a + :class:`Column`. + + A metadata column can be accessed this way even if the underlying data source defines a data + column with a conflicting name. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + colName : str + string, metadata column name + + Returns + ------- + :class:`Column` + """ + ... + @dispatch_df_method def colRegex(self, colName: str) -> Column: """ @@ -2549,7 +2572,7 @@ def join( pyspark.errors.exceptions.captured.AnalysisException: Column name#0 are ambiguous... A better approach is to assign aliases to the dataframes, and then reference - the ouptut columns from the join operation using these aliases: + the output columns from the join operation using these aliases: >>> df.alias("a").join( ... df.alias("b"), sf.col("a.name") == sf.col("b.name"), "outer" @@ -2629,6 +2652,108 @@ def join( """ ... + def lateralJoin( + self, + other: "DataFrame", + on: Optional[Column] = None, + how: Optional[str] = None, + ) -> "DataFrame": + """ + Lateral joins with another :class:`DataFrame`, using the given join expression. 
+ + A lateral join (also known as a correlated join) is a type of join where each row from + one DataFrame is used as input to a subquery or a derived table that computes a result + specific to that row. The right side `DataFrame` can reference columns from the current + row of the left side `DataFrame`, allowing for more complex and context-dependent results + than a standard join. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + other : :class:`DataFrame` + Right side of the join + on : :class:`Column`, optional + a join expression (Column). + how : str, optional + default ``inner``. Must be one of: ``inner``, ``cross``, ``left``, ``leftouter``, + and ``left_outer``. + + Returns + ------- + :class:`DataFrame` + Joined DataFrame. + + Examples + -------- + Setup a sample DataFrame. + + >>> from pyspark.sql import functions as sf + >>> from pyspark.sql import Row + >>> customers_data = [ + ... Row(customer_id=1, name="Alice"), Row(customer_id=2, name="Bob"), + ... Row(customer_id=3, name="Charlie"), Row(customer_id=4, name="Diana") + ... ] + >>> customers = spark.createDataFrame(customers_data) + >>> orders_data = [ + ... Row(order_id=101, customer_id=1, order_date="2024-01-10", + ... items=[Row(product="laptop", quantity=5), Row(product="mouse", quantity=12)]), + ... Row(order_id=102, customer_id=1, order_date="2024-02-15", + ... items=[Row(product="phone", quantity=2), Row(product="charger", quantity=15)]), + ... Row(order_id=105, customer_id=1, order_date="2024-03-20", + ... items=[Row(product="tablet", quantity=4)]), + ... Row(order_id=103, customer_id=2, order_date="2024-01-12", + ... items=[Row(product="tablet", quantity=8)]), + ... Row(order_id=104, customer_id=2, order_date="2024-03-05", + ... items=[Row(product="laptop", quantity=7)]), + ... Row(order_id=106, customer_id=3, order_date="2024-04-05", + ... items=[Row(product="monitor", quantity=1)]), + ... 
] + >>> orders = spark.createDataFrame(orders_data) + + Example 1 (use TVF): Expanding Items in Each Order into Separate Rows + + >>> customers.join(orders, "customer_id").lateralJoin( + ... spark.tvf.explode(sf.col("items").outer()).select("col.*") + ... ).select( + ... "customer_id", "name", "order_id", "order_date", "product", "quantity" + ... ).orderBy("customer_id", "order_id", "product").show() + +-----------+-------+--------+----------+-------+--------+ + |customer_id| name|order_id|order_date|product|quantity| + +-----------+-------+--------+----------+-------+--------+ + | 1| Alice| 101|2024-01-10| laptop| 5| + | 1| Alice| 101|2024-01-10| mouse| 12| + | 1| Alice| 102|2024-02-15|charger| 15| + | 1| Alice| 102|2024-02-15| phone| 2| + | 1| Alice| 105|2024-03-20| tablet| 4| + | 2| Bob| 103|2024-01-12| tablet| 8| + | 2| Bob| 104|2024-03-05| laptop| 7| + | 3|Charlie| 106|2024-04-05|monitor| 1| + +-----------+-------+--------+----------+-------+--------+ + + Example 2 (use subquery): Finding the Two Most Recent Orders for Customer + + >>> customers.alias("c").lateralJoin( + ... orders.alias("o") + ... .where(sf.col("o.customer_id") == sf.col("c.customer_id").outer()) + ... .select("order_id", "order_date") + ... .orderBy(sf.col("order_date").desc()) + ... .limit(2), + ... how="left" + ... ).orderBy("customer_id", "order_id").show() + +-----------+-------+--------+----------+ + |customer_id| name|order_id|order_date| + +-----------+-------+--------+----------+ + | 1| Alice| 102|2024-02-15| + | 1| Alice| 105|2024-03-20| + | 2| Bob| 103|2024-01-12| + | 2| Bob| 104|2024-03-05| + | 3|Charlie| 106|2024-04-05| + | 4| Diana| NULL| NULL| + +-----------+-------+--------+----------+ + """ + ... + # TODO(SPARK-22947): Fix the DataFrame API. @dispatch_df_method def _joinAsOf( @@ -3907,7 +4032,7 @@ def groupingSets( groupingSets : sequence of sequence of columns or str Individual set of columns to group on. 
cols : :class:`Column` or str - Addional grouping columns specified by users. + Additional grouping columns specified by users. Those columns are shown as the output columns after aggregation. Returns @@ -6476,6 +6601,29 @@ def transpose(self, indexColumn: Optional["ColumnOrName"] = None) -> "DataFrame" """ ... + def asTable(self) -> TableArg: + """ + Converts the DataFrame into a `TableArg` object, which can be used as a table argument + in a user-defined table function (UDTF). + + After obtaining a TableArg from a DataFrame using this method, you can specify partitioning + and ordering for the table argument by calling methods such as `partitionBy`, `orderBy`, and + `withSinglePartition` on the `TableArg` instance. + - partitionBy: Partitions the data based on the specified columns. This method cannot + be called after withSinglePartition() has been called. + - orderBy: Orders the data within partitions based on the specified columns. + - withSinglePartition: Indicates that the data should be treated as a single partition. + This method cannot be called after partitionBy() has been called. + + .. versionadded:: 4.0.0 + + Returns + ------- + :class:`TableArg` + A `TableArg` object representing a table argument. + """ + ... + def scalar(self) -> Column: """ Return a `Column` object for a SCALAR Subquery containing exactly one row and one column. @@ -6509,7 +6657,7 @@ def scalar(self) -> Column: >>> from pyspark.sql import functions as sf >>> employees.where( ... sf.col("salary") > employees.select(sf.avg("salary")).scalar() - ... ).select("name", "salary", "department_id").show() + ... ).select("name", "salary", "department_id").orderBy("name").show() +-----+------+-------------+ | name|salary|department_id| +-----+------+-------------+ @@ -6522,11 +6670,12 @@ def scalar(self) -> Column: in their department. >>> from pyspark.sql import functions as sf - >>> employees.where( + >>> employees.alias("e1").where( ... sf.col("salary") - ... 
> employees.where(sf.col("department_id") == sf.col("department_id").outer()) - ... .select(sf.avg("salary")).scalar() - ... ).select("name", "salary", "department_id").show() + ... > employees.alias("e2").where( + ... sf.col("e2.department_id") == sf.col("e1.department_id").outer() + ... ).select(sf.avg("salary")).scalar() + ... ).select("name", "salary", "department_id").orderBy("name").show() +-----+------+-------------+ | name|salary|department_id| +-----+------+-------------+ @@ -6538,23 +6687,24 @@ def scalar(self) -> Column: department. >>> from pyspark.sql import functions as sf - >>> employees.select( + >>> employees.alias("e1").select( ... "name", "salary", "department_id", ... sf.format_number( ... sf.lit(100) * sf.col("salary") / - ... employees.where(sf.col("department_id") == sf.col("department_id").outer()) - ... .select(sf.sum("salary")).scalar().alias("avg_salary"), + ... employees.alias("e2").where( + ... sf.col("e2.department_id") == sf.col("e1.department_id").outer() + ... ).select(sf.sum("salary")).scalar().alias("avg_salary"), ... 1 ... ).alias("salary_proportion_in_department") - ... ).show() + ... ).orderBy("name").show() +-------+------+-------------+-------------------------------+ | name|salary|department_id|salary_proportion_in_department| +-------+------+-------------+-------------------------------+ | Alice| 45000| 101| 30.6| | Bob| 54000| 101| 36.7| |Charlie| 29000| 102| 32.2| - | Eve| 48000| 101| 32.7| | David| 61000| 102| 67.8| + | Eve| 48000| 101| 32.7| +-------+------+-------------+-------------------------------+ """ ... @@ -6595,8 +6745,10 @@ def exists(self) -> Column: Example 1: Filter for customers who have placed at least one order. >>> from pyspark.sql import functions as sf - >>> customers.where( - ... orders.where(sf.col("customer_id") == sf.col("customer_id").outer()).exists() + >>> customers.alias("c").where( + ... orders.alias("o").where( + ... sf.col("o.customer_id") == sf.col("c.customer_id").outer() + ... 
).exists() ... ).orderBy("customer_id").show() +-----------+-------------+-------+ |customer_id|customer_name|country| @@ -6609,8 +6761,10 @@ def exists(self) -> Column: Example 2: Filter for customers who have never placed an order. >>> from pyspark.sql import functions as sf - >>> customers.where( - ... ~orders.where(sf.col("customer_id") == sf.col("customer_id").outer()).exists() + >>> customers.alias("c").where( + ... ~orders.alias("o").where( + ... sf.col("o.customer_id") == sf.col("c.customer_id").outer() + ... ).exists() ... ).orderBy("customer_id").show() +-----------+-------------+---------+ |customer_id|customer_name| country| @@ -6621,9 +6775,9 @@ def exists(self) -> Column: Example 3: Find Orders from Customers in the USA. >>> from pyspark.sql import functions as sf - >>> orders.where( - ... customers.where( - ... (sf.col("customer_id") == sf.col("customer_id").outer()) + >>> orders.alias("o").where( + ... customers.alias("c").where( + ... (sf.col("c.customer_id") == sf.col("o.customer_id").outer()) ... & (sf.col("country") == "USA") ... ).exists() ... ).orderBy("order_id").show() @@ -6676,6 +6830,9 @@ def plot(self) -> "PySparkPlotAccessor": Notes ----- This API is experimental. + It provides two ways to create plots: + 1. Chaining style (e.g., `df.plot.line(...)`). + 2. Explicit style (e.g., `df.plot(kind="line", ...)`). Examples -------- @@ -6685,6 +6842,7 @@ def plot(self) -> "PySparkPlotAccessor": >>> type(df.plot) >>> df.plot.line(x="category", y=["int_val", "float_val"]) # doctest: +SKIP + >>> df.plot(kind="line", x="category", y=["int_val", "float_val"]) # doctest: +SKIP """ ... 
diff --git a/python/pyspark/sql/datasource.py b/python/pyspark/sql/datasource.py index a51c96a9d178f..651e84e84390e 100644 --- a/python/pyspark/sql/datasource.py +++ b/python/pyspark/sql/datasource.py @@ -32,6 +32,7 @@ "DataSourceStreamReader", "SimpleDataSourceStreamReader", "DataSourceWriter", + "DataSourceArrowWriter", "DataSourceStreamWriter", "DataSourceRegistration", "InputPartition", @@ -666,6 +667,44 @@ def abort(self, messages: List[Optional["WriterCommitMessage"]]) -> None: ... +class DataSourceArrowWriter(DataSourceWriter): + """ + A base class for data source writers that process data using PyArrow’s `RecordBatch`. + + Unlike :class:`DataSourceWriter`, which works with an iterator of Spark Rows, this class + is optimized for using the Arrow format when writing data. It can offer better performance + when interfacing with systems or libraries that natively support Arrow. + + .. versionadded: 4.0.0 + """ + + @abstractmethod + def write(self, iterator: Iterator["RecordBatch"]) -> "WriterCommitMessage": + """ + Writes an iterator of PyArrow `RecordBatch` objects to the sink. + + This method is called once on each executor to write data to the data source. + It accepts an iterator of PyArrow `RecordBatch`\\s and returns a single row + representing a commit message, or None if there is no commit message. + + The driver collects commit messages, if any, from all executors and passes them + to the :class:`DataSourceWriter.commit` method if all tasks run successfully. If any + task fails, the :class:`DataSourceWriter.abort` method will be called with the + collected commit messages. + + Parameters + ---------- + iterator : iterator of :class:`RecordBatch`\\s + An iterator of PyArrow `RecordBatch` objects representing the input data. + + Returns + ------- + :class:`WriterCommitMessage` + a serializable commit message + """ + ... + + class DataSourceStreamWriter(ABC): """ A base class for data stream writers. 
Data stream writers are responsible for writing @@ -783,9 +822,9 @@ def register( wrapped = _wrap_function(sc, dataSource) assert sc._jvm is not None jvm = sc._jvm - ds = jvm.org.apache.spark.sql.execution.datasources.v2.python.UserDefinedPythonDataSource( - wrapped - ) + ds = getattr( + jvm, "org.apache.spark.sql.execution.datasources.v2.python.UserDefinedPythonDataSource" + )(wrapped) self.sparkSession._jsparkSession.dataSource().registerPython(name, ds) diff --git a/python/pyspark/sql/functions/__init__.py b/python/pyspark/sql/functions/__init__.py index dd09c4aa5c774..fc0120bc681d8 100644 --- a/python/pyspark/sql/functions/__init__.py +++ b/python/pyspark/sql/functions/__init__.py @@ -19,3 +19,491 @@ from pyspark.sql.functions.builtin import * # noqa: F401,F403 from pyspark.sql.functions import partitioning # noqa: F401,F403 + +__all__ = [ # noqa: F405 + # Normal functions + "broadcast", + "call_function", + "col", + "column", + "lit", + "expr", + # Conditional Functions + "coalesce", + "ifnull", + "nanvl", + "nullif", + "nullifzero", + "nvl", + "nvl2", + "when", + "zeroifnull", + # Predicate Functions + "equal_null", + "ilike", + "isnan", + "isnotnull", + "isnull", + "like", + "regexp", + "regexp_like", + "rlike", + # Sort Functions + "asc", + "asc_nulls_first", + "asc_nulls_last", + "desc", + "desc_nulls_first", + "desc_nulls_last", + # Mathematical Functions + "abs", + "acos", + "acosh", + "asin", + "asinh", + "atan", + "atan2", + "atanh", + "bin", + "bround", + "cbrt", + "ceil", + "ceiling", + "conv", + "cos", + "cosh", + "cot", + "csc", + "degrees", + "e", + "exp", + "expm1", + "factorial", + "floor", + "greatest", + "hex", + "hypot", + "least", + "ln", + "log", + "log10", + "log1p", + "log2", + "negate", + "negative", + "pi", + "pmod", + "positive", + "pow", + "power", + "radians", + "rand", + "randn", + "rint", + "round", + "sec", + "sign", + "signum", + "sin", + "sinh", + "sqrt", + "tan", + "tanh", + "try_add", + "try_divide", + "try_mod", + 
"try_multiply", + "try_subtract", + "unhex", + "uniform", + "width_bucket", + # String Functions + "ascii", + "base64", + "bit_length", + "btrim", + "char", + "char_length", + "character_length", + "collate", + "collation", + "concat_ws", + "contains", + "decode", + "elt", + "encode", + "endswith", + "find_in_set", + "format_number", + "format_string", + "initcap", + "instr", + "is_valid_utf8", + "lcase", + "left", + "length", + "levenshtein", + "locate", + "lower", + "lpad", + "ltrim", + "make_valid_utf8", + "mask", + "octet_length", + "overlay", + "position", + "printf", + "randstr", + "regexp_count", + "regexp_extract", + "regexp_extract_all", + "regexp_instr", + "regexp_replace", + "regexp_substr", + "repeat", + "replace", + "right", + "rpad", + "rtrim", + "sentences", + "soundex", + "split", + "split_part", + "startswith", + "substr", + "substring", + "substring_index", + "to_binary", + "to_char", + "to_number", + "to_varchar", + "translate", + "trim", + "try_to_binary", + "try_to_number", + "try_validate_utf8", + "ucase", + "unbase64", + "upper", + "validate_utf8", + # Bitwise Functions + "bit_count", + "bit_get", + "bitwise_not", + "getbit", + "shiftleft", + "shiftright", + "shiftrightunsigned", + # Date and Timestamp Functions + "add_months", + "convert_timezone", + "curdate", + "current_date", + "current_timestamp", + "current_timezone", + "date_add", + "date_diff", + "date_format", + "date_from_unix_date", + "date_part", + "date_sub", + "date_trunc", + "dateadd", + "datediff", + "datepart", + "day", + "dayname", + "dayofmonth", + "dayofweek", + "dayofyear", + "extract", + "from_unixtime", + "from_utc_timestamp", + "hour", + "last_day", + "localtimestamp", + "make_date", + "make_dt_interval", + "make_interval", + "make_timestamp", + "make_timestamp_ltz", + "make_timestamp_ntz", + "make_ym_interval", + "minute", + "month", + "monthname", + "months_between", + "next_day", + "now", + "quarter", + "second", + "session_window", + "timestamp_add", + 
"timestamp_diff", + "timestamp_micros", + "timestamp_millis", + "timestamp_seconds", + "to_date", + "to_timestamp", + "to_timestamp_ltz", + "to_timestamp_ntz", + "to_unix_timestamp", + "to_utc_timestamp", + "trunc", + "try_make_interval", + "try_make_timestamp", + "try_make_timestamp_ltz", + "try_make_timestamp_ntz", + "try_to_timestamp", + "unix_date", + "unix_micros", + "unix_millis", + "unix_seconds", + "unix_timestamp", + "weekday", + "weekofyear", + "window", + "window_time", + "year", + # Hash Functions + "crc32", + "hash", + "md5", + "sha", + "sha1", + "sha2", + "xxhash64", + # Collection Functions + "aggregate", + "array_sort", + "cardinality", + "concat", + "element_at", + "exists", + "filter", + "forall", + "map_filter", + "map_zip_with", + "reduce", + "reverse", + "size", + "transform", + "transform_keys", + "transform_values", + "try_element_at", + "zip_with", + # Array Functions + "array", + "array_append", + "array_compact", + "array_contains", + "array_distinct", + "array_except", + "array_insert", + "array_intersect", + "array_join", + "array_max", + "array_min", + "array_position", + "array_prepend", + "array_remove", + "array_repeat", + "array_size", + "array_union", + "arrays_overlap", + "arrays_zip", + "flatten", + "get", + "sequence", + "shuffle", + "slice", + "sort_array", + # Struct Functions + "named_struct", + "struct", + # Map Functions + "create_map", + "map_concat", + "map_contains_key", + "map_entries", + "map_from_arrays", + "map_from_entries", + "map_keys", + "map_values", + "str_to_map", + # Aggregate Functions + "any_value", + "approx_count_distinct", + "approx_percentile", + "array_agg", + "avg", + "bit_and", + "bit_or", + "bit_xor", + "bitmap_construct_agg", + "bitmap_or_agg", + "bool_and", + "bool_or", + "collect_list", + "collect_set", + "corr", + "count", + "count_distinct", + "count_if", + "count_min_sketch", + "covar_pop", + "covar_samp", + "every", + "first", + "first_value", + "grouping", + "grouping_id", + 
"histogram_numeric", + "hll_sketch_agg", + "hll_union_agg", + "kurtosis", + "last", + "last_value", + "listagg", + "listagg_distinct", + "max", + "max_by", + "mean", + "median", + "min", + "min_by", + "mode", + "percentile", + "percentile_approx", + "product", + "regr_avgx", + "regr_avgy", + "regr_count", + "regr_intercept", + "regr_r2", + "regr_slope", + "regr_sxx", + "regr_sxy", + "regr_syy", + "skewness", + "some", + "std", + "stddev", + "stddev_pop", + "stddev_samp", + "string_agg", + "string_agg_distinct", + "sum", + "sum_distinct", + "try_avg", + "try_sum", + "var_pop", + "var_samp", + "variance", + # Window Functions + "cume_dist", + "dense_rank", + "lag", + "lead", + "nth_value", + "ntile", + "percent_rank", + "rank", + "row_number", + # Generator Functions + "explode", + "explode_outer", + "inline", + "inline_outer", + "posexplode", + "posexplode_outer", + "stack", + # Partition Transformation Functions + "years", + "months", + "days", + "hours", + "bucket", + # CSV Functions + "from_csv", + "schema_of_csv", + "to_csv", + # JSON Functions + "from_json", + "get_json_object", + "json_array_length", + "json_object_keys", + "json_tuple", + "schema_of_json", + "to_json", + # VARIANT Functions + "is_variant_null", + "parse_json", + "schema_of_variant", + "schema_of_variant_agg", + "try_variant_get", + "variant_get", + "try_parse_json", + "to_variant_object", + # XML Functions + "from_xml", + "schema_of_xml", + "to_xml", + "xpath", + "xpath_boolean", + "xpath_double", + "xpath_float", + "xpath_int", + "xpath_long", + "xpath_number", + "xpath_short", + "xpath_string", + # URL Functions + "parse_url", + "try_parse_url", + "url_decode", + "url_encode", + "try_url_decode", + # Misc Functions + "aes_decrypt", + "aes_encrypt", + "assert_true", + "bitmap_bit_position", + "bitmap_bucket_number", + "bitmap_count", + "current_catalog", + "current_database", + "current_schema", + "current_user", + "hll_sketch_estimate", + "hll_union", + "input_file_block_length", + 
"input_file_block_start", + "input_file_name", + "java_method", + "monotonically_increasing_id", + "raise_error", + "reflect", + "session_user", + "spark_partition_id", + "try_aes_decrypt", + "try_reflect", + "typeof", + "user", + "version", + # UDF, UDTF and UDT + "AnalyzeArgument", + "AnalyzeResult", + "OrderingColumn", + "PandasUDFType", + "PartitioningColumn", + "SelectedColumn", + "SkipRestOfInputTableException", + "UserDefinedFunction", + "UserDefinedTableFunction", + "call_udf", + "pandas_udf", + "udf", + "udtf", + "unwrap_udt", +] diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index d2873a388617e..93ac6da1e14c5 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -65,7 +65,6 @@ from pyspark.sql.utils import ( to_str as _to_str, - has_numpy as _has_numpy, try_remote_functions as _try_remote_functions, get_active_spark_context as _get_active_spark_context, enum_to_value as _enum_to_value, @@ -79,8 +78,6 @@ UserDefinedFunctionLike, ) -if _has_numpy: - import numpy as np # Note to developers: all of PySpark functions here take string as column names whenever possible. 
# Namely, if columns are referred as arguments, they can always be both Column or string, @@ -254,6 +251,8 @@ def lit(col: Any) -> Column: | [true, false]| []| [1.5, 0.1]| [a, b, c]| +------------------+-------+-----------------+--------------------+ """ + from pyspark.testing.utils import have_numpy + if isinstance(col, Column): return col elif isinstance(col, list): @@ -262,7 +261,9 @@ def lit(col: Any) -> Column: errorClass="COLUMN_IN_LIST", messageParameters={"func_name": "lit"} ) return array(*[lit(item) for item in col]) - elif _has_numpy: + elif have_numpy: + import numpy as np + if isinstance(col, np.generic): dt = _from_numpy_type(col.dtype) if dt is None: @@ -1851,6 +1852,314 @@ def sum_distinct(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("sum_distinct", col) +@_try_remote_functions +def listagg(col: "ColumnOrName", delimiter: Optional[Union[Column, str, bytes]] = None) -> Column: + """ + Aggregate function: returns the concatenation of non-null input values, + separated by the delimiter. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or column name + target column to compute on. + delimiter : :class:`~pyspark.sql.Column`, literal string or bytes, optional + the delimiter to separate the values. The default value is None. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. 
+ + Examples + -------- + Example 1: Using listagg function + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('a',), ('b',), (None,), ('c',)], ['strings']) + >>> df.select(sf.listagg('strings')).show() + +----------------------+ + |listagg(strings, NULL)| + +----------------------+ + | abc| + +----------------------+ + + Example 2: Using listagg function with a delimiter + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('a',), ('b',), (None,), ('c',)], ['strings']) + >>> df.select(sf.listagg('strings', ', ')).show() + +--------------------+ + |listagg(strings, , )| + +--------------------+ + | a, b, c| + +--------------------+ + + Example 3: Using listagg function with a binary column and delimiter + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(b'\x01',), (b'\x02',), (None,), (b'\x03',)], ['bytes']) + >>> df.select(sf.listagg('bytes', b'\x42')).show() + +---------------------+ + |listagg(bytes, X'42')| + +---------------------+ + | [01 42 02 42 03]| + +---------------------+ + + Example 4: Using listagg function on a column with all None values + + >>> from pyspark.sql import functions as sf + >>> from pyspark.sql.types import StructType, StructField, StringType + >>> schema = StructType([StructField("strings", StringType(), True)]) + >>> df = spark.createDataFrame([(None,), (None,), (None,), (None,)], schema=schema) + >>> df.select(sf.listagg('strings')).show() + +----------------------+ + |listagg(strings, NULL)| + +----------------------+ + | NULL| + +----------------------+ + """ + if delimiter is None: + return _invoke_function_over_columns("listagg", col) + else: + return _invoke_function_over_columns("listagg", col, lit(delimiter)) + + +@_try_remote_functions +def listagg_distinct( + col: "ColumnOrName", delimiter: Optional[Union[Column, str, bytes]] = None +) -> Column: + """ + Aggregate function: returns the concatenation of distinct non-null input values, 
+ separated by the delimiter. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or column name + target column to compute on. + delimiter : :class:`~pyspark.sql.Column`, literal string or bytes, optional + the delimiter to separate the values. The default value is None. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + + Examples + -------- + Example 1: Using listagg_distinct function + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('a',), ('b',), (None,), ('c',), ('b',)], ['strings']) + >>> df.select(sf.listagg_distinct('strings')).show() + +-------------------------------+ + |listagg(DISTINCT strings, NULL)| + +-------------------------------+ + | abc| + +-------------------------------+ + + Example 2: Using listagg_distinct function with a delimiter + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('a',), ('b',), (None,), ('c',), ('b',)], ['strings']) + >>> df.select(sf.listagg_distinct('strings', ', ')).show() + +-----------------------------+ + |listagg(DISTINCT strings, , )| + +-----------------------------+ + | a, b, c| + +-----------------------------+ + + Example 3: Using listagg_distinct function with a binary column and delimiter + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(b'\x01',), (b'\x02',), (None,), (b'\x03',), (b'\x02',)], + ... 
['bytes']) + >>> df.select(sf.listagg_distinct('bytes', b'\x42')).show() + +------------------------------+ + |listagg(DISTINCT bytes, X'42')| + +------------------------------+ + | [01 42 02 42 03]| + +------------------------------+ + + Example 4: Using listagg_distinct function on a column with all None values + + >>> from pyspark.sql import functions as sf + >>> from pyspark.sql.types import StructType, StructField, StringType + >>> schema = StructType([StructField("strings", StringType(), True)]) + >>> df = spark.createDataFrame([(None,), (None,), (None,), (None,)], schema=schema) + >>> df.select(sf.listagg_distinct('strings')).show() + +-------------------------------+ + |listagg(DISTINCT strings, NULL)| + +-------------------------------+ + | NULL| + +-------------------------------+ + """ + if delimiter is None: + return _invoke_function_over_columns("listagg_distinct", col) + else: + return _invoke_function_over_columns("listagg_distinct", col, lit(delimiter)) + + +@_try_remote_functions +def string_agg( + col: "ColumnOrName", delimiter: Optional[Union[Column, str, bytes]] = None +) -> Column: + """ + Aggregate function: returns the concatenation of non-null input values, + separated by the delimiter. + + An alias of :func:`listagg`. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or column name + target column to compute on. + delimiter : :class:`~pyspark.sql.Column`, literal string or bytes, optional + the delimiter to separate the values. The default value is None. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. 
+ + Examples + -------- + Example 1: Using string_agg function + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('a',), ('b',), (None,), ('c',)], ['strings']) + >>> df.select(sf.string_agg('strings')).show() + +-------------------------+ + |string_agg(strings, NULL)| + +-------------------------+ + | abc| + +-------------------------+ + + Example 2: Using string_agg function with a delimiter + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('a',), ('b',), (None,), ('c',)], ['strings']) + >>> df.select(sf.string_agg('strings', ', ')).show() + +-----------------------+ + |string_agg(strings, , )| + +-----------------------+ + | a, b, c| + +-----------------------+ + + Example 3: Using string_agg function with a binary column and delimiter + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(b'\x01',), (b'\x02',), (None,), (b'\x03',)], ['bytes']) + >>> df.select(sf.string_agg('bytes', b'\x42')).show() + +------------------------+ + |string_agg(bytes, X'42')| + +------------------------+ + | [01 42 02 42 03]| + +------------------------+ + + Example 4: Using string_agg function on a column with all None values + + >>> from pyspark.sql import functions as sf + >>> from pyspark.sql.types import StructType, StructField, StringType + >>> schema = StructType([StructField("strings", StringType(), True)]) + >>> df = spark.createDataFrame([(None,), (None,), (None,), (None,)], schema=schema) + >>> df.select(sf.string_agg('strings')).show() + +-------------------------+ + |string_agg(strings, NULL)| + +-------------------------+ + | NULL| + +-------------------------+ + """ + if delimiter is None: + return _invoke_function_over_columns("string_agg", col) + else: + return _invoke_function_over_columns("string_agg", col, lit(delimiter)) + + +@_try_remote_functions +def string_agg_distinct( + col: "ColumnOrName", delimiter: Optional[Union[Column, str, bytes]] = None +) -> Column: + """ + 
Aggregate function: returns the concatenation of distinct non-null input values, + separated by the delimiter. + + An alias of :func:`listagg_distinct`. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or column name + target column to compute on. + delimiter : :class:`~pyspark.sql.Column`, literal string or bytes, optional + the delimiter to separate the values. The default value is None. + + Returns + ------- + :class:`~pyspark.sql.Column` + the column for computed results. + + Examples + -------- + Example 1: Using string_agg_distinct function + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('a',), ('b',), (None,), ('c',), ('b',)], ['strings']) + >>> df.select(sf.string_agg_distinct('strings')).show() + +----------------------------------+ + |string_agg(DISTINCT strings, NULL)| + +----------------------------------+ + | abc| + +----------------------------------+ + + Example 2: Using string_agg_distinct function with a delimiter + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('a',), ('b',), (None,), ('c',), ('b',)], ['strings']) + >>> df.select(sf.string_agg_distinct('strings', ', ')).show() + +--------------------------------+ + |string_agg(DISTINCT strings, , )| + +--------------------------------+ + | a, b, c| + +--------------------------------+ + + Example 3: Using string_agg_distinct function with a binary column and delimiter + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(b'\x01',), (b'\x02',), (None,), (b'\x03',), (b'\x02',)], + ... 
['bytes']) + >>> df.select(sf.string_agg_distinct('bytes', b'\x42')).show() + +---------------------------------+ + |string_agg(DISTINCT bytes, X'42')| + +---------------------------------+ + | [01 42 02 42 03]| + +---------------------------------+ + + Example 4: Using string_agg_distinct function on a column with all None values + + >>> from pyspark.sql import functions as sf + >>> from pyspark.sql.types import StructType, StructField, StringType + >>> schema = StructType([StructField("strings", StringType(), True)]) + >>> df = spark.createDataFrame([(None,), (None,), (None,), (None,)], schema=schema) + >>> df.select(sf.string_agg_distinct('strings')).show() + +----------------------------------+ + |string_agg(DISTINCT strings, NULL)| + +----------------------------------+ + | NULL| + +----------------------------------+ + """ + if delimiter is None: + return _invoke_function_over_columns("string_agg_distinct", col) + else: + return _invoke_function_over_columns("string_agg_distinct", col, lit(delimiter)) + + @_try_remote_functions def product(col: "ColumnOrName") -> Column: """ @@ -7343,6 +7652,12 @@ def rand(seed: Optional[int] = None) -> Column: :class:`~pyspark.sql.Column` A column of random values. + See Also + -------- + :meth:`pyspark.sql.functions.randn` + :meth:`pyspark.sql.functions.randstr` + :meth:`pyspark.sql.functions.uniform` + Examples -------- Example 1: Generate a random column without a seed @@ -7396,6 +7711,12 @@ def randn(seed: Optional[int] = None) -> Column: :class:`~pyspark.sql.Column` A column of random values. + See Also + -------- + :meth:`pyspark.sql.functions.rand` + :meth:`pyspark.sql.functions.randstr` + :meth:`pyspark.sql.functions.uniform` + Examples -------- Example 1: Generate a random column without a seed @@ -8864,6 +9185,13 @@ def curdate() -> Column: :class:`~pyspark.sql.Column` current date. 
+ See Also + -------- + :meth:`pyspark.sql.functions.now` + :meth:`pyspark.sql.functions.current_date` + :meth:`pyspark.sql.functions.current_timestamp` + :meth:`pyspark.sql.functions.localtimestamp` + Examples -------- >>> import pyspark.sql.functions as sf @@ -8893,6 +9221,13 @@ def current_date() -> Column: :class:`~pyspark.sql.Column` current date. + See Also + -------- + :meth:`pyspark.sql.functions.now` + :meth:`pyspark.sql.functions.curdate` + :meth:`pyspark.sql.functions.current_timestamp` + :meth:`pyspark.sql.functions.localtimestamp` + Examples -------- >>> from pyspark.sql import functions as sf @@ -8918,16 +9253,32 @@ def current_timezone() -> Column: :class:`~pyspark.sql.Column` current session local timezone. + See Also + -------- + :meth:`pyspark.sql.functions.convert_timezone` + Examples -------- - >>> from pyspark.sql import functions as sf >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + + >>> from pyspark.sql import functions as sf >>> spark.range(1).select(sf.current_timezone()).show() +-------------------+ | current_timezone()| +-------------------+ |America/Los_Angeles| +-------------------+ + + Switch the timezone to Shanghai. + + >>> spark.conf.set("spark.sql.session.timeZone", "Asia/Shanghai") + >>> spark.range(1).select(sf.current_timezone()).show() + +------------------+ + |current_timezone()| + +------------------+ + | Asia/Shanghai| + +------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ return _invoke_function("current_timezone") @@ -8949,6 +9300,13 @@ def current_timestamp() -> Column: :class:`~pyspark.sql.Column` current date and time. 
+ See Also + -------- + :meth:`pyspark.sql.functions.now` + :meth:`pyspark.sql.functions.curdate` + :meth:`pyspark.sql.functions.current_date` + :meth:`pyspark.sql.functions.localtimestamp` + Examples -------- >>> from pyspark.sql import functions as sf @@ -8974,6 +9332,13 @@ def now() -> Column: :class:`~pyspark.sql.Column` current timestamp at the start of query evaluation. + See Also + -------- + :meth:`pyspark.sql.functions.curdate` + :meth:`pyspark.sql.functions.current_date` + :meth:`pyspark.sql.functions.current_timestamp` + :meth:`pyspark.sql.functions.localtimestamp` + Examples -------- >>> from pyspark.sql import functions as sf @@ -9004,6 +9369,13 @@ def localtimestamp() -> Column: :class:`~pyspark.sql.Column` current local date and time. + See Also + -------- + :meth:`pyspark.sql.functions.now` + :meth:`pyspark.sql.functions.curdate` + :meth:`pyspark.sql.functions.current_date` + :meth:`pyspark.sql.functions.current_timestamp` + Examples -------- >>> from pyspark.sql import functions as sf @@ -9044,6 +9416,15 @@ def date_format(date: "ColumnOrName", format: str) -> Column: format: literal string format to use to represent datetime values. + See Also + -------- + :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.try_to_timestamp` + Returns ------- :class:`~pyspark.sql.Column` @@ -9130,6 +9511,18 @@ def year(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` year part of the date/timestamp as integer. 
+ See Also + -------- + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the year from a string column representing dates @@ -9209,6 +9602,18 @@ def quarter(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` quarter of the date/timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the quarter from a string column representing dates @@ -9288,6 +9693,19 @@ def month(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` month part of the date/timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.monthname` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the month from a string column representing dates @@ -9368,6 +9786,12 @@ def dayofweek(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` day of the week for given date/timestamp as integer. 
+ See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.dayofyear` + :meth:`pyspark.sql.functions.dayofmonth` + Examples -------- Example 1: Extract the day of the week from a string column representing dates @@ -9442,6 +9866,12 @@ def dayofmonth(col: "ColumnOrName") -> Column: col : :class:`~pyspark.sql.Column` or column name target date/timestamp column to work on. + See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.dayofyear` + :meth:`pyspark.sql.functions.dayofweek` + Returns ------- :class:`~pyspark.sql.Column` @@ -9523,9 +9953,25 @@ def day(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` day of the month for given date/timestamp as integer. - Examples + See Also -------- - Example 1: Extract the day of the month from a string column representing dates + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.dayname` + :meth:`pyspark.sql.functions.dayofyear` + :meth:`pyspark.sql.functions.dayofmonth` + :meth:`pyspark.sql.functions.dayofweek` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + + Examples + -------- + Example 1: Extract the day of the month from a string column representing dates >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('2015-04-08',), ('2024-10-31',)], ['dt']) @@ -9602,6 +10048,12 @@ def dayofyear(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` day of the year for given date/timestamp as integer. 
+ See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.dayofweek` + :meth:`pyspark.sql.functions.dayofmonth` + Examples -------- Example 1: Extract the day of the year from a string column representing dates @@ -9681,6 +10133,18 @@ def hour(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` hour part of the timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the hours from a string column representing timestamp @@ -9728,6 +10192,18 @@ def minute(col: "ColumnOrName") -> Column: col : :class:`~pyspark.sql.Column` or column name target date/timestamp column to work on. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Returns ------- :class:`~pyspark.sql.Column` @@ -9785,6 +10261,18 @@ def second(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` `seconds` part of the timestamp as integer. 
+ See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the seconds from a string column representing timestamp @@ -9839,6 +10327,10 @@ def weekofyear(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` `week` of the year for given date as integer. + See Also + -------- + :meth:`pyspark.sql.functions.weekday` + Examples -------- Example 1: Extract the week of the year from a string column representing dates @@ -9915,6 +10407,11 @@ def weekday(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday). + See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.weekofyear` + Examples -------- Example 1: Extract the day of the week from a string column representing dates @@ -9991,6 +10488,11 @@ def monthname(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the three-letter abbreviation of month name for date/timestamp (Jan, Feb, Mar...) + See Also + -------- + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.dayname` + Examples -------- Example 1: Extract the month name from a string column representing dates @@ -10067,6 +10569,11 @@ def dayname(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the three-letter abbreviation of day name for date/timestamp (Mon, Tue, Wed...) 
+ See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.monthname` + Examples -------- Example 1: Extract the weekday name from a string column representing dates @@ -10147,6 +10654,13 @@ def extract(field: Column, source: "ColumnOrName") -> Column: See Also -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` :meth:`pyspark.sql.functions.datepart` :meth:`pyspark.sql.functions.date_part` @@ -10195,6 +10709,13 @@ def date_part(field: Column, source: "ColumnOrName") -> Column: See Also -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` :meth:`pyspark.sql.functions.datepart` :meth:`pyspark.sql.functions.extract` @@ -10243,6 +10764,13 @@ def datepart(field: Column, source: "ColumnOrName") -> Column: See Also -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` :meth:`pyspark.sql.functions.date_part` :meth:`pyspark.sql.functions.extract` @@ -10780,7 +11308,11 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column: See Also -------- :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` :meth:`pyspark.sql.functions.try_to_timestamp` + :meth:`pyspark.sql.functions.date_format` Examples -------- @@ -11018,7 +11550,12 @@ def 
to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column: See Also -------- :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.to_unix_timestamp` :meth:`pyspark.sql.functions.try_to_timestamp` + :meth:`pyspark.sql.functions.date_format` Examples -------- @@ -11072,6 +11609,8 @@ def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = Non -------- :meth:`pyspark.sql.functions.to_date` :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.date_format` Examples -------- @@ -11128,10 +11667,15 @@ def xpath(xml: "ColumnOrName", path: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame( ... [('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>',)], ['x']) - >>> df.select(xpath(df.x, lit('a/b/text()')).alias('r')).collect() - [Row(r=['b1', 'b2', 'b3'])] + >>> df.select(sf.xpath(df.x, sf.lit('a/b/text()'))).show() + +--------------------+ + |xpath(x, a/b/text())| + +--------------------+ + | [b1, b2, b3]| + +--------------------+ """ return _invoke_function_over_columns("xpath", xml, path) @@ -11145,9 +11689,14 @@ def xpath_boolean(xml: "ColumnOrName", path: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('<a><b>1</b></a>',)], ['x']) - >>> df.select(xpath_boolean(df.x, lit('a/b')).alias('r')).collect() - [Row(r=True)] + >>> df.select(sf.xpath_boolean(df.x, sf.lit('a/b'))).show() + +---------------------+ + |xpath_boolean(x, a/b)| + +---------------------+ + | true| + +---------------------+ """ return _invoke_function_over_columns("xpath_boolean", xml, path) @@ -11162,9 +11711,14 @@ def xpath_double(xml: "ColumnOrName", path: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf 
>>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x']) - >>> df.select(xpath_double(df.x, lit('sum(a/b)')).alias('r')).collect() - [Row(r=3.0)] + >>> df.select(sf.xpath_double(df.x, sf.lit('sum(a/b)'))).show() + +-------------------------+ + |xpath_double(x, sum(a/b))| + +-------------------------+ + | 3.0| + +-------------------------+ """ return _invoke_function_over_columns("xpath_double", xml, path) @@ -11202,9 +11756,14 @@ def xpath_float(xml: "ColumnOrName", path: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x']) - >>> df.select(xpath_float(df.x, lit('sum(a/b)')).alias('r')).collect() - [Row(r=3.0)] + >>> df.select(sf.xpath_float(df.x, sf.lit('sum(a/b)'))).show() + +------------------------+ + |xpath_float(x, sum(a/b))| + +------------------------+ + | 3.0| + +------------------------+ """ return _invoke_function_over_columns("xpath_float", xml, path) @@ -11219,9 +11778,14 @@ def xpath_int(xml: "ColumnOrName", path: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x']) - >>> df.select(xpath_int(df.x, lit('sum(a/b)')).alias('r')).collect() - [Row(r=3)] + >>> df.select(sf.xpath_int(df.x, sf.lit('sum(a/b)'))).show() + +----------------------+ + |xpath_int(x, sum(a/b))| + +----------------------+ + | 3| + +----------------------+ """ return _invoke_function_over_columns("xpath_int", xml, path) @@ -11236,9 +11800,14 @@ def xpath_long(xml: "ColumnOrName", path: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('<a><b>1</b><b>2</b></a>',)], ['x']) - >>> df.select(xpath_long(df.x, lit('sum(a/b)')).alias('r')).collect() - [Row(r=3)] + >>> df.select(sf.xpath_long(df.x, sf.lit('sum(a/b)'))).show() + +-----------------------+ + |xpath_long(x, sum(a/b))| + +-----------------------+ + | 3| + +-----------------------+ """ return 
_invoke_function_over_columns("xpath_long", xml, path) @@ -11253,9 +11822,14 @@ def xpath_short(xml: "ColumnOrName", path: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('12',)], ['x']) - >>> df.select(xpath_short(df.x, lit('sum(a/b)')).alias('r')).collect() - [Row(r=3)] + >>> df.select(sf.xpath_short(df.x, sf.lit('sum(a/b)'))).show() + +------------------------+ + |xpath_short(x, sum(a/b))| + +------------------------+ + | 3| + +------------------------+ """ return _invoke_function_over_columns("xpath_short", xml, path) @@ -11269,9 +11843,14 @@ def xpath_string(xml: "ColumnOrName", path: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('bcc',)], ['x']) - >>> df.select(xpath_string(df.x, lit('a/c')).alias('r')).collect() - [Row(r='cc')] + >>> df.select(sf.xpath_string(df.x, sf.lit('a/c'))).show() + +--------------------+ + |xpath_string(x, a/c)| + +--------------------+ + | cc| + +--------------------+ """ return _invoke_function_over_columns("xpath_string", xml, path) @@ -11646,6 +12225,9 @@ def from_utc_timestamp(timestamp: "ColumnOrName", tz: Union[Column, str]) -> Col See Also -------- :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` Examples -------- @@ -11712,6 +12294,9 @@ def to_utc_timestamp(timestamp: "ColumnOrName", tz: Union[Column, str]) -> Colum See Also -------- :meth:`pyspark.sql.functions.from_utc_timestamp` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` Examples -------- @@ -12034,22 +12619,22 @@ def window( Parameters ---------- - timeColumn : :class:`~pyspark.sql.Column` + timeColumn : :class:`~pyspark.sql.Column` or column name The column or the expression to use 
as the timestamp for windowing by time. The time column must be of TimestampType or TimestampNTZType. - windowDuration : str + windowDuration : literal string A string specifying the width of the window, e.g. `10 minutes`, `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for valid duration identifiers. Note that the duration is a fixed length of time, and does not vary over time according to a calendar. For example, `1 day` always means 86,400,000 milliseconds, not a calendar day. - slideDuration : str, optional + slideDuration : literal string, optional A new window will be generated every `slideDuration`. Must be less than or equal to the `windowDuration`. Check `org.apache.spark.unsafe.types.CalendarInterval` for valid duration identifiers. This duration is likewise absolute, and does not vary according to a calendar. - startTime : str, optional + startTime : literal string, optional The offset with respect to 1970-01-01 00:00:00 UTC with which to start window intervals. For example, in order to have hourly tumbling windows that start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide @@ -12060,24 +12645,30 @@ def window( :class:`~pyspark.sql.Column` the column for computed results. + See Also + -------- + :meth:`pyspark.sql.functions.window_time` + :meth:`pyspark.sql.functions.session_window` + Examples -------- >>> import datetime >>> from pyspark.sql import functions as sf - >>> df = spark.createDataFrame( - ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], - ... ).toDF("date", "val") - >>> w = df.groupBy(sf.window("date", "5 seconds")).agg(sf.sum("val").alias("sum")) - >>> w.select( - ... w.window.start.cast("string").alias("start"), - ... w.window.end.cast("string").alias("end"), - ... "sum" - ... 
).show() - +-------------------+-------------------+---+ - | start| end|sum| - +-------------------+-------------------+---+ - |2016-03-11 09:00:05|2016-03-11 09:00:10| 1| - +-------------------+-------------------+---+ + >>> df = spark.createDataFrame([(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], ['dt', 'v']) + >>> df2 = df.groupBy(sf.window('dt', '5 seconds')).agg(sf.sum('v')) + >>> df2.show(truncate=False) + +------------------------------------------+------+ + |window |sum(v)| + +------------------------------------------+------+ + |{2016-03-11 09:00:05, 2016-03-11 09:00:10}|1 | + +------------------------------------------+------+ + + >>> df2.printSchema() + root + |-- window: struct (nullable = false) + | |-- start: timestamp (nullable = true) + | |-- end: timestamp (nullable = true) + |-- sum(v): long (nullable = true) """ from pyspark.sql.classic.column import _to_java_column @@ -12123,7 +12714,7 @@ def window_time( Parameters ---------- - windowColumn : :class:`~pyspark.sql.Column` + windowColumn : :class:`~pyspark.sql.Column` or column name The window column of a window aggregate records. Returns @@ -12131,29 +12722,29 @@ def window_time( :class:`~pyspark.sql.Column` the column for computed results. - Notes - ----- - Supports Spark Connect. + See Also + -------- + :meth:`pyspark.sql.functions.window` + :meth:`pyspark.sql.functions.session_window` Examples -------- >>> import datetime - >>> df = spark.createDataFrame( - ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], - ... ).toDF("date", "val") + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], ['dt', 'v']) Group the data into 5 second time windows and aggregate as sum. - >>> w = df.groupBy(window("date", "5 seconds")).agg(sum("val").alias("sum")) + >>> df2 = df.groupBy(sf.window('dt', '5 seconds')).agg(sf.sum('v')) Extract the window event time using the window_time function. - >>> w.select( - ... 
w.window.end.cast("string").alias("end"), - ... window_time(w.window).cast("string").alias("window_time"), - ... "sum" - ... ).collect() - [Row(end='2016-03-11 09:00:10', window_time='2016-03-11 09:00:09.999999', sum=1)] + >>> df2.select('*', sf.window_time('window')).show(truncate=False) + +------------------------------------------+------+--------------------------+ + |window |sum(v)|window_time(window) | + +------------------------------------------+------+--------------------------+ + |{2016-03-11 09:00:05, 2016-03-11 09:00:10}|1 |2016-03-11 09:00:09.999999| + +------------------------------------------+------+--------------------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -12187,10 +12778,10 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) Parameters ---------- - timeColumn : :class:`~pyspark.sql.Column` or str + timeColumn : :class:`~pyspark.sql.Column` or column name The column name or column to use as the timestamp for windowing by time. The time column must be of TimestampType or TimestampNTZType. - gapDuration : :class:`~pyspark.sql.Column` or str + gapDuration : :class:`~pyspark.sql.Column` or literal string A Python string literal or column specifying the timeout of the session. It could be static value, e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap duration dynamically based on the input row. @@ -12200,17 +12791,29 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) :class:`~pyspark.sql.Column` the column for computed results. + See Also + -------- + :meth:`pyspark.sql.functions.window` + :meth:`pyspark.sql.functions.window_time` + Examples -------- - >>> df = spark.createDataFrame([("2016-03-11 09:00:07", 1)]).toDF("date", "val") - >>> w = df.groupBy(session_window("date", "5 seconds")).agg(sum("val").alias("sum")) - >>> w.select(w.session_window.start.cast("string").alias("start"), - ... 
w.session_window.end.cast("string").alias("end"), "sum").collect() - [Row(start='2016-03-11 09:00:07', end='2016-03-11 09:00:12', sum=1)] - >>> w = df.groupBy(session_window("date", lit("5 seconds"))).agg(sum("val").alias("sum")) - >>> w.select(w.session_window.start.cast("string").alias("start"), - ... w.session_window.end.cast("string").alias("end"), "sum").collect() - [Row(start='2016-03-11 09:00:07', end='2016-03-11 09:00:12', sum=1)] + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('2016-03-11 09:00:07', 1)], ['dt', 'v']) + >>> df2 = df.groupBy(sf.session_window('dt', '5 seconds')).agg(sf.sum('v')) + >>> df2.show(truncate=False) + +------------------------------------------+------+ + |session_window |sum(v)| + +------------------------------------------+------+ + |{2016-03-11 09:00:07, 2016-03-11 09:00:12}|1 | + +------------------------------------------+------+ + + >>> df2.printSchema() + root + |-- session_window: struct (nullable = false) + | |-- start: timestamp (nullable = true) + | |-- end: timestamp (nullable = true) + |-- sum(v): long (nullable = true) """ from pyspark.sql.classic.column import _to_java_column @@ -12240,37 +12843,57 @@ def to_unix_timestamp( Parameters ---------- - timestamp : :class:`~pyspark.sql.Column` or str + timestamp : :class:`~pyspark.sql.Column` or column name Input column or strings. - format : :class:`~pyspark.sql.Column` or str, optional + format : :class:`~pyspark.sql.Column` or column name, optional format to use to convert UNIX timestamp values. + See Also + -------- + :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + Examples -------- >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") - Example 1: Using default format 'yyyy-MM-dd HH:mm:ss' parses the timestamp string. 
+ Example 1: Using default format to parse the timestamp string. >>> import pyspark.sql.functions as sf - >>> time_df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['dt']) - >>> time_df.select(sf.to_unix_timestamp('dt').alias('unix_time')).show() - +----------+ - | unix_time| - +----------+ - |1428520332| - +----------+ + >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts']) + >>> df.select('*', sf.to_unix_timestamp('ts')).show() + +-------------------+------------------------------------------+ + | ts|to_unix_timestamp(ts, yyyy-MM-dd HH:mm:ss)| + +-------------------+------------------------------------------+ + |2015-04-08 12:12:12| 1428520332| + +-------------------+------------------------------------------+ - Example 2: Using user-specified format 'yyyy-MM-dd' parses the timestamp string. + Example 2: Using user-specified format 'yyyy-MM-dd' to parse the date string. >>> import pyspark.sql.functions as sf - >>> time_df = spark.createDataFrame([('2015-04-08',)], ['dt']) - >>> time_df.select( - ... sf.to_unix_timestamp('dt', sf.lit('yyyy-MM-dd')).alias('unix_time')).show() - +----------+ - | unix_time| - +----------+ - |1428476400| - +----------+ + >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) + >>> df.select('*', sf.to_unix_timestamp(df.dt, sf.lit('yyyy-MM-dd'))).show() + +----------+---------------------------------+ + | dt|to_unix_timestamp(dt, yyyy-MM-dd)| + +----------+---------------------------------+ + |2015-04-08| 1428476400| + +----------+---------------------------------+ + + Example 3: Using a format column to represent different formats. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... 
[('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt']) + >>> df.select('*', sf.to_unix_timestamp('dt', 'fmt')).show() + +----------+----------+--------------------------+ + | dt| fmt|to_unix_timestamp(dt, fmt)| + +----------+----------+--------------------------+ + |2015-04-08|yyyy-MM-dd| 1428476400| + |2025+01+09|yyyy+MM+dd| 1736409600| + +----------+----------+--------------------------+ >>> spark.conf.unset("spark.sql.session.timeZone") """ @@ -12286,29 +12909,63 @@ def to_timestamp_ltz( format: Optional["ColumnOrName"] = None, ) -> Column: """ - Parses the `timestamp` with the `format` to a timestamp without time zone. + Parses the `timestamp` with the `format` to a timestamp with time zone. Returns null with invalid input. .. versionadded:: 3.5.0 Parameters ---------- - timestamp : :class:`~pyspark.sql.Column` or str + timestamp : :class:`~pyspark.sql.Column` or column name Input column or strings. - format : :class:`~pyspark.sql.Column` or str, optional + format : :class:`~pyspark.sql.Column` or column name, optional format to use to convert type `TimestampType` timestamp values. + See Also + -------- + :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.to_unix_timestamp` + :meth:`pyspark.sql.functions.date_format` + Examples -------- - >>> df = spark.createDataFrame([("2016-12-31",)], ["e"]) - >>> df.select(to_timestamp_ltz(df.e, lit("yyyy-MM-dd")).alias('r')).collect() - ... # doctest: +SKIP - [Row(r=datetime.datetime(2016, 12, 31, 0, 0))] + Example 1: Using default format to parse the timestamp string. - >>> df = spark.createDataFrame([("2016-12-31",)], ["e"]) - >>> df.select(to_timestamp_ltz(df.e).alias('r')).collect() - ... 
# doctest: +SKIP - [Row(r=datetime.datetime(2016, 12, 31, 0, 0))] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts']) + >>> df.select('*', sf.to_timestamp_ltz('ts')).show() + +-------------------+--------------------+ + | ts|to_timestamp_ltz(ts)| + +-------------------+--------------------+ + |2015-04-08 12:12:12| 2015-04-08 12:12:12| + +-------------------+--------------------+ + + Example 2: Using user-specified format to parse the date string. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('2016-12-31',)], ['dt']) + >>> df.select('*', sf.to_timestamp_ltz(df.dt, sf.lit('yyyy-MM-dd'))).show() + +----------+--------------------------------+ + | dt|to_timestamp_ltz(dt, yyyy-MM-dd)| + +----------+--------------------------------+ + |2016-12-31| 2016-12-31 00:00:00| + +----------+--------------------------------+ + + Example 3: Using a format column to represent different formats. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... [('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt']) + >>> df.select('*', sf.to_timestamp_ltz('dt', 'fmt')).show() + +----------+----------+-------------------------+ + | dt| fmt|to_timestamp_ltz(dt, fmt)| + +----------+----------+-------------------------+ + |2015-04-08|yyyy-MM-dd| 2015-04-08 00:00:00| + |2025+01+09|yyyy+MM+dd| 2025-01-09 00:00:00| + +----------+----------+-------------------------+ """ if format is not None: return _invoke_function_over_columns("to_timestamp_ltz", timestamp, format) @@ -12329,22 +12986,56 @@ def to_timestamp_ntz( Parameters ---------- - timestamp : :class:`~pyspark.sql.Column` or str + timestamp : :class:`~pyspark.sql.Column` or column name Input column or strings. - format : :class:`~pyspark.sql.Column` or str, optional + format : :class:`~pyspark.sql.Column` or column name, optional format to use to convert type `TimestampNTZType` timestamp values. 
+ See Also + -------- + :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.to_unix_timestamp` + :meth:`pyspark.sql.functions.date_format` + Examples -------- - >>> df = spark.createDataFrame([("2016-04-08",)], ["e"]) - >>> df.select(to_timestamp_ntz(df.e, lit("yyyy-MM-dd")).alias('r')).collect() - ... # doctest: +SKIP - [Row(r=datetime.datetime(2016, 4, 8, 0, 0))] + Example 1: Using default format to parse the timestamp string. - >>> df = spark.createDataFrame([("2016-04-08",)], ["e"]) - >>> df.select(to_timestamp_ntz(df.e).alias('r')).collect() - ... # doctest: +SKIP - [Row(r=datetime.datetime(2016, 4, 8, 0, 0))] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts']) + >>> df.select('*', sf.to_timestamp_ntz('ts')).show() + +-------------------+--------------------+ + | ts|to_timestamp_ntz(ts)| + +-------------------+--------------------+ + |2015-04-08 12:12:12| 2015-04-08 12:12:12| + +-------------------+--------------------+ + + Example 2: Using user-specified format 'yyyy-MM-dd' to parse the date string. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('2016-12-31',)], ['dt']) + >>> df.select('*', sf.to_timestamp_ntz(df.dt, sf.lit('yyyy-MM-dd'))).show() + +----------+--------------------------------+ + | dt|to_timestamp_ntz(dt, yyyy-MM-dd)| + +----------+--------------------------------+ + |2016-12-31| 2016-12-31 00:00:00| + +----------+--------------------------------+ + + Example 3: Using a format column to represent different formats. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... 
[('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt']) + >>> df.select('*', sf.to_timestamp_ntz('dt', 'fmt')).show() + +----------+----------+-------------------------+ + | dt| fmt|to_timestamp_ntz(dt, fmt)| + +----------+----------+-------------------------+ + |2015-04-08|yyyy-MM-dd| 2015-04-08 00:00:00| + |2025+01+09|yyyy+MM+dd| 2025-01-09 00:00:00| + +----------+----------+-------------------------+ """ if format is not None: return _invoke_function_over_columns("to_timestamp_ntz", timestamp, format) @@ -12361,9 +13052,15 @@ def current_catalog() -> Column: .. versionadded:: 3.5.0 + See Also + -------- + :meth:`pyspark.sql.functions.current_database` + :meth:`pyspark.sql.functions.current_schema` + Examples -------- - >>> spark.range(1).select(current_catalog()).show() + >>> import pyspark.sql.functions as sf + >>> spark.range(1).select(sf.current_catalog()).show() +-----------------+ |current_catalog()| +-----------------+ @@ -12379,9 +13076,15 @@ def current_database() -> Column: .. versionadded:: 3.5.0 + See Also + -------- + :meth:`pyspark.sql.functions.current_catalog` + :meth:`pyspark.sql.functions.current_schema` + Examples -------- - >>> spark.range(1).select(current_database()).show() + >>> import pyspark.sql.functions as sf + >>> spark.range(1).select(sf.current_database()).show() +----------------+ |current_schema()| +----------------+ @@ -12397,6 +13100,11 @@ def current_schema() -> Column: .. versionadded:: 3.5.0 + See Also + -------- + :meth:`pyspark.sql.functions.current_catalog` + :meth:`pyspark.sql.functions.current_database` + Examples -------- >>> import pyspark.sql.functions as sf @@ -12416,9 +13124,15 @@ def current_user() -> Column: .. 
versionadded:: 3.5.0 + See Also + -------- + :meth:`pyspark.sql.functions.user` + :meth:`pyspark.sql.functions.session_user` + Examples -------- - >>> spark.range(1).select(current_user()).show() # doctest: +SKIP + >>> import pyspark.sql.functions as sf + >>> spark.range(1).select(sf.current_user()).show() # doctest: +SKIP +--------------+ |current_user()| +--------------+ @@ -12434,6 +13148,11 @@ def user() -> Column: .. versionadded:: 3.5.0 + See Also + -------- + :meth:`pyspark.sql.functions.current_user` + :meth:`pyspark.sql.functions.session_user` + Examples -------- >>> import pyspark.sql.functions as sf @@ -12453,6 +13172,11 @@ def session_user() -> Column: .. versionadded:: 4.0.0 + See Also + -------- + :meth:`pyspark.sql.functions.user` + :meth:`pyspark.sql.functions.current_user` + Examples -------- >>> import pyspark.sql.functions as sf @@ -12469,7 +13193,7 @@ def session_user() -> Column: @_try_remote_functions def crc32(col: "ColumnOrName") -> Column: """ - Calculates the cyclic redundancy check value (CRC32) of a binary column and + Calculates the cyclic redundancy check value (CRC32) of a binary column and returns the value as a bigint. .. versionchanged:: 3.4.0 @@ -12477,7 +13201,7 @@ def crc32(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to compute on. 
Returns @@ -12489,8 +13213,14 @@ def crc32(col: "ColumnOrName") -> Column: Examples -------- - >>> spark.createDataFrame([('ABC',)], ['a']).select(crc32('a').alias('crc32')).collect() - [Row(crc32=2743272264)] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('ABC',)], ['a']) + >>> df.select('*', sf.crc32('a')).show(truncate=False) + +---+----------+ + |a |crc32(a) | + +---+----------+ + |ABC|2743272264| + +---+----------+ """ return _invoke_function_over_columns("crc32", col) @@ -12506,7 +13236,7 @@ def md5(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to compute on. Returns @@ -12516,8 +13246,14 @@ def md5(col: "ColumnOrName") -> Column: Examples -------- - >>> spark.createDataFrame([('ABC',)], ['a']).select(md5('a').alias('hash')).collect() - [Row(hash='902fbdd2b1df0c4f70b4a5d23525e932')] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('ABC',)], ['a']) + >>> df.select('*', sf.md5('a')).show(truncate=False) + +---+--------------------------------+ + |a |md5(a) | + +---+--------------------------------+ + |ABC|902fbdd2b1df0c4f70b4a5d23525e932| + +---+--------------------------------+ """ return _invoke_function_over_columns("md5", col) @@ -12533,7 +13269,7 @@ def sha1(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to compute on. Returns @@ -12541,10 +13277,21 @@ def sha1(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the column for computed results. 
+ See Also + -------- + :meth:`pyspark.sql.functions.sha` + :meth:`pyspark.sql.functions.sha2` + Examples -------- - >>> spark.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect() - [Row(hash='3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('ABC',)], ['a']) + >>> df.select('*', sf.sha1('a')).show(truncate=False) + +---+----------------------------------------+ + |a |sha1(a) | + +---+----------------------------------------+ + |ABC|3c01bdbb26f358bab27f267924aa2c9a03fcfdb8| + +---+----------------------------------------+ """ return _invoke_function_over_columns("sha1", col) @@ -12562,7 +13309,7 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to compute on. numBits : int the desired bit length of the result, which must have a @@ -12573,12 +13320,18 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column: :class:`~pyspark.sql.Column` the column for computed results. 
+ See Also + -------- + :meth:`pyspark.sql.functions.sha` + :meth:`pyspark.sql.functions.sha1` + Examples -------- - >>> df = spark.createDataFrame([["Alice"], ["Bob"]], ["name"]) - >>> df.withColumn("sha2", sha2(df.name, 256)).show(truncate=False) + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([['Alice'], ['Bob']], ['name']) + >>> df.select('*', sf.sha2('name', 256)).show(truncate=False) +-----+----------------------------------------------------------------+ - |name |sha2 | + |name |sha2(name, 256) | +-----+----------------------------------------------------------------+ |Alice|3bc51062973c458d5a6f2d8d64a023246354ad7e064b1e4e009ec8a0699a3043| |Bob |cd9fb1e148ccd8442e5aa74904cc73bf6fb54d1d54d333bd596aa9bb4bb4e961| @@ -12608,7 +13361,7 @@ def hash(*cols: "ColumnOrName") -> Column: Parameters ---------- - cols : :class:`~pyspark.sql.Column` or str + cols : :class:`~pyspark.sql.Column` or column name one or more columns to compute on. Returns @@ -12616,27 +13369,34 @@ def hash(*cols: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` hash value as int column. 
+ See Also + -------- + :meth:`pyspark.sql.functions.xxhash64` + Examples -------- + >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([('ABC', 'DEF')], ['c1', 'c2']) + >>> df.select('*', sf.hash('c1')).show() + +---+---+----------+ + | c1| c2| hash(c1)| + +---+---+----------+ + |ABC|DEF|-757602832| + +---+---+----------+ - Hash for one column - - >>> df.select(hash('c1').alias('hash')).show() - +----------+ - | hash| - +----------+ - |-757602832| - +----------+ - - Two or more columns + >>> df.select('*', sf.hash('c1', df.c2)).show() + +---+---+------------+ + | c1| c2|hash(c1, c2)| + +---+---+------------+ + |ABC|DEF| 599895104| + +---+---+------------+ - >>> df.select(hash('c1', 'c2').alias('hash')).show() - +---------+ - | hash| - +---------+ - |599895104| - +---------+ + >>> df.select('*', sf.hash('*')).show() + +---+---+------------+ + | c1| c2|hash(c1, c2)| + +---+---+------------+ + |ABC|DEF| 599895104| + +---+---+------------+ """ return _invoke_function_over_seq_of_columns("hash", cols) @@ -12653,7 +13413,7 @@ def xxhash64(*cols: "ColumnOrName") -> Column: Parameters ---------- - cols : :class:`~pyspark.sql.Column` or str + cols : :class:`~pyspark.sql.Column` or column name one or more columns to compute on. Returns @@ -12661,27 +13421,34 @@ def xxhash64(*cols: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` hash value as long column. 
+ See Also + -------- + :meth:`pyspark.sql.functions.hash` + Examples -------- + >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([('ABC', 'DEF')], ['c1', 'c2']) - - Hash for one column - - >>> df.select(xxhash64('c1').alias('hash')).show() - +-------------------+ - | hash| - +-------------------+ - |4105715581806190027| - +-------------------+ - - Two or more columns - - >>> df.select(xxhash64('c1', 'c2').alias('hash')).show() - +-------------------+ - | hash| - +-------------------+ - |3233247871021311208| - +-------------------+ + >>> df.select('*', sf.xxhash64('c1')).show() + +---+---+-------------------+ + | c1| c2| xxhash64(c1)| + +---+---+-------------------+ + |ABC|DEF|4105715581806190027| + +---+---+-------------------+ + + >>> df.select('*', sf.xxhash64('c1', df.c2)).show() + +---+---+-------------------+ + | c1| c2| xxhash64(c1, c2)| + +---+---+-------------------+ + |ABC|DEF|3233247871021311208| + +---+---+-------------------+ + + >>> df.select('*', sf.xxhash64('*')).show() + +---+---+-------------------+ + | c1| c2| xxhash64(c1, c2)| + +---+---+-------------------+ + |ABC|DEF|3233247871021311208| + +---+---+-------------------+ """ return _invoke_function_over_seq_of_columns("xxhash64", cols) @@ -12699,9 +13466,9 @@ def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name column name or column that represents the input column to test - errMsg : :class:`~pyspark.sql.Column` or str, optional + errMsg : :class:`~pyspark.sql.Column` or literal string, optional A Python string literal or column containing the error message Returns @@ -12709,16 +13476,36 @@ def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None :class:`~pyspark.sql.Column` `null` if the input column is `true` otherwise throws an error with specified message. 
+ See Also + -------- + :meth:`pyspark.sql.functions.raise_error` + Examples -------- - >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) - >>> df.select(assert_true(df.a < df.b).alias('r')).collect() - [Row(r=None)] - >>> df.select(assert_true(df.a < df.b, df.a).alias('r')).collect() - [Row(r=None)] - >>> df.select(assert_true(df.a < df.b, 'error').alias('r')).collect() - [Row(r=None)] - >>> df.select(assert_true(df.a > df.b, 'My error msg').alias('r')).collect() # doctest: +SKIP + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(0, 1)], ['a', 'b']) + >>> df.select('*', sf.assert_true(df.a < df.b)).show() # doctest: +SKIP + +------------------------------------------------------+ + |assert_true((a < b), '(a#788L < b#789L)' is not true!)| + +------------------------------------------------------+ + | NULL| + +------------------------------------------------------+ + + >>> df.select('*', sf.assert_true(df.a < df.b, df.a)).show() + +---+---+-----------------------+ + | a| b|assert_true((a < b), a)| + +---+---+-----------------------+ + | 0| 1| NULL| + +---+---+-----------------------+ + + >>> df.select('*', sf.assert_true(df.a < df.b, 'error')).show() + +---+---+---------------------------+ + | a| b|assert_true((a < b), error)| + +---+---+---------------------------+ + | 0| 1| NULL| + +---+---+---------------------------+ + + >>> df.select('*', sf.assert_true(df.a > df.b, 'My error msg')).show() # doctest: +SKIP ... java.lang.RuntimeException: My error msg ... @@ -12746,7 +13533,7 @@ def raise_error(errMsg: Union[Column, str]) -> Column: Parameters ---------- - errMsg : :class:`~pyspark.sql.Column` or str + errMsg : :class:`~pyspark.sql.Column` or literal string A Python string literal or column containing the error message Returns @@ -12754,10 +13541,14 @@ def raise_error(errMsg: Union[Column, str]) -> Column: :class:`~pyspark.sql.Column` throws an error with specified message. 
+ See Also + -------- + :meth:`pyspark.sql.functions.assert_true` + Examples -------- - >>> df = spark.range(1) - >>> df.select(raise_error("My error message")).show() # doctest: +SKIP + >>> import pyspark.sql.functions as sf + >>> spark.range(1).select(sf.raise_error("My error message")).show() # doctest: +SKIP ... java.lang.RuntimeException: My error message ... @@ -12786,7 +13577,7 @@ def upper(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -12794,17 +13585,22 @@ def upper(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` upper case values. + See Also + -------- + :meth:`pyspark.sql.functions.lower` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING") - >>> df.select(upper("value")).show() - +------------+ - |upper(value)| - +------------+ - | SPARK| - | PYSPARK| - | PANDAS API| - +------------+ + >>> df.select("*", sf.upper("value")).show() + +----------+------------+ + | value|upper(value)| + +----------+------------+ + | Spark| SPARK| + | PySpark| PYSPARK| + |Pandas API| PANDAS API| + +----------+------------+ """ return _invoke_function_over_columns("upper", col) @@ -12821,7 +13617,7 @@ def lower(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -12829,17 +13625,22 @@ def lower(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` lower case values. 
+ See Also + -------- + :meth:`pyspark.sql.functions.upper` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING") - >>> df.select(lower("value")).show() - +------------+ - |lower(value)| - +------------+ - | spark| - | pyspark| - | pandas api| - +------------+ + >>> df.select("*", sf.lower("value")).show() + +----------+------------+ + | value|lower(value)| + +----------+------------+ + | Spark| spark| + | PySpark| pyspark| + |Pandas API| pandas api| + +----------+------------+ """ return _invoke_function_over_columns("lower", col) @@ -12856,7 +13657,7 @@ def ascii(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -12866,15 +13667,16 @@ def ascii(col: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING") - >>> df.select(ascii("value")).show() - +------------+ - |ascii(value)| - +------------+ - | 83| - | 80| - | 80| - +------------+ + >>> df.select("*", sf.ascii("value")).show() + +----------+------------+ + | value|ascii(value)| + +----------+------------+ + | Spark| 83| + | PySpark| 80| + |Pandas API| 80| + +----------+------------+ """ return _invoke_function_over_columns("ascii", col) @@ -12891,7 +13693,7 @@ def base64(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -12899,17 +13701,22 @@ def base64(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` BASE64 encoding of string value. 
+ See Also + -------- + :meth:`pyspark.sql.functions.unbase64` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame(["Spark", "PySpark", "Pandas API"], "STRING") - >>> df.select(base64("value")).show() - +----------------+ - | base64(value)| - +----------------+ - | U3Bhcms=| - | UHlTcGFyaw==| - |UGFuZGFzIEFQSQ==| - +----------------+ + >>> df.select("*", sf.base64("value")).show() + +----------+----------------+ + | value| base64(value)| + +----------+----------------+ + | Spark| U3Bhcms=| + | PySpark| UHlTcGFyaw==| + |Pandas API|UGFuZGFzIEFQSQ==| + +----------+----------------+ """ return _invoke_function_over_columns("base64", col) @@ -12926,7 +13733,7 @@ def unbase64(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -12934,19 +13741,22 @@ def unbase64(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` encoded string value. + See Also + -------- + :meth:`pyspark.sql.functions.base64` + Examples -------- - >>> df = spark.createDataFrame(["U3Bhcms=", - ... "UHlTcGFyaw==", - ... 
"UGFuZGFzIEFQSQ=="], "STRING") - >>> df.select(unbase64("value")).show() - +--------------------+ - | unbase64(value)| - +--------------------+ - | [53 70 61 72 6B]| - |[50 79 53 70 61 7...| - |[50 61 6E 64 61 7...| - +--------------------+ + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame(["U3Bhcms=", "UHlTcGFyaw==", "UGFuZGFzIEFQSQ=="], "STRING") + >>> df.select("*", sf.unbase64("value")).show(truncate=False) + +----------------+-------------------------------+ + |value |unbase64(value) | + +----------------+-------------------------------+ + |U3Bhcms= |[53 70 61 72 6B] | + |UHlTcGFyaw== |[50 79 53 70 61 72 6B] | + |UGFuZGFzIEFQSQ==|[50 61 6E 64 61 73 20 41 50 49]| + +----------------+-------------------------------+ """ return _invoke_function_over_columns("unbase64", col) @@ -12963,9 +13773,9 @@ def ltrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. - trim : :class:`~pyspark.sql.Column` or str, optional + trim : :class:`~pyspark.sql.Column` or column name, optional The trim string characters to trim, the default value is a single space .. versionadded:: 4.0.0 @@ -12975,6 +13785,11 @@ def ltrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: :class:`~pyspark.sql.Column` left trimmed values. 
+ See Also + -------- + :meth:`pyspark.sql.functions.trim` + :meth:`pyspark.sql.functions.rtrim` + Examples -------- Example 1: Trim the spaces @@ -13002,6 +13817,18 @@ def ltrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: | Spark**| Spark**| | *Spark| Spark| +--------+--------------------------+ + + Example 3: Trim a column containing different characters + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([("**Spark*", "*"), ("==Spark=", "=")], ["value", "t"]) + >>> df.select("*", sf.ltrim("value", "t")).show() + +--------+---+--------------------------+ + | value| t|TRIM(LEADING t FROM value)| + +--------+---+--------------------------+ + |**Spark*| *| Spark*| + |==Spark=| =| Spark=| + +--------+---+--------------------------+ """ if trim is not None: return _invoke_function_over_columns("ltrim", col, trim) @@ -13021,9 +13848,9 @@ def rtrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. - trim : :class:`~pyspark.sql.Column` or str, optional + trim : :class:`~pyspark.sql.Column` or column name, optional The trim string characters to trim, the default value is a single space .. versionadded:: 4.0.0 @@ -13033,6 +13860,11 @@ def rtrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: :class:`~pyspark.sql.Column` right trimmed values. 
+ See Also + -------- + :meth:`pyspark.sql.functions.trim` + :meth:`pyspark.sql.functions.ltrim` + Examples -------- Example 1: Trim the spaces @@ -13060,6 +13892,18 @@ def rtrim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: | Spark**| Spark| | *Spark| *Spark| +--------+---------------------------+ + + Example 3: Trim a column containing different characters + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([("**Spark*", "*"), ("==Spark=", "=")], ["value", "t"]) + >>> df.select("*", sf.rtrim("value", "t")).show() + +--------+---+---------------------------+ + | value| t|TRIM(TRAILING t FROM value)| + +--------+---+---------------------------+ + |**Spark*| *| **Spark| + |==Spark=| =| ==Spark| + +--------+---+---------------------------+ """ if trim is not None: return _invoke_function_over_columns("rtrim", col, trim) @@ -13079,9 +13923,9 @@ def trim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. - trim : :class:`~pyspark.sql.Column` or str, optional + trim : :class:`~pyspark.sql.Column` or column name, optional The trim string characters to trim, the default value is a single space .. versionadded:: 4.0.0 @@ -13091,6 +13935,11 @@ def trim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: :class:`~pyspark.sql.Column` trimmed values from both sides. 
+ See Also + -------- + :meth:`pyspark.sql.functions.ltrim` + :meth:`pyspark.sql.functions.rtrim` + Examples -------- Example 1: Trim the spaces @@ -13118,6 +13967,18 @@ def trim(col: "ColumnOrName", trim: Optional["ColumnOrName"] = None) -> Column: | Spark**| Spark| | *Spark| Spark| +--------+-----------------------+ + + Example 3: Trim a column containing different characters + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([("**Spark*", "*"), ("==Spark=", "=")], ["value", "t"]) + >>> df.select("*", sf.trim("value", "t")).show() + +--------+---+-----------------------+ + | value| t|TRIM(BOTH t FROM value)| + +--------+---+-----------------------+ + |**Spark*| *| Spark| + |==Spark=| =| Spark| + +--------+---+-----------------------+ """ if trim is not None: return _invoke_function_over_columns("trim", col, trim) @@ -13138,9 +13999,9 @@ def concat_ws(sep: str, *cols: "ColumnOrName") -> Column: Parameters ---------- - sep : str + sep : literal string words separator. - cols : :class:`~pyspark.sql.Column` or str + cols : :class:`~pyspark.sql.Column` or column name list of columns to work on. Returns @@ -13148,11 +14009,20 @@ def concat_ws(sep: str, *cols: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` string of concatenated words. 
+ See Also + -------- + :meth:`pyspark.sql.functions.concat` + Examples -------- - >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd']) - >>> df.select(concat_ws('-', df.s, df.d).alias('s')).collect() - [Row(s='abcd-123')] + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([("abcd", "123")], ["s", "d"]) + >>> df.select("*", sf.concat_ws("-", df.s, "d", sf.lit("xyz"))).show() + +----+---+-----------------------+ + | s| d|concat_ws(-, s, d, xyz)| + +----+---+-----------------------+ + |abcd|123| abcd-123-xyz| + +----+---+-----------------------+ """ from pyspark.sql.classic.column import _to_seq, _to_java_column @@ -13173,9 +14043,9 @@ def decode(col: "ColumnOrName", charset: str) -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. - charset : str + charset : literal string charset to use to decode to. Returns @@ -13183,15 +14053,20 @@ def decode(col: "ColumnOrName", charset: str) -> Column: :class:`~pyspark.sql.Column` the column for computed results. + See Also + -------- + :meth:`pyspark.sql.functions.encode` + Examples -------- - >>> df = spark.createDataFrame([('abcd',)], ['a']) - >>> df.select(decode("a", "UTF-8")).show() - +----------------+ - |decode(a, UTF-8)| - +----------------+ - | abcd| - +----------------+ + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(b"\x61\x62\x63\x64",)], ["a"]) + >>> df.select("*", sf.decode("a", "UTF-8")).show() + +-------------+----------------+ + | a|decode(a, UTF-8)| + +-------------+----------------+ + |[61 62 63 64]| abcd| + +-------------+----------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -13211,9 +14086,9 @@ def encode(col: "ColumnOrName", charset: str) -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. 
- charset : str + charset : literal string charset to use to encode. Returns @@ -13221,15 +14096,20 @@ def encode(col: "ColumnOrName", charset: str) -> Column: :class:`~pyspark.sql.Column` the column for computed results. + See Also + -------- + :meth:`pyspark.sql.functions.decode` + Examples -------- - >>> df = spark.createDataFrame([('abcd',)], ['c']) - >>> df.select(encode("c", "UTF-8")).show() - +----------------+ - |encode(c, UTF-8)| - +----------------+ - | [61 62 63 64]| - +----------------+ + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([("abcd",)], ["c"]) + >>> df.select("*", sf.encode("c", "UTF-8")).show() + +----+----------------+ + | c|encode(c, UTF-8)| + +----+----------------+ + |abcd| [61 62 63 64]| + +----+----------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -13245,7 +14125,7 @@ def is_valid_utf8(str: "ColumnOrName") -> Column: Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name A column of strings, each representing a UTF-8 byte sequence. Returns @@ -13253,6 +14133,12 @@ def is_valid_utf8(str: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` whether the input string is a valid UTF-8 string. + See Also + -------- + :meth:`pyspark.sql.functions.make_valid_utf8` + :meth:`pyspark.sql.functions.validate_utf8` + :meth:`pyspark.sql.functions.try_validate_utf8` + Examples -------- >>> import pyspark.sql.functions as sf @@ -13276,7 +14162,7 @@ def make_valid_utf8(str: "ColumnOrName") -> Column: Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name A column of strings, each representing a UTF-8 byte sequence. Returns @@ -13284,6 +14170,12 @@ def make_valid_utf8(str: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the valid UTF-8 version of the given input string. 
+ See Also + -------- + :meth:`pyspark.sql.functions.is_valid_utf8` + :meth:`pyspark.sql.functions.validate_utf8` + :meth:`pyspark.sql.functions.try_validate_utf8` + Examples -------- >>> import pyspark.sql.functions as sf @@ -13306,7 +14198,7 @@ def validate_utf8(str: "ColumnOrName") -> Column: Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name A column of strings, each representing a UTF-8 byte sequence. Returns @@ -13314,6 +14206,12 @@ def validate_utf8(str: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the input string if it is a valid UTF-8 string, error otherwise. + See Also + -------- + :meth:`pyspark.sql.functions.is_valid_utf8` + :meth:`pyspark.sql.functions.make_valid_utf8` + :meth:`pyspark.sql.functions.try_validate_utf8` + Examples -------- >>> import pyspark.sql.functions as sf @@ -13336,7 +14234,7 @@ def try_validate_utf8(str: "ColumnOrName") -> Column: Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name A column of strings, each representing a UTF-8 byte sequence. Returns @@ -13344,6 +14242,12 @@ def try_validate_utf8(str: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the input string if it is a valid UTF-8 string, null otherwise. 
+ See Also + -------- + :meth:`pyspark.sql.functions.is_valid_utf8` + :meth:`pyspark.sql.functions.make_valid_utf8` + :meth:`pyspark.sql.functions.validate_utf8` + Examples -------- >>> import pyspark.sql.functions as sf @@ -13370,7 +14274,7 @@ def format_number(col: "ColumnOrName", d: int) -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name the column name of the numeric value to be formatted d : int the N decimal places @@ -13382,8 +14286,14 @@ def format_number(col: "ColumnOrName", d: int) -> Column: Examples -------- - >>> spark.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() - [Row(v='5.0000')] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(5,)], ["a"]) + >>> df.select("*", sf.format_number("a", 4), sf.format_number(df.a, 6)).show() + +---+-------------------+-------------------+ + | a|format_number(a, 4)|format_number(a, 6)| + +---+-------------------+-------------------+ + | 5| 5.0000| 5.000000| + +---+-------------------+-------------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -13402,9 +14312,9 @@ def format_string(format: str, *cols: "ColumnOrName") -> Column: Parameters ---------- - format : str + format : literal string string that can contain embedded format tags and used as result column's value - cols : :class:`~pyspark.sql.Column` or str + cols : :class:`~pyspark.sql.Column` or column name column names or :class:`~pyspark.sql.Column`\\s to be used in formatting Returns @@ -13412,11 +14322,20 @@ def format_string(format: str, *cols: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the column of formatted results. 
+ See Also + -------- + :meth:`pyspark.sql.functions.printf` + Examples -------- - >>> df = spark.createDataFrame([(5, "hello")], ['a', 'b']) - >>> df.select(format_string('%d %s', df.a, df.b).alias('v')).collect() - [Row(v='5 hello')] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([(5, "hello")], ["a", "b"]) + >>> df.select("*", sf.format_string('%d %s', "a", df.b)).show() + +---+-----+--------------------------+ + | a| b|format_string(%d %s, a, b)| + +---+-----+--------------------------+ + | 5|hello| 5 hello| + +---+-----+--------------------------+ """ from pyspark.sql.classic.column import _to_seq, _to_java_column @@ -13457,6 +14376,13 @@ def instr(str: "ColumnOrName", substr: Union[Column, str]) -> Column: :class:`~pyspark.sql.Column` location of the first occurrence of the substring as integer. + See Also + -------- + :meth:`pyspark.sql.functions.locate` + :meth:`pyspark.sql.functions.substr` + :meth:`pyspark.sql.functions.substring` + :meth:`pyspark.sql.functions.substring_index` + Examples -------- Example 1: Using a literal string as the 'substring' @@ -13504,14 +14430,14 @@ def overlay( Parameters ---------- - src : :class:`~pyspark.sql.Column` or str - column name or column containing the string that will be replaced - replace : :class:`~pyspark.sql.Column` or str - column name or column containing the substitution string - pos : :class:`~pyspark.sql.Column` or str or int - column name, column, or int containing the starting position in src - len : :class:`~pyspark.sql.Column` or str or int, optional - column name, column, or int containing the number of bytes to replace in src + src : :class:`~pyspark.sql.Column` or column name + the string that will be replaced + replace : :class:`~pyspark.sql.Column` or column name + the substitution string + pos : :class:`~pyspark.sql.Column` or column name or int + the starting position in src + len : :class:`~pyspark.sql.Column` or column name or int, optional + the number of bytes to 
replace in src string by 'replace' defaults to -1, which represents the length of the 'replace' string Returns @@ -13521,13 +14447,28 @@ def overlay( Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("SPARK_SQL", "CORE")], ("x", "y")) - >>> df.select(overlay("x", "y", 7).alias("overlayed")).collect() - [Row(overlayed='SPARK_CORE')] - >>> df.select(overlay("x", "y", 7, 0).alias("overlayed")).collect() - [Row(overlayed='SPARK_CORESQL')] - >>> df.select(overlay("x", "y", 7, 2).alias("overlayed")).collect() - [Row(overlayed='SPARK_COREL')] + >>> df.select("*", sf.overlay("x", df.y, 7)).show() + +---------+----+--------------------+ + | x| y|overlay(x, y, 7, -1)| + +---------+----+--------------------+ + |SPARK_SQL|CORE| SPARK_CORE| + +---------+----+--------------------+ + + >>> df.select("*", sf.overlay("x", df.y, 7, 0)).show() + +---------+----+-------------------+ + | x| y|overlay(x, y, 7, 0)| + +---------+----+-------------------+ + |SPARK_SQL|CORE| SPARK_CORESQL| + +---------+----+-------------------+ + + >>> df.select("*", sf.overlay("x", "y", 7, 2)).show() + +---------+----+-------------------+ + | x| y|overlay(x, y, 7, 2)| + +---------+----+-------------------+ + |SPARK_SQL|CORE| SPARK_COREL| + +---------+----+-------------------+ """ pos = _enum_to_value(pos) if not isinstance(pos, (int, str, Column)): @@ -13581,11 +14522,11 @@ def sentences( Parameters ---------- - string : :class:`~pyspark.sql.Column` or str + string : :class:`~pyspark.sql.Column` or column name a string to be split - language : :class:`~pyspark.sql.Column` or str, optional + language : :class:`~pyspark.sql.Column` or column name, optional a language of the locale - country : :class:`~pyspark.sql.Column` or str, optional + country : :class:`~pyspark.sql.Column` or column name, optional a country of the locale Returns @@ -13593,28 +14534,35 @@ def sentences( :class:`~pyspark.sql.Column` arrays of split sentences. 
+ See Also + -------- + :meth:`pyspark.sql.functions.split` + :meth:`pyspark.sql.functions.split_part` + Examples -------- - >>> df = spark.createDataFrame([["This is an example sentence."]], ["string"]) - >>> df.select(sentences(df.string, lit("en"), lit("US"))).show(truncate=False) - +-----------------------------------+ - |sentences(string, en, US) | - +-----------------------------------+ - |[[This, is, an, example, sentence]]| - +-----------------------------------+ - >>> df.select(sentences(df.string, lit("en"))).show(truncate=False) - +-----------------------------------+ - |sentences(string, en, ) | - +-----------------------------------+ - |[[This, is, an, example, sentence]]| - +-----------------------------------+ - >>> df = spark.createDataFrame([["Hello world. How are you?"]], ["s"]) - >>> df.select(sentences("s")).show(truncate=False) - +---------------------------------+ - |sentences(s, , ) | - +---------------------------------+ - |[[Hello, world], [How, are, you]]| - +---------------------------------+ + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([("This is an example sentence.", )], ["s"]) + >>> df.select("*", sf.sentences(df.s, sf.lit("en"), sf.lit("US"))).show(truncate=False) + +----------------------------+-----------------------------------+ + |s |sentences(s, en, US) | + +----------------------------+-----------------------------------+ + |This is an example sentence.|[[This, is, an, example, sentence]]| + +----------------------------+-----------------------------------+ + + >>> df.select("*", sf.sentences(df.s, sf.lit("en"))).show(truncate=False) + +----------------------------+-----------------------------------+ + |s |sentences(s, en, ) | + +----------------------------+-----------------------------------+ + |This is an example sentence.|[[This, is, an, example, sentence]]| + +----------------------------+-----------------------------------+ + + >>> df.select("*", sf.sentences(df.s)).show(truncate=False) 
+ +----------------------------+-----------------------------------+ + |s |sentences(s, , ) | + +----------------------------+-----------------------------------+ + |This is an example sentence.|[[This, is, an, example, sentence]]| + +----------------------------+-----------------------------------+ """ if language is None: language = lit("") @@ -13646,15 +14594,15 @@ def substring( Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name target column to work on. - pos : :class:`~pyspark.sql.Column` or str or int + pos : :class:`~pyspark.sql.Column` or column name or int starting position in str. .. versionchanged:: 4.0.0 `pos` now accepts column and column name. - len : :class:`~pyspark.sql.Column` or str or int + len : :class:`~pyspark.sql.Column` or column name or int length of chars. .. versionchanged:: 4.0.0 @@ -13665,6 +14613,14 @@ def substring( :class:`~pyspark.sql.Column` substring of given value. + See Also + -------- + :meth:`pyspark.sql.functions.instr` + :meth:`pyspark.sql.functions.locate` + :meth:`pyspark.sql.functions.substr` + :meth:`pyspark.sql.functions.substring_index` + :meth:`pyspark.sql.Column.substr` + Examples -------- Example 1: Using literal integers as arguments @@ -13743,9 +14699,9 @@ def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column: Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name target column to work on. - delim : str + delim : literal string delimiter of values. count : int number of occurrences. @@ -13755,13 +14711,31 @@ def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column: :class:`~pyspark.sql.Column` substring of given value. 
+ See Also + -------- + :meth:`pyspark.sql.functions.instr` + :meth:`pyspark.sql.functions.locate` + :meth:`pyspark.sql.functions.substr` + :meth:`pyspark.sql.functions.substring` + :meth:`pyspark.sql.Column.substr` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('a.b.c.d',)], ['s']) - >>> df.select(substring_index(df.s, '.', 2).alias('s')).collect() - [Row(s='a.b')] - >>> df.select(substring_index(df.s, '.', -3).alias('s')).collect() - [Row(s='b.c.d')] + >>> df.select('*', sf.substring_index(df.s, '.', 2)).show() + +-------+------------------------+ + | s|substring_index(s, ., 2)| + +-------+------------------------+ + |a.b.c.d| a.b| + +-------+------------------------+ + + >>> df.select('*', sf.substring_index('s', '.', -3)).show() + +-------+-------------------------+ + | s|substring_index(s, ., -3)| + +-------+-------------------------+ + |a.b.c.d| b.c.d| + +-------+-------------------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -13783,16 +14757,15 @@ def levenshtein( Parameters ---------- - left : :class:`~pyspark.sql.Column` or str + left : :class:`~pyspark.sql.Column` or column name first column value. - right : :class:`~pyspark.sql.Column` or str + right : :class:`~pyspark.sql.Column` or column name second column value. threshold : int, optional if set when the levenshtein distance of the two given strings less than or equal to a given threshold then return result distance, or -1 - .. versionchanged: 3.5.0 - Added ``threshold`` argument. + .. 
versionadded:: 3.5.0 Returns ------- @@ -13801,11 +14774,21 @@ Examples -------- - >>> df0 = spark.createDataFrame([('kitten', 'sitting',)], ['l', 'r']) - >>> df0.select(levenshtein('l', 'r').alias('d')).collect() - [Row(d=3)] - >>> df0.select(levenshtein('l', 'r', 2).alias('d')).collect() - [Row(d=-1)] + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('kitten', 'sitting',)], ['l', 'r']) + >>> df.select('*', sf.levenshtein('l', 'r')).show() + +------+-------+-----------------+ + | l| r|levenshtein(l, r)| + +------+-------+-----------------+ + |kitten|sitting| 3| + +------+-------+-----------------+ + + >>> df.select('*', sf.levenshtein(df.l, df.r, 2)).show() + +------+-------+--------------------+ + | l| r|levenshtein(l, r, 2)| + +------+-------+--------------------+ + |kitten|sitting| -1| + +------+-------+--------------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -13829,9 +14812,9 @@ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column: Parameters ---------- - substr : str + substr : literal string a string - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name a Column of :class:`pyspark.sql.types.StringType` pos : int, optional start position (zero based) @@ -13846,11 +14829,31 @@ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column: The position is not zero based, but 1 based index. Returns 0 if substr could not be found in str.
+ See Also + -------- + :meth:`pyspark.sql.functions.instr` + :meth:`pyspark.sql.functions.substr` + :meth:`pyspark.sql.functions.substring` + :meth:`pyspark.sql.functions.substring_index` + :meth:`pyspark.sql.Column.substr` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('abcd',)], ['s',]) - >>> df.select(locate('b', df.s, 1).alias('s')).collect() - [Row(s=2)] + >>> df.select('*', sf.locate('b', 's', 1)).show() + +----+---------------+ + | s|locate(b, s, 1)| + +----+---------------+ + |abcd| 2| + +----+---------------+ + + >>> df.select('*', sf.locate('b', df.s, 3)).show() + +----+---------------+ + | s|locate(b, s, 3)| + +----+---------------+ + |abcd| 0| + +----+---------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -13894,6 +14897,10 @@ def lpad( :class:`~pyspark.sql.Column` left padded result. + See Also + -------- + :meth:`pyspark.sql.functions.rpad` + Examples -------- Example 1: Pad with a literal string @@ -13960,6 +14967,10 @@ def rpad( :class:`~pyspark.sql.Column` right padded result. + See Also + -------- + :meth:`pyspark.sql.functions.lpad` + Examples -------- Example 1: Pad with a literal string @@ -14003,9 +15014,9 @@ def repeat(col: "ColumnOrName", n: Union["ColumnOrName", int]) -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. - n : :class:`~pyspark.sql.Column` or str or int + n : :class:`~pyspark.sql.Column` or column name or int number of times to repeat value. .. versionchanged:: 4.0.0 @@ -14018,35 +15029,35 @@ def repeat(col: "ColumnOrName", n: Union["ColumnOrName", int]) -> Column: Examples -------- - >>> import pyspark.sql.functions as sf - >>> spark.createDataFrame( - ... [('ab',)], ['s',] - ... 
).select(sf.repeat("s", 3)).show() - +------------+ - |repeat(s, 3)| - +------------+ - | ababab| - +------------+ + Example 1: Repeat with a constant number of times >>> import pyspark.sql.functions as sf - >>> spark.createDataFrame( - ... [('ab',)], ['s',] - ... ).select(sf.repeat("s", sf.lit(4))).show() - +------------+ - |repeat(s, 4)| - +------------+ - | abababab| - +------------+ + >>> df = spark.createDataFrame([('ab',)], ['s',]) + >>> df.select("*", sf.repeat("s", 3)).show() + +---+------------+ + | s|repeat(s, 3)| + +---+------------+ + | ab| ababab| + +---+------------+ + + >>> df.select("*", sf.repeat(df.s, sf.lit(4))).show() + +---+------------+ + | s|repeat(s, 4)| + +---+------------+ + | ab| abababab| + +---+------------+ + + Example 2: Repeat with a column containing different number of times >>> import pyspark.sql.functions as sf - >>> spark.createDataFrame( - ... [('ab', 5,)], ['s', 't'] - ... ).select(sf.repeat("s", 't')).show() - +------------+ - |repeat(s, t)| - +------------+ - | ababababab| - +------------+ + >>> df = spark.createDataFrame([('ab', 5,), ('abc', 6,)], ['s', 't']) + >>> df.select("*", sf.repeat("s", "t")).show() + +---+---+------------------+ + | s| t| repeat(s, t)| + +---+---+------------------+ + | ab| 5| ababababab| + |abc| 6|abcabcabcabcabcabc| + +---+---+------------------+ """ n = _enum_to_value(n) n = lit(n) if isinstance(n, int) else n @@ -14069,12 +15080,18 @@ def split( Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name a string expression to split - pattern : :class:`~pyspark.sql.Column` or str + pattern : :class:`~pyspark.sql.Column` or literal string a string representing a regular expression. The regex string should be a Java regular expression. - limit : :class:`~pyspark.sql.Column` or str or int + + .. versionchanged:: 4.0.0 + `pattern` now accepts column. 
Does not accept column name since string type remain + accepted as a regular expression representation, for backwards compatibility. + In addition to int, `limit` now accepts column and column name. + + limit : :class:`~pyspark.sql.Column` or column name or int an integer which controls the number of times `pattern` is applied. * ``limit > 0``: The resulting array's length will not be more than `limit`, and the @@ -14086,61 +15103,66 @@ def split( .. versionchanged:: 3.0 `split` now takes an optional `limit` field. If not provided, default limit value is -1. - .. versionchanged:: 4.0.0 - `pattern` now accepts column. Does not accept column name since string type remain - accepted as a regular expression representation, for backwards compatibility. - In addition to int, `limit` now accepts column and column name. - Returns ------- :class:`~pyspark.sql.Column` array of separated strings. + See Also + -------- + :meth:`pyspark.sql.functions.sentences` + :meth:`pyspark.sql.functions.split_part` + Examples -------- - >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([('oneAtwoBthreeC',)], ['s',]) - >>> df.select(sf.split(df.s, '[ABC]', 2).alias('s')).show() - +-----------------+ - | s| - +-----------------+ - |[one, twoBthreeC]| - +-----------------+ + Example 1: Split with a constant pattern >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([('oneAtwoBthreeC',)], ['s',]) - >>> df.select(sf.split(df.s, '[ABC]', -1).alias('s')).show() - +-------------------+ - | s| - +-------------------+ - |[one, two, three, ]| - +-------------------+ + >>> df.select('*', sf.split(df.s, '[ABC]')).show() + +--------------+-------------------+ + | s|split(s, [ABC], -1)| + +--------------+-------------------+ + |oneAtwoBthreeC|[one, two, three, ]| + +--------------+-------------------+ + + >>> df.select('*', sf.split(df.s, '[ABC]', 2)).show() + +--------------+------------------+ + | s|split(s, [ABC], 2)| + +--------------+------------------+ + 
|oneAtwoBthreeC| [one, twoBthreeC]| + +--------------+------------------+ + + >>> df.select('*', sf.split('s', '[ABC]', -2)).show() + +--------------+-------------------+ + | s|split(s, [ABC], -2)| + +--------------+-------------------+ + |oneAtwoBthreeC|[one, two, three, ]| + +--------------+-------------------+ + + Example 2: Split with a column containing different patterns and limits >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame( - ... [('oneAtwoBthreeC', '[ABC]'), ('1A2B3C', '[1-9]+'), ('aa2bb3cc4', '[1-9]+')], - ... ['s', 'pattern'] - ... ) - >>> df.select(sf.split(df.s, df.pattern).alias('s')).show() - +-------------------+ - | s| - +-------------------+ - |[one, two, three, ]| - | [, A, B, C]| - | [aa, bb, cc, ]| - +-------------------+ - - >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame( - ... [('oneAtwoBthreeC', '[ABC]', 2), ('1A2B3C', '[1-9]+', -1)], - ... ['s', 'pattern', 'expected_parts'] - ... ) - >>> df.select(sf.split(df.s, df.pattern, df.expected_parts).alias('s')).show() + >>> df = spark.createDataFrame([ + ... ('oneAtwoBthreeC', '[ABC]', 2), + ... ('1A2B3C', '[1-9]+', 1), + ... 
('aa2bb3cc4', '[1-9]+', -1)], ['s', 'p', 'l']) + >>> df.select('*', sf.split(df.s, df.p)).show() + +--------------+------+---+-------------------+ + | s| p| l| split(s, p, -1)| + +--------------+------+---+-------------------+ + |oneAtwoBthreeC| [ABC]| 2|[one, two, three, ]| + | 1A2B3C|[1-9]+| 1| [, A, B, C]| + | aa2bb3cc4|[1-9]+| -1| [aa, bb, cc, ]| + +--------------+------+---+-------------------+ + + >>> df.select(sf.split('s', df.p, 'l')).show() +-----------------+ - | s| + | split(s, p, l)| +-----------------+ |[one, twoBthreeC]| - | [, A, B, C]| + | [1A2B3C]| + | [aa, bb, cc, ]| +-----------------+ """ limit = _enum_to_value(limit) @@ -14305,16 +15327,29 @@ def randstr(length: Union[Column, int], seed: Optional[Union[Column, int]] = Non :class:`~pyspark.sql.Column` The generated random string with the specified length. + See Also + -------- + :meth:`pyspark.sql.functions.rand` + :meth:`pyspark.sql.functions.randn` + Examples -------- - >>> spark.createDataFrame([('3',)], ['a']) \\ - ... .select(randstr(lit(5), lit(0)).alias('result')) \\ - ... .selectExpr("length(result) > 0").show() - +--------------------+ - |(length(result) > 0)| - +--------------------+ - | true| - +--------------------+ + >>> import pyspark.sql.functions as sf + >>> spark.range(0, 10, 1, 1).select(sf.randstr(16, 3)).show() + +----------------+ + | randstr(16, 3)| + +----------------+ + |nurJIpH4cmmMnsCG| + |fl9YtT5m01trZtIt| + |PD19rAgscTHS7qQZ| + |2CuAICF5UJOruVv4| + |kNZEs8nDpJEoz3Rl| + |OXiU0KN5eaXfjXFs| + |qfnTM1BZAHtN0gBV| + |1p8XiSKwg33KnRPK| + |od5y5MucayQq1bKK| + |tklYPmKmc5sIppWM| + +----------------+ """ length = _enum_to_value(length) length = lit(length) @@ -14335,9 +15370,9 @@ def regexp_count(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name target column to work on. 
- regexp : :class:`~pyspark.sql.Column` or str + regexp : :class:`~pyspark.sql.Column` or column name regex pattern to apply. Returns @@ -14347,13 +15382,35 @@ def regexp_count(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("1a 2b 14m", r"\d+")], ["str", "regexp"]) - >>> df.select(regexp_count('str', lit(r'\d+')).alias('d')).collect() - [Row(d=3)] - >>> df.select(regexp_count('str', lit(r'mmm')).alias('d')).collect() - [Row(d=0)] - >>> df.select(regexp_count("str", col("regexp")).alias('d')).collect() - [Row(d=3)] + >>> df.select('*', sf.regexp_count('str', sf.lit(r'\d+'))).show() + +---------+------+----------------------+ + | str|regexp|regexp_count(str, \d+)| + +---------+------+----------------------+ + |1a 2b 14m| \d+| 3| + +---------+------+----------------------+ + + >>> df.select('*', sf.regexp_count('str', sf.lit(r'mmm'))).show() + +---------+------+----------------------+ + | str|regexp|regexp_count(str, mmm)| + +---------+------+----------------------+ + |1a 2b 14m| \d+| 0| + +---------+------+----------------------+ + + >>> df.select('*', sf.regexp_count("str", sf.col("regexp"))).show() + +---------+------+-------------------------+ + | str|regexp|regexp_count(str, regexp)| + +---------+------+-------------------------+ + |1a 2b 14m| \d+| 3| + +---------+------+-------------------------+ + + >>> df.select('*', sf.regexp_count(sf.col('str'), "regexp")).show() + +---------+------+-------------------------+ + | str|regexp|regexp_count(str, regexp)| + +---------+------+-------------------------+ + |1a 2b 14m| \d+| 3| + +---------+------+-------------------------+ """ return _invoke_function_over_columns("regexp_count", str, regexp) @@ -14370,7 +15427,7 @@ def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column: Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name target 
column to work on. pattern : str regex pattern to apply. @@ -14382,17 +15439,36 @@ def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column: :class:`~pyspark.sql.Column` matched value specified by `idx` group id. + See Also + -------- + :meth:`pyspark.sql.functions.regexp_extract_all` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([('100-200',)], ['str']) - >>> df.select(regexp_extract('str', r'(\d+)-(\d+)', 1).alias('d')).collect() - [Row(d='100')] + >>> df.select('*', sf.regexp_extract('str', r'(\d+)-(\d+)', 1)).show() + +-------+-----------------------------------+ + | str|regexp_extract(str, (\d+)-(\d+), 1)| + +-------+-----------------------------------+ + |100-200| 100| + +-------+-----------------------------------+ + >>> df = spark.createDataFrame([('foo',)], ['str']) - >>> df.select(regexp_extract('str', r'(\d+)', 1).alias('d')).collect() - [Row(d='')] + >>> df.select('*', sf.regexp_extract('str', r'(\d+)', 1)).show() + +---+-----------------------------+ + |str|regexp_extract(str, (\d+), 1)| + +---+-----------------------------+ + |foo| | + +---+-----------------------------+ + >>> df = spark.createDataFrame([('aaaac',)], ['str']) - >>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect() - [Row(d='')] + >>> df.select('*', sf.regexp_extract(sf.col('str'), '(a+)(b)?(c)', 2)).show() + +-----+-----------------------------------+ + | str|regexp_extract(str, (a+)(b)?(c), 2)| + +-----+-----------------------------------+ + |aaaac| | + +-----+-----------------------------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -14412,11 +15488,11 @@ def regexp_extract_all( Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name target column to work on. - regexp : :class:`~pyspark.sql.Column` or str + regexp : :class:`~pyspark.sql.Column` or column name regex pattern to apply. 
- idx : int, optional + idx : :class:`~pyspark.sql.Column` or int, optional matched group id. Returns @@ -14424,17 +15500,48 @@ def regexp_extract_all( :class:`~pyspark.sql.Column` all strings in the `str` that match a Java regex and corresponding to the regex group index. + See Also + -------- + :meth:`pyspark.sql.functions.regexp_extract` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("100-200, 300-400", r"(\d+)-(\d+)")], ["str", "regexp"]) - >>> df.select(regexp_extract_all('str', lit(r'(\d+)-(\d+)')).alias('d')).collect() - [Row(d=['100', '300'])] - >>> df.select(regexp_extract_all('str', lit(r'(\d+)-(\d+)'), 1).alias('d')).collect() - [Row(d=['100', '300'])] - >>> df.select(regexp_extract_all('str', lit(r'(\d+)-(\d+)'), 2).alias('d')).collect() - [Row(d=['200', '400'])] - >>> df.select(regexp_extract_all('str', col("regexp")).alias('d')).collect() - [Row(d=['100', '300'])] + >>> df.select('*', sf.regexp_extract_all('str', sf.lit(r'(\d+)-(\d+)'))).show() + +----------------+-----------+---------------------------------------+ + | str| regexp|regexp_extract_all(str, (\d+)-(\d+), 1)| + +----------------+-----------+---------------------------------------+ + |100-200, 300-400|(\d+)-(\d+)| [100, 300]| + +----------------+-----------+---------------------------------------+ + + >>> df.select('*', sf.regexp_extract_all('str', sf.lit(r'(\d+)-(\d+)'), sf.lit(1))).show() + +----------------+-----------+---------------------------------------+ + | str| regexp|regexp_extract_all(str, (\d+)-(\d+), 1)| + +----------------+-----------+---------------------------------------+ + |100-200, 300-400|(\d+)-(\d+)| [100, 300]| + +----------------+-----------+---------------------------------------+ + + >>> df.select('*', sf.regexp_extract_all('str', sf.lit(r'(\d+)-(\d+)'), 2)).show() + +----------------+-----------+---------------------------------------+ + | str| regexp|regexp_extract_all(str, (\d+)-(\d+), 2)| + 
+----------------+-----------+---------------------------------------+ + |100-200, 300-400|(\d+)-(\d+)| [200, 400]| + +----------------+-----------+---------------------------------------+ + + >>> df.select('*', sf.regexp_extract_all('str', sf.col("regexp"))).show() + +----------------+-----------+----------------------------------+ + | str| regexp|regexp_extract_all(str, regexp, 1)| + +----------------+-----------+----------------------------------+ + |100-200, 300-400|(\d+)-(\d+)| [100, 300]| + +----------------+-----------+----------------------------------+ + + >>> df.select('*', sf.regexp_extract_all(sf.col('str'), "regexp")).show() + +----------------+-----------+----------------------------------+ + | str| regexp|regexp_extract_all(str, regexp, 1)| + +----------------+-----------+----------------------------------+ + |100-200, 300-400|(\d+)-(\d+)| [100, 300]| + +----------------+-----------+----------------------------------+ """ if idx is None: return _invoke_function_over_columns("regexp_extract_all", str, regexp) @@ -14469,43 +15576,102 @@ def regexp_replace( Examples -------- - >>> df = spark.createDataFrame([("100-200", r"(\d+)", "--")], ["str", "pattern", "replacement"]) - >>> df.select(regexp_replace('str', r'(\d+)', '--').alias('d')).collect() - [Row(d='-----')] - >>> df.select(regexp_replace("str", col("pattern"), col("replacement")).alias('d')).collect() - [Row(d='-----')] + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame( + ... [("100-200", r"(\d+)", "--")], + ... ["str", "pattern", "replacement"] + ... ) + + Example 1: Replaces all the substrings in the `str` column name that + match the regex pattern `(\d+)` (one or more digits) with the replacement + string "--". 
+ + >>> df.select('*', sf.regexp_replace('str', r'(\d+)', '--')).show() + +-------+-------+-----------+---------------------------------+ + | str|pattern|replacement|regexp_replace(str, (\d+), --, 1)| + +-------+-------+-----------+---------------------------------+ + |100-200| (\d+)| --| -----| + +-------+-------+-----------+---------------------------------+ + + Example 2: Replaces all the substrings in the `str` Column that match + the regex pattern in the `pattern` Column with the string in the `replacement` + column. + + >>> df.select('*', \ + ... sf.regexp_replace(sf.col("str"), sf.col("pattern"), sf.col("replacement")) \ + ... ).show() + +-------+-------+-----------+--------------------------------------------+ + | str|pattern|replacement|regexp_replace(str, pattern, replacement, 1)| + +-------+-------+-----------+--------------------------------------------+ + |100-200| (\d+)| --| -----| + +-------+-------+-----------+--------------------------------------------+ """ return _invoke_function_over_columns("regexp_replace", string, lit(pattern), lit(replacement)) @_try_remote_functions def regexp_substr(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: - r"""Returns the substring that matches the Java regex `regexp` within the string `str`. + r"""Returns the first substring that matches the Java regex `regexp` within the string `str`. If the regular expression is not found, the result is null. .. versionadded:: 3.5.0 Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name target column to work on. - regexp : :class:`~pyspark.sql.Column` or str + regexp : :class:`~pyspark.sql.Column` or column name regex pattern to apply. Returns ------- :class:`~pyspark.sql.Column` - the substring that matches a Java regex within the string `str`. + the first substring that matches a Java regex within the string `str`. 
Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("1a 2b 14m", r"\d+")], ["str", "regexp"]) - >>> df.select(regexp_substr('str', lit(r'\d+')).alias('d')).collect() - [Row(d='1')] - >>> df.select(regexp_substr('str', lit(r'mmm')).alias('d')).collect() - [Row(d=None)] - >>> df.select(regexp_substr("str", col("regexp")).alias('d')).collect() - [Row(d='1')] + + Example 1: Returns the first substring in the `str` column name that + matches the regex pattern `(\d+)` (one or more digits). + + >>> df.select('*', sf.regexp_substr('str', sf.lit(r'\d+'))).show() + +---------+------+-----------------------+ + | str|regexp|regexp_substr(str, \d+)| + +---------+------+-----------------------+ + |1a 2b 14m| \d+| 1| + +---------+------+-----------------------+ + + Example 2: Returns the first substring in the `str` column name that + matches the regex pattern `(mmm)` (three consecutive 'm' characters) + + >>> df.select('*', sf.regexp_substr('str', sf.lit(r'mmm'))).show() + +---------+------+-----------------------+ + | str|regexp|regexp_substr(str, mmm)| + +---------+------+-----------------------+ + |1a 2b 14m| \d+| NULL| + +---------+------+-----------------------+ + + Example 3: Returns the first substring in the `str` column name that + matches the regex pattern in `regexp` Column. + + >>> df.select('*', sf.regexp_substr("str", sf.col("regexp"))).show() + +---------+------+--------------------------+ + | str|regexp|regexp_substr(str, regexp)| + +---------+------+--------------------------+ + |1a 2b 14m| \d+| 1| + +---------+------+--------------------------+ + + Example 4: Returns the first substring in the `str` Column that + matches the regex pattern in `regexp` column name. 
+ + >>> df.select('*', sf.regexp_substr(sf.col("str"), "regexp")).show() + +---------+------+--------------------------+ + | str|regexp|regexp_substr(str, regexp)| + +---------+------+--------------------------+ + |1a 2b 14m| \d+| 1| + +---------+------+--------------------------+ """ return _invoke_function_over_columns("regexp_substr", str, regexp) @@ -14514,36 +15680,70 @@ def regexp_substr(str: "ColumnOrName", regexp: "ColumnOrName") -> Column: def regexp_instr( str: "ColumnOrName", regexp: "ColumnOrName", idx: Optional[Union[int, Column]] = None ) -> Column: - r"""Extract all strings in the `str` that match the Java regex `regexp` + r"""Returns the position of the first substring in the `str` that match the Java regex `regexp` and corresponding to the regex group index. .. versionadded:: 3.5.0 Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name target column to work on. - regexp : :class:`~pyspark.sql.Column` or str + regexp : :class:`~pyspark.sql.Column` or column name regex pattern to apply. - idx : int, optional + idx : :class:`~pyspark.sql.Column` or int, optional matched group id. Returns ------- :class:`~pyspark.sql.Column` - all strings in the `str` that match a Java regex and corresponding to the regex group index. + the position of the first substring in the `str` that match a Java regex and corresponding + to the regex group index. 
Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("1a 2b 14m", r"\d+(a|b|m)")], ["str", "regexp"]) - >>> df.select(regexp_instr('str', lit(r'\d+(a|b|m)')).alias('d')).collect() - [Row(d=1)] - >>> df.select(regexp_instr('str', lit(r'\d+(a|b|m)'), 1).alias('d')).collect() - [Row(d=1)] - >>> df.select(regexp_instr('str', lit(r'\d+(a|b|m)'), 2).alias('d')).collect() - [Row(d=1)] - >>> df.select(regexp_instr('str', col("regexp")).alias('d')).collect() - [Row(d=1)] + + Example 1: Returns the position of the first substring in the `str` column name that + match the regex pattern `(\d+(a|b|m))` (one or more digits followed by 'a', 'b', or 'm'). + + >>> df.select('*', sf.regexp_instr('str', sf.lit(r'\d+(a|b|m)'))).show() + +---------+----------+--------------------------------+ + | str| regexp|regexp_instr(str, \d+(a|b|m), 0)| + +---------+----------+--------------------------------+ + |1a 2b 14m|\d+(a|b|m)| 1| + +---------+----------+--------------------------------+ + + Example 2: Returns the position of the first substring in the `str` column name that + match the regex pattern `(\d+(a|b|m))` (one or more digits followed by 'a', 'b', or 'm'), + + >>> df.select('*', sf.regexp_instr('str', sf.lit(r'\d+(a|b|m)'), sf.lit(1))).show() + +---------+----------+--------------------------------+ + | str| regexp|regexp_instr(str, \d+(a|b|m), 1)| + +---------+----------+--------------------------------+ + |1a 2b 14m|\d+(a|b|m)| 1| + +---------+----------+--------------------------------+ + + Example 3: Returns the position of the first substring in the `str` column name that + match the regex pattern in `regexp` Column. 
+ + >>> df.select('*', sf.regexp_instr('str', sf.col("regexp"))).show() + +---------+----------+----------------------------+ + | str| regexp|regexp_instr(str, regexp, 0)| + +---------+----------+----------------------------+ + |1a 2b 14m|\d+(a|b|m)| 1| + +---------+----------+----------------------------+ + + Example 4: Returns the position of the first substring in the `str` Column that + match the regex pattern in `regexp` column name. + + >>> df.select('*', sf.regexp_instr(sf.col("str"), "regexp")).show() + +---------+----------+----------------------------+ + | str| regexp|regexp_instr(str, regexp, 0)| + +---------+----------+----------------------------+ + |1a 2b 14m|\d+(a|b|m)| 1| + +---------+----------+----------------------------+ """ if idx is None: return _invoke_function_over_columns("regexp_instr", str, regexp) @@ -14562,7 +15762,7 @@ def initcap(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -14572,8 +15772,14 @@ def initcap(col: "ColumnOrName") -> Column: Examples -------- - >>> spark.createDataFrame([('ab cd',)], ['a']).select(initcap("a").alias('v')).collect() - [Row(v='Ab Cd')] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('ab cd',)], ['a']) + >>> df.select("*", sf.initcap("a")).show() + +-----+----------+ + | a|initcap(a)| + +-----+----------+ + |ab cd| Ab Cd| + +-----+----------+ """ return _invoke_function_over_columns("initcap", col) @@ -14590,7 +15796,7 @@ def soundex(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. 
Returns @@ -14600,9 +15806,15 @@ def soundex(col: "ColumnOrName") -> Column: Examples -------- - >>> df = spark.createDataFrame([("Peters",),("Uhrbach",)], ['name']) - >>> df.select(soundex(df.name).alias("soundex")).collect() - [Row(soundex='P362'), Row(soundex='U612')] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([("Peters",),("Uhrbach",)], ["s"]) + >>> df.select("*", sf.soundex("s")).show() + +-------+----------+ + | s|soundex(s)| + +-------+----------+ + | Peters| P362| + |Uhrbach| U612| + +-------+----------+ """ return _invoke_function_over_columns("soundex", col) @@ -14618,7 +15830,7 @@ def bin(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -14628,9 +15840,22 @@ def bin(col: "ColumnOrName") -> Column: Examples -------- - >>> df = spark.createDataFrame([2,5], "INT") - >>> df.select(bin(df.value).alias('c')).collect() - [Row(c='10'), Row(c='101')] + >>> import pyspark.sql.functions as sf + >>> spark.range(10).select("*", sf.bin("id")).show() + +---+-------+ + | id|bin(id)| + +---+-------+ + | 0| 0| + | 1| 1| + | 2| 10| + | 3| 11| + | 4| 100| + | 5| 101| + | 6| 110| + | 7| 111| + | 8| 1000| + | 9| 1001| + +---+-------+ """ return _invoke_function_over_columns("bin", col) @@ -14648,9 +15873,13 @@ def hex(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. 
+ See Also + -------- + :meth:`pyspark.sql.functions.unhex` + Returns ------- :class:`~pyspark.sql.Column` @@ -14658,8 +15887,14 @@ def hex(col: "ColumnOrName") -> Column: Examples -------- - >>> spark.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect() - [Row(hex(a)='414243', hex(b)='3')] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('ABC', 3)], ['a', 'b']) + >>> df.select('*', sf.hex('a'), sf.hex(df.b)).show() + +---+---+------+------+ + | a| b|hex(a)|hex(b)| + +---+---+------+------+ + |ABC| 3|414243| 3| + +---+---+------+------+ """ return _invoke_function_over_columns("hex", col) @@ -14676,9 +15911,13 @@ def unhex(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. + See Also + -------- + :meth:`pyspark.sql.functions.hex` + Returns ------- :class:`~pyspark.sql.Column` @@ -14686,8 +15925,14 @@ def unhex(col: "ColumnOrName") -> Column: Examples -------- - >>> spark.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect() - [Row(unhex(a)=bytearray(b'ABC'))] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('414243',)], ['a']) + >>> df.select('*', sf.unhex('a')).show() + +------+----------+ + | a| unhex(a)| + +------+----------+ + |414243|[41 42 43]| + +------+----------+ """ return _invoke_function_over_columns("unhex", col) @@ -14722,14 +15967,22 @@ def uniform( Examples -------- - >>> spark.createDataFrame([('3',)], ['a']) \\ - ... .select(uniform(lit(0), lit(10), lit(0)).alias('result')) \\ - ... 
.selectExpr("result < 15").show() - +-------------+ - |(result < 15)| - +-------------+ - | true| - +-------------+ + >>> import pyspark.sql.functions as sf + >>> spark.range(0, 10, 1, 1).select(sf.uniform(5, 105, 3)).show() + +------------------+ + |uniform(5, 105, 3)| + +------------------+ + | 30| + | 71| + | 99| + | 77| + | 16| + | 25| + | 89| + | 80| + | 51| + | 83| + +------------------+ """ min = _enum_to_value(min) min = lit(min) @@ -15092,18 +16345,35 @@ def split_part(src: "ColumnOrName", delimiter: "ColumnOrName", partNum: "ColumnO Parameters ---------- - src : :class:`~pyspark.sql.Column` or str - A column of string to be splited. - delimiter : :class:`~pyspark.sql.Column` or str + src : :class:`~pyspark.sql.Column` or column name + A column of string to be split. + delimiter : :class:`~pyspark.sql.Column` or column name A column of string, the delimiter used for split. - partNum : :class:`~pyspark.sql.Column` or str + partNum : :class:`~pyspark.sql.Column` or column name A column of string, requested part of the split (1-based). 
+ See Also + -------- + :meth:`pyspark.sql.functions.sentences` + :meth:`pyspark.sql.functions.split` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("11.12.13", ".", 3,)], ["a", "b", "c"]) - >>> df.select(split_part(df.a, df.b, df.c).alias('r')).collect() - [Row(r='13')] + >>> df.select("*", sf.split_part("a", "b", "c")).show() + +--------+---+---+-------------------+ + | a| b| c|split_part(a, b, c)| + +--------+---+---+-------------------+ + |11.12.13| .| 3| 13| + +--------+---+---+-------------------+ + + >>> df.select("*", sf.split_part(df.a, df.b, sf.lit(-2))).show() + +--------+---+---+--------------------+ + | a| b| c|split_part(a, b, -2)| + +--------+---+---+--------------------+ + |11.12.13| .| 3| 12| + +--------+---+---+--------------------+ """ return _invoke_function_over_columns("split_part", src, delimiter, partNum) @@ -15120,34 +16390,42 @@ def substr( Parameters ---------- - str : :class:`~pyspark.sql.Column` or str + str : :class:`~pyspark.sql.Column` or column name A column of string. - pos : :class:`~pyspark.sql.Column` or str + pos : :class:`~pyspark.sql.Column` or column name A column of string, the substring of `str` that starts at `pos`. - len : :class:`~pyspark.sql.Column` or str, optional + len : :class:`~pyspark.sql.Column` or column name, optional A column of string, the substring of `str` is of length `len`. + Returns + ------- + :class:`~pyspark.sql.Column` + substring of given value. + + See Also + -------- + :meth:`pyspark.sql.functions.instr` + :meth:`pyspark.sql.functions.substring` + :meth:`pyspark.sql.functions.substring_index` + :meth:`pyspark.sql.Column.substr` + Examples -------- - >>> import pyspark.sql.functions as sf - >>> spark.createDataFrame( - ... [("Spark SQL", 5, 1,)], ["a", "b", "c"] - ... 
).select(sf.substr("a", "b", "c")).show() - +---------------+ - |substr(a, b, c)| - +---------------+ - | k| - +---------------+ + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([("Spark SQL", 5, 1,)], ["a", "b", "c"]) + >>> df.select("*", sf.substr("a", "b", "c")).show() + +---------+---+---+---------------+ + | a| b| c|substr(a, b, c)| + +---------+---+---+---------------+ + |Spark SQL| 5| 1| k| + +---------+---+---+---------------+ - >>> import pyspark.sql.functions as sf - >>> spark.createDataFrame( - ... [("Spark SQL", 5, 1,)], ["a", "b", "c"] - ... ).select(sf.substr("a", "b")).show() - +------------------------+ - |substr(a, b, 2147483647)| - +------------------------+ - | k SQL| - +------------------------+ + >>> df.select("*", sf.substr(df.a, df.b)).show() + +---------+---+---+------------------------+ + | a| b| c|substr(a, b, 2147483647)| + +---------+---+---+------------------------+ + |Spark SQL| 5| 1| k SQL| + +---------+---+---+------------------------+ """ if len is not None: return _invoke_function_over_columns("substr", str, pos, len) @@ -16326,12 +17604,12 @@ def collation(col: "ColumnOrName") -> Column: Examples -------- >>> df = spark.createDataFrame([('name',)], ['dt']) - >>> df.select(collation('dt').alias('collation')).show() - +-----------+ - | collation| - +-----------+ - |UTF8_BINARY| - +-----------+ + >>> df.select(collation('dt').alias('collation')).show(truncate=False) + +--------------------------+ + |collation | + +--------------------------+ + |SYSTEM.BUILTIN.UTF8_BINARY| + +--------------------------+ """ return _invoke_function_over_columns("collation", col) @@ -16974,6 +18252,7 @@ def concat(*cols: "ColumnOrName") -> Column: See Also -------- + :meth:`pyspark.sql.functions.concat_ws` :meth:`pyspark.sql.functions.array_join` : to concatenate string columns with delimiter Examples @@ -18280,7 +19559,7 @@ def explode(col: "ColumnOrName") -> Column: Parameters ---------- - col : 
:class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name Target column to work on. Returns @@ -18293,6 +19572,8 @@ def explode(col: "ColumnOrName") -> Column: :meth:`pyspark.sql.functions.posexplode` :meth:`pyspark.sql.functions.explode_outer` :meth:`pyspark.sql.functions.posexplode_outer` + :meth:`pyspark.sql.functions.inline` + :meth:`pyspark.sql.functions.inline_outer` Notes ----- @@ -18302,119 +19583,79 @@ def explode(col: "ColumnOrName") -> Column: -------- Example 1: Exploding an array column - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(id=1, values=[1, 2, 3])]) - >>> df.select(sf.explode(df.values).alias("value")).show() - +-----+ - |value| - +-----+ - | 1| - | 2| - | 3| - +-----+ + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,ARRAY(1,2,3,NULL)), (2,ARRAY()), (3,NULL) AS t(i,a)') + >>> df.show() + +---+---------------+ + | i| a| + +---+---------------+ + | 1|[1, 2, 3, NULL]| + | 2| []| + | 3| NULL| + +---+---------------+ + + >>> df.select('*', sf.explode('a')).show() + +---+---------------+----+ + | i| a| col| + +---+---------------+----+ + | 1|[1, 2, 3, NULL]| 1| + | 1|[1, 2, 3, NULL]| 2| + | 1|[1, 2, 3, NULL]| 3| + | 1|[1, 2, 3, NULL]|NULL| + +---+---------------+----+ Example 2: Exploding a map column - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(id=1, values={"a": "b", "c": "d"})]) - >>> df.select(sf.explode(df.values).alias("key", "value")).show() - +---+-----+ - |key|value| - +---+-----+ - | a| b| - | c| d| - +---+-----+ - - Example 3: Exploding an array column with multiple rows - - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame( - ... 
[Row(id=1, values=[1, 2]), Row(id=2, values=[3, 4])]) - >>> df.select("id", sf.explode(df.values).alias("value")).show() - +---+-----+ - | id|value| - +---+-----+ - | 1| 1| - | 1| 2| - | 2| 3| - | 2| 4| - +---+-----+ - - Example 4: Exploding a map column with multiple rows - - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([ - ... Row(id=1, values={"a": "b", "c": "d"}), - ... Row(id=2, values={"e": "f", "g": "h"}) - ... ]) - >>> df.select("id", sf.explode(df.values).alias("key", "value")).show() - +---+---+-----+ - | id|key|value| - +---+---+-----+ - | 1| a| b| - | 1| c| d| - | 2| e| f| - | 2| g| h| - +---+---+-----+ - - Example 5: Exploding multiple array columns + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,MAP(1,2,3,4,5,NULL)), (2,MAP()), (3,NULL) AS t(i,m)') + >>> df.show(truncate=False) + +---+---------------------------+ + |i |m | + +---+---------------------------+ + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}| + |2 |{} | + |3 |NULL | + +---+---------------------------+ - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(a=1, list1=[1, 2], list2=[3, 4])]) - >>> df.select(sf.explode(df.list1).alias("list1"), "list2") \\ - ... 
.select("list1", sf.explode(df.list2).alias("list2")).show() - +-----+-----+ - |list1|list2| - +-----+-----+ - | 1| 3| - | 1| 4| - | 2| 3| - | 2| 4| - +-----+-----+ + >>> df.select('*', sf.explode('m')).show(truncate=False) + +---+---------------------------+---+-----+ + |i |m |key|value| + +---+---------------------------+---+-----+ + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|1 |2 | + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|3 |4 | + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|5 |NULL | + +---+---------------------------+---+-----+ - Example 6: Exploding an array of struct column + Example 3: Exploding multiple array columns >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame( - ... [(1, [(1, 2), (3, 4)])], - ... "id: int, structlist: array>") - >>> df = df.select(sf.explode(df.structlist).alias("struct")) - >>> df.select("struct.*").show() + >>> df = spark.sql('SELECT ARRAY(1,2) AS a1, ARRAY(3,4,5) AS a2') + >>> df.select( + ... '*', sf.explode('a1').alias('v1') + ... 
).select('*', sf.explode('a2').alias('v2')).show() + +------+---------+---+---+ + | a1| a2| v1| v2| + +------+---------+---+---+ + |[1, 2]|[3, 4, 5]| 1| 3| + |[1, 2]|[3, 4, 5]| 1| 4| + |[1, 2]|[3, 4, 5]| 1| 5| + |[1, 2]|[3, 4, 5]| 2| 3| + |[1, 2]|[3, 4, 5]| 2| 4| + |[1, 2]|[3, 4, 5]| 2| 5| + +------+---------+---+---+ + + Example 4: Exploding an array of struct column + + >>> import pyspark.sql.functions as sf + >>> df = spark.sql('SELECT ARRAY(NAMED_STRUCT("a",1,"b",2), NAMED_STRUCT("a",3,"b",4)) AS a') + >>> df.select(sf.explode('a').alias("s")).select("s.*").show() +---+---+ | a| b| +---+---+ | 1| 2| | 3| 4| +---+---+ - - Example 7: Exploding an empty array column - - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([(1, [])], "id: int, values: array") - >>> df.select(sf.explode(df.values).alias("value")).show() - +-----+ - |value| - +-----+ - +-----+ - - Example 8: Exploding an empty map column - - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([(1, {})], "id: int, values: map") - >>> df.select(sf.explode(df.values).alias("key", "value")).show() - +---+-----+ - |key|value| - +---+-----+ - +---+-----+ - """ + """ # noqa: E501 return _invoke_function_over_columns("explode", col) @@ -18432,7 +19673,7 @@ def posexplode(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -18440,20 +19681,61 @@ def posexplode(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` one row per array item or map key value including positions as a separate column. 
+ See Also + -------- + :meth:`pyspark.sql.functions.explode` + :meth:`pyspark.sql.functions.explode_outer` + :meth:`pyspark.sql.functions.posexplode_outer` + :meth:`pyspark.sql.functions.inline` + :meth:`pyspark.sql.functions.inline_outer` + Examples -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) - >>> df.select(posexplode(df.intlist)).collect() - [Row(pos=0, col=1), Row(pos=1, col=2), Row(pos=2, col=3)] + Example 1: Exploding an array column - >>> df.select(posexplode(df.mapfield)).show() - +---+---+-----+ - |pos|key|value| - +---+---+-----+ - | 0| a| b| - +---+---+-----+ - """ + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,ARRAY(1,2,3,NULL)), (2,ARRAY()), (3,NULL) AS t(i,a)') + >>> df.show() + +---+---------------+ + | i| a| + +---+---------------+ + | 1|[1, 2, 3, NULL]| + | 2| []| + | 3| NULL| + +---+---------------+ + + >>> df.select('*', sf.posexplode('a')).show() + +---+---------------+---+----+ + | i| a|pos| col| + +---+---------------+---+----+ + | 1|[1, 2, 3, NULL]| 0| 1| + | 1|[1, 2, 3, NULL]| 1| 2| + | 1|[1, 2, 3, NULL]| 2| 3| + | 1|[1, 2, 3, NULL]| 3|NULL| + +---+---------------+---+----+ + + Example 2: Exploding a map column + + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,MAP(1,2,3,4,5,NULL)), (2,MAP()), (3,NULL) AS t(i,m)') + >>> df.show(truncate=False) + +---+---------------------------+ + |i |m | + +---+---------------------------+ + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}| + |2 |{} | + |3 |NULL | + +---+---------------------------+ + + >>> df.select('*', sf.posexplode('m')).show(truncate=False) + +---+---------------------------+---+---+-----+ + |i |m |pos|key|value| + +---+---------------------------+---+---+-----+ + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|0 |1 |2 | + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|1 |3 |4 | + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|2 |5 |NULL | + 
+---+---------------------------+---+---+-----+ + """ # noqa: E501 return _invoke_function_over_columns("posexplode", col) @@ -18469,7 +19751,7 @@ def inline(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name Input column of values to explode. Returns @@ -18480,6 +19762,9 @@ def inline(col: "ColumnOrName") -> Column: See Also -------- :meth:`pyspark.sql.functions.explode` + :meth:`pyspark.sql.functions.explode_outer` + :meth:`pyspark.sql.functions.posexplode` + :meth:`pyspark.sql.functions.posexplode_outer` :meth:`pyspark.sql.functions.inline_outer` Examples @@ -18487,102 +19772,89 @@ def inline(col: "ColumnOrName") -> Column: Example 1: Using inline with a single struct array column >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(structlist=[Row(a=1, b=2), Row(a=3, b=4)])]) - >>> df.select(sf.inline(df.structlist)).show() - +---+---+ - | a| b| - +---+---+ - | 1| 2| - | 3| 4| - +---+---+ + >>> df = spark.sql('SELECT ARRAY(NAMED_STRUCT("a",1,"b",2), NAMED_STRUCT("a",3,"b",4)) AS a') + >>> df.select('*', sf.inline(df.a)).show() + +----------------+---+---+ + | a| a| b| + +----------------+---+---+ + |[{1, 2}, {3, 4}]| 1| 2| + |[{1, 2}, {3, 4}]| 3| 4| + +----------------+---+---+ Example 2: Using inline with a column name >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(structlist=[Row(a=1, b=2), Row(a=3, b=4)])]) - >>> df.select(sf.inline("structlist")).show() - +---+---+ - | a| b| - +---+---+ - | 1| 2| - | 3| 4| - +---+---+ + >>> df = spark.sql('SELECT ARRAY(NAMED_STRUCT("a",1,"b",2), NAMED_STRUCT("a",3,"b",4)) AS a') + >>> df.select('*', sf.inline('a')).show() + +----------------+---+---+ + | a| a| b| + +----------------+---+---+ + |[{1, 2}, {3, 4}]| 1| 2| + |[{1, 2}, {3, 4}]| 3| 4| + +----------------+---+---+ Example 3: Using inline with an 
alias >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(structlist=[Row(a=1, b=2), Row(a=3, b=4)])]) - >>> df.select(sf.inline("structlist").alias("c1", "c2")).show() - +---+---+ - | c1| c2| - +---+---+ - | 1| 2| - | 3| 4| - +---+---+ + >>> df = spark.sql('SELECT ARRAY(NAMED_STRUCT("a",1,"b",2), NAMED_STRUCT("a",3,"b",4)) AS a') + >>> df.select('*', sf.inline('a').alias("c1", "c2")).show() + +----------------+---+---+ + | a| c1| c2| + +----------------+---+---+ + |[{1, 2}, {3, 4}]| 1| 2| + |[{1, 2}, {3, 4}]| 3| 4| + +----------------+---+---+ Example 4: Using inline with multiple struct array columns >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([ - ... Row(structlist1=[Row(a=1, b=2), Row(a=3, b=4)], - ... structlist2=[Row(c=5, d=6), Row(c=7, d=8)]) - ... ]) - >>> df.select(sf.inline("structlist1"), "structlist2") \\ - ... .select("a", "b", sf.inline("structlist2")).show() - +---+---+---+---+ - | a| b| c| d| - +---+---+---+---+ - | 1| 2| 5| 6| - | 1| 2| 7| 8| - | 3| 4| 5| 6| - | 3| 4| 7| 8| - +---+---+---+---+ + >>> df = spark.sql('SELECT ARRAY(NAMED_STRUCT("a",1,"b",2), NAMED_STRUCT("a",3,"b",4)) AS a1, ARRAY(NAMED_STRUCT("c",5,"d",6), NAMED_STRUCT("c",7,"d",8)) AS a2') + >>> df.select( + ... '*', sf.inline('a1') + ... ).select('*', sf.inline('a2')).show() + +----------------+----------------+---+---+---+---+ + | a1| a2| a| b| c| d| + +----------------+----------------+---+---+---+---+ + |[{1, 2}, {3, 4}]|[{5, 6}, {7, 8}]| 1| 2| 5| 6| + |[{1, 2}, {3, 4}]|[{5, 6}, {7, 8}]| 1| 2| 7| 8| + |[{1, 2}, {3, 4}]|[{5, 6}, {7, 8}]| 3| 4| 5| 6| + |[{1, 2}, {3, 4}]|[{5, 6}, {7, 8}]| 3| 4| 7| 8| + +----------------+----------------+---+---+---+---+ Example 5: Using inline with a nested struct array column >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([ - ... 
Row(structlist=Row(a=1, b=2, nested=[Row(c=3, d=4), Row(c=5, d=6)])) - ... ]) - >>> df.select(sf.inline("structlist.nested")).show() - +---+---+ - | c| d| - +---+---+ - | 3| 4| - | 5| 6| - +---+---+ - - Example 6: Using inline with an empty struct array column - - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame( - ... [Row(structlist=[])], "structlist: array>") - >>> df.select(sf.inline(df.structlist)).show() - +---+---+ - | a| b| - +---+---+ - +---+---+ - - Example 7: Using inline with a struct array column containing null values - - >>> import pyspark.sql.functions as sf - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(structlist=[Row(a=1, b=2), None, Row(a=3, b=4)])]) - >>> df.select(sf.inline(df.structlist)).show() - +----+----+ - | a| b| - +----+----+ - | 1| 2| - |NULL|NULL| - | 3| 4| - +----+----+ - """ + >>> df = spark.sql('SELECT NAMED_STRUCT("a",1,"b",2,"c",ARRAY(NAMED_STRUCT("c",3,"d",4), NAMED_STRUCT("c",5,"d",6))) AS s') + >>> df.select('*', sf.inline('s.c')).show(truncate=False) + +------------------------+---+---+ + |s |c |d | + +------------------------+---+---+ + |{1, 2, [{3, 4}, {5, 6}]}|3 |4 | + |{1, 2, [{3, 4}, {5, 6}]}|5 |6 | + +------------------------+---+---+ + + Example 6: Using inline with a column containing: array containing null, empty array and null + + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,ARRAY(NAMED_STRUCT("a",1,"b",2), NULL, NAMED_STRUCT("a",3,"b",4))), (2,ARRAY()), (3,NULL) AS t(i,s)') + >>> df.show(truncate=False) + +---+----------------------+ + |i |s | + +---+----------------------+ + |1 |[{1, 2}, NULL, {3, 4}]| + |2 |[] | + |3 |NULL | + +---+----------------------+ + + >>> df.select('*', sf.inline('s')).show(truncate=False) + +---+----------------------+----+----+ + |i |s |a |b | + +---+----------------------+----+----+ + |1 |[{1, 2}, NULL, {3, 4}]|1 |2 | + |1 |[{1, 2}, NULL, {3, 4}]|NULL|NULL| + 
|1 |[{1, 2}, NULL, {3, 4}]|3 |4 | + +---+----------------------+----+----+ + """ # noqa: E501 return _invoke_function_over_columns("inline", col) @@ -18601,7 +19873,7 @@ def explode_outer(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -18609,31 +19881,47 @@ def explode_outer(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` one row per array item or map key value. + See Also + -------- + :meth:`pyspark.sql.functions.explode` + :meth:`pyspark.sql.functions.posexplode` + :meth:`pyspark.sql.functions.posexplode_outer` + :meth:`pyspark.sql.functions.inline` + :meth:`pyspark.sql.functions.inline_outer` + Examples -------- - >>> df = spark.createDataFrame( - ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)], - ... ("id", "an_array", "a_map") - ... ) - >>> df.select("id", "an_array", explode_outer("a_map")).show() - +---+----------+----+-----+ - | id| an_array| key|value| - +---+----------+----+-----+ - | 1|[foo, bar]| x| 1.0| - | 2| []|NULL| NULL| - | 3| NULL|NULL| NULL| - +---+----------+----+-----+ - - >>> df.select("id", "a_map", explode_outer("an_array")).show() - +---+----------+----+ - | id| a_map| col| - +---+----------+----+ - | 1|{x -> 1.0}| foo| - | 1|{x -> 1.0}| bar| - | 2| {}|NULL| - | 3| NULL|NULL| - +---+----------+----+ - """ + Example 1: Using an array column + + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,ARRAY(1,2,3,NULL)), (2,ARRAY()), (3,NULL) AS t(i,a)') + >>> df.select('*', sf.explode_outer('a')).show() + +---+---------------+----+ + | i| a| col| + +---+---------------+----+ + | 1|[1, 2, 3, NULL]| 1| + | 1|[1, 2, 3, NULL]| 2| + | 1|[1, 2, 3, NULL]| 3| + | 1|[1, 2, 3, NULL]|NULL| + | 2| []|NULL| + | 3| NULL|NULL| + +---+---------------+----+ + + Example 2: Using a map column + + >>> from pyspark.sql import functions as sf + >>> df = 
spark.sql('SELECT * FROM VALUES (1,MAP(1,2,3,4,5,NULL)), (2,MAP()), (3,NULL) AS t(i,m)') + >>> df.select('*', sf.explode_outer('m')).show(truncate=False) + +---+---------------------------+----+-----+ + |i |m |key |value| + +---+---------------------------+----+-----+ + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|1 |2 | + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|3 |4 | + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|5 |NULL | + |2 |{} |NULL|NULL | + |3 |NULL |NULL|NULL | + +---+---------------------------+----+-----+ + """ # noqa: E501 return _invoke_function_over_columns("explode_outer", col) @@ -18652,7 +19940,7 @@ def posexplode_outer(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name target column to work on. Returns @@ -18660,30 +19948,47 @@ def posexplode_outer(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` one row per array item or map key value including positions as a separate column. + See Also + -------- + :meth:`pyspark.sql.functions.explode` + :meth:`pyspark.sql.functions.explode_outer` + :meth:`pyspark.sql.functions.posexplode` + :meth:`pyspark.sql.functions.inline` + :meth:`pyspark.sql.functions.inline_outer` + Examples -------- - >>> df = spark.createDataFrame( - ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)], - ... ("id", "an_array", "a_map") - ... 
) - >>> df.select("id", "an_array", posexplode_outer("a_map")).show() - +---+----------+----+----+-----+ - | id| an_array| pos| key|value| - +---+----------+----+----+-----+ - | 1|[foo, bar]| 0| x| 1.0| - | 2| []|NULL|NULL| NULL| - | 3| NULL|NULL|NULL| NULL| - +---+----------+----+----+-----+ - >>> df.select("id", "a_map", posexplode_outer("an_array")).show() - +---+----------+----+----+ - | id| a_map| pos| col| - +---+----------+----+----+ - | 1|{x -> 1.0}| 0| foo| - | 1|{x -> 1.0}| 1| bar| - | 2| {}|NULL|NULL| - | 3| NULL|NULL|NULL| - +---+----------+----+----+ - """ + Example 1: Using an array column + + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,ARRAY(1,2,3,NULL)), (2,ARRAY()), (3,NULL) AS t(i,a)') + >>> df.select('*', sf.posexplode_outer('a')).show() + +---+---------------+----+----+ + | i| a| pos| col| + +---+---------------+----+----+ + | 1|[1, 2, 3, NULL]| 0| 1| + | 1|[1, 2, 3, NULL]| 1| 2| + | 1|[1, 2, 3, NULL]| 2| 3| + | 1|[1, 2, 3, NULL]| 3|NULL| + | 2| []|NULL|NULL| + | 3| NULL|NULL|NULL| + +---+---------------+----+----+ + + Example 2: Using a map column + + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,MAP(1,2,3,4,5,NULL)), (2,MAP()), (3,NULL) AS t(i,m)') + >>> df.select('*', sf.posexplode_outer('m')).show(truncate=False) + +---+---------------------------+----+----+-----+ + |i |m |pos |key |value| + +---+---------------------------+----+----+-----+ + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|0 |1 |2 | + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|1 |3 |4 | + |1 |{1 -> 2, 3 -> 4, 5 -> NULL}|2 |5 |NULL | + |2 |{} |NULL|NULL|NULL | + |3 |NULL |NULL|NULL|NULL | + +---+---------------------------+----+----+-----+ + """ # noqa: E501 return _invoke_function_over_columns("posexplode_outer", col) @@ -18697,7 +20002,7 @@ def inline_outer(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name input 
column of values to explode. Returns @@ -18707,7 +20012,10 @@ def inline_outer(col: "ColumnOrName") -> Column: See Also -------- + :meth:`pyspark.sql.functions.explode` :meth:`pyspark.sql.functions.explode_outer` + :meth:`pyspark.sql.functions.posexplode` + :meth:`pyspark.sql.functions.posexplode_outer` :meth:`pyspark.sql.functions.inline` Notes @@ -18716,20 +20024,27 @@ def inline_outer(col: "ColumnOrName") -> Column: Examples -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([ - ... Row(id=1, structlist=[Row(a=1, b=2), Row(a=3, b=4)]), - ... Row(id=2, structlist=[]) - ... ]) - >>> df.select('id', inline_outer(df.structlist)).show() - +---+----+----+ - | id| a| b| - +---+----+----+ - | 1| 1| 2| - | 1| 3| 4| - | 2|NULL|NULL| - +---+----+----+ - """ + >>> from pyspark.sql import functions as sf + >>> df = spark.sql('SELECT * FROM VALUES (1,ARRAY(NAMED_STRUCT("a",1,"b",2), NULL, NAMED_STRUCT("a",3,"b",4))), (2,ARRAY()), (3,NULL) AS t(i,s)') + >>> df.printSchema() + root + |-- i: integer (nullable = false) + |-- s: array (nullable = true) + | |-- element: struct (containsNull = true) + | | |-- a: integer (nullable = false) + | | |-- b: integer (nullable = false) + + >>> df.select('*', sf.inline_outer('s')).show(truncate=False) + +---+----------------------+----+----+ + |i |s |a |b | + +---+----------------------+----+----+ + |1 |[{1, 2}, NULL, {3, 4}]|1 |2 | + |1 |[{1, 2}, NULL, {3, 4}]|NULL|NULL| + |1 |[{1, 2}, NULL, {3, 4}]|3 |4 | + |2 |[] |NULL|NULL| + |3 |NULL |NULL|NULL| + +---+----------------------+----+----+ + """ # noqa: E501 return _invoke_function_over_columns("inline_outer", col) @@ -18817,7 +20132,7 @@ def from_json( """ Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType` as keys type, :class:`StructType` or :class:`ArrayType` with - the specified schema. Returns `null`, in the case of an unparseable string. + the specified schema. Returns `null`, in the case of an unparsable string. .. 
versionadded:: 2.1.0 @@ -19429,7 +20744,7 @@ def from_xml( ) -> Column: """ Parses a column containing a XML string to a row with - the specified schema. Returns `null`, in the case of an unparseable string. + the specified schema. Returns `null`, in the case of an unparsable string. .. versionadded:: 4.0.0 @@ -21823,7 +23138,7 @@ def transform_keys(col: "ColumnOrName", f: Callable[[Column, Column], Column]) - Returns ------- :class:`~pyspark.sql.Column` - a new map of enties where new keys were calculated by applying given function to + a new map of entries where new keys were calculated by applying given function to each key value argument. Examples @@ -21863,7 +23178,7 @@ def transform_values(col: "ColumnOrName", f: Callable[[Column, Column], Column]) Returns ------- :class:`~pyspark.sql.Column` - a new map of enties where new values were calculated by applying given function to + a new map of entries where new values were calculated by applying given function to each key value argument. Examples @@ -22314,7 +23629,7 @@ def convert_timezone( the current session time zone is used as the source time zone. targetTz : :class:`~pyspark.sql.Column` The time zone to which the input timestamp should be converted. - sourceTs : :class:`~pyspark.sql.Column` + sourceTs : :class:`~pyspark.sql.Column` or column name A timestamp without time zone. Returns @@ -22322,35 +23637,43 @@ def convert_timezone( :class:`~pyspark.sql.Column` A new column that contains a timestamp for converted time zone. + See Also + -------- + :meth:`pyspark.sql.functions.current_timezone` + Examples -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") - Example 1: Converts the timestamp without time zone `sourceTs`, - the source time zone `sourceTz` is None. + Example 1: Converts the timestamp without time zone `sourceTs`. 
>>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) - >>> df.select(sf.convert_timezone( # doctest: +SKIP - ... None, sf.lit('Asia/Hong_Kong'), 'dt') - ... ).show() - +--------------------------------------------------------+ - |convert_timezone(current_timezone(), Asia/Hong_Kong, dt)| - +--------------------------------------------------------+ - | 2015-04-08 00:00:00| - +--------------------------------------------------------+ + >>> df = spark.createDataFrame([('2015-04-08 00:00:00',)], ['ts']) + >>> df.select( + ... '*', + ... sf.convert_timezone(None, sf.lit('Asia/Hong_Kong'), 'ts') + ... ).show() # doctest: +SKIP + +-------------------+--------------------------------------------------------+ + | ts|convert_timezone(current_timezone(), Asia/Hong_Kong, ts)| + +-------------------+--------------------------------------------------------+ + |2015-04-08 00:00:00| 2015-04-08 15:00:00| + +-------------------+--------------------------------------------------------+ - Example 2: Converts the timestamp without time zone `sourceTs`. + Example 2: Converts the timestamp with time zone `sourceTs`. >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) - >>> df.select(sf.convert_timezone( - ... sf.lit('America/Los_Angeles'), sf.lit('Asia/Hong_Kong'), 'dt') + >>> df = spark.createDataFrame([('2015-04-08 15:00:00',)], ['ts']) + >>> df.select( + ... '*', + ... sf.convert_timezone(sf.lit('Asia/Hong_Kong'), sf.lit('America/Los_Angeles'), df.ts) ... 
).show() - +---------------------------------------------------------+ - |convert_timezone(America/Los_Angeles, Asia/Hong_Kong, dt)| - +---------------------------------------------------------+ - | 2015-04-08 15:00:00| - +---------------------------------------------------------+ + +-------------------+---------------------------------------------------------+ + | ts|convert_timezone(Asia/Hong_Kong, America/Los_Angeles, ts)| + +-------------------+---------------------------------------------------------+ + |2015-04-08 15:00:00| 2015-04-08 00:00:00| + +-------------------+---------------------------------------------------------+ + + >>> spark.conf.unset("spark.sql.session.timeZone") """ if sourceTz is None: return _invoke_function_over_columns("convert_timezone", targetTz, sourceTs) @@ -22372,13 +23695,13 @@ def make_dt_interval( Parameters ---------- - days : :class:`~pyspark.sql.Column` or str, optional + days : :class:`~pyspark.sql.Column` or column name, optional The number of days, positive or negative. - hours : :class:`~pyspark.sql.Column` or str, optional + hours : :class:`~pyspark.sql.Column` or column name, optional The number of hours, positive or negative. - mins : :class:`~pyspark.sql.Column` or str, optional + mins : :class:`~pyspark.sql.Column` or column name, optional The number of minutes, positive or negative. - secs : :class:`~pyspark.sql.Column` or str, optional + secs : :class:`~pyspark.sql.Column` or column name, optional The number of seconds with the fractional part in microsecond precision. Returns @@ -22386,63 +23709,62 @@ def make_dt_interval( :class:`~pyspark.sql.Column` A new column that contains a DayTimeIntervalType duration. - Examples + See Also -------- + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.make_ym_interval` + :meth:`pyspark.sql.functions.try_make_interval` + Examples + -------- Example 1: Make DayTimeIntervalType duration from days, hours, mins and secs. 
>>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], - ... ["day", "hour", "min", "sec"]) - >>> df.select(sf.make_dt_interval(df.day, df.hour, df.min, df.sec)).show(truncate=False) - +------------------------------------------+ - |make_dt_interval(day, hour, min, sec) | - +------------------------------------------+ - |INTERVAL '1 12:30:01.001001' DAY TO SECOND| - +------------------------------------------+ + >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], ['day', 'hour', 'min', 'sec']) + >>> df.select('*', sf.make_dt_interval(df.day, df.hour, df.min, df.sec)).show(truncate=False) + +---+----+---+--------+------------------------------------------+ + |day|hour|min|sec |make_dt_interval(day, hour, min, sec) | + +---+----+---+--------+------------------------------------------+ + |1 |12 |30 |1.001001|INTERVAL '1 12:30:01.001001' DAY TO SECOND| + +---+----+---+--------+------------------------------------------+ Example 2: Make DayTimeIntervalType duration from days, hours and mins. >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], - ... 
["day", "hour", "min", "sec"]) - >>> df.select(sf.make_dt_interval(df.day, df.hour, df.min)).show(truncate=False) - +-----------------------------------+ - |make_dt_interval(day, hour, min, 0)| - +-----------------------------------+ - |INTERVAL '1 12:30:00' DAY TO SECOND| - +-----------------------------------+ + >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], ['day', 'hour', 'min', 'sec']) + >>> df.select('*', sf.make_dt_interval(df.day, 'hour', df.min)).show(truncate=False) + +---+----+---+--------+-----------------------------------+ + |day|hour|min|sec |make_dt_interval(day, hour, min, 0)| + +---+----+---+--------+-----------------------------------+ + |1 |12 |30 |1.001001|INTERVAL '1 12:30:00' DAY TO SECOND| + +---+----+---+--------+-----------------------------------+ Example 3: Make DayTimeIntervalType duration from days and hours. >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], - ... ["day", "hour", "min", "sec"]) - >>> df.select(sf.make_dt_interval(df.day, df.hour)).show(truncate=False) - +-----------------------------------+ - |make_dt_interval(day, hour, 0, 0) | - +-----------------------------------+ - |INTERVAL '1 12:00:00' DAY TO SECOND| - +-----------------------------------+ + >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], ['day', 'hour', 'min', 'sec']) + >>> df.select('*', sf.make_dt_interval(df.day, df.hour)).show(truncate=False) + +---+----+---+--------+-----------------------------------+ + |day|hour|min|sec |make_dt_interval(day, hour, 0, 0) | + +---+----+---+--------+-----------------------------------+ + |1 |12 |30 |1.001001|INTERVAL '1 12:00:00' DAY TO SECOND| + +---+----+---+--------+-----------------------------------+ Example 4: Make DayTimeIntervalType duration from days. >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], - ... 
["day", "hour", "min", "sec"]) - >>> df.select(sf.make_dt_interval(df.day)).show(truncate=False) - +-----------------------------------+ - |make_dt_interval(day, 0, 0, 0) | - +-----------------------------------+ - |INTERVAL '1 00:00:00' DAY TO SECOND| - +-----------------------------------+ + >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], ['day', 'hour', 'min', 'sec']) + >>> df.select('*', sf.make_dt_interval('day')).show(truncate=False) + +---+----+---+--------+-----------------------------------+ + |day|hour|min|sec |make_dt_interval(day, 0, 0, 0) | + +---+----+---+--------+-----------------------------------+ + |1 |12 |30 |1.001001|INTERVAL '1 00:00:00' DAY TO SECOND| + +---+----+---+--------+-----------------------------------+ - Example 5: Make DayTimeIntervalType duration. + Example 5: Make empty interval. >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([[1, 12, 30, 01.001001]], - ... ["day", "hour", "min", "sec"]) - >>> df.select(sf.make_dt_interval()).show(truncate=False) + >>> spark.range(1).select(sf.make_dt_interval()).show(truncate=False) +-----------------------------------+ |make_dt_interval(0, 0, 0, 0) | +-----------------------------------+ @@ -22474,19 +23796,19 @@ def try_make_interval( Parameters ---------- - years : :class:`~pyspark.sql.Column` or str, optional + years : :class:`~pyspark.sql.Column` or column name, optional The number of years, positive or negative. - months : :class:`~pyspark.sql.Column` or str, optional + months : :class:`~pyspark.sql.Column` or column name, optional The number of months, positive or negative. - weeks : :class:`~pyspark.sql.Column` or str, optional + weeks : :class:`~pyspark.sql.Column` or column name, optional The number of weeks, positive or negative. - days : :class:`~pyspark.sql.Column` or str, optional + days : :class:`~pyspark.sql.Column` or column name, optional The number of days, positive or negative. 
- hours : :class:`~pyspark.sql.Column` or str, optional + hours : :class:`~pyspark.sql.Column` or column name, optional The number of hours, positive or negative. - mins : :class:`~pyspark.sql.Column` or str, optional + mins : :class:`~pyspark.sql.Column` or column name, optional The number of minutes, positive or negative. - secs : :class:`~pyspark.sql.Column` or str, optional + secs : :class:`~pyspark.sql.Column` or column name, optional The number of seconds with the fractional part in microsecond precision. Returns @@ -22494,16 +23816,21 @@ def try_make_interval( :class:`~pyspark.sql.Column` A new column that contains an interval. - Examples + See Also -------- + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.make_dt_interval` + :meth:`pyspark.sql.functions.make_ym_interval` + Examples + -------- Example 1: Try make interval from years, months, weeks, days, hours, mins and secs. >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_interval( - ... df.year, df.month, df.week, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.try_make_interval(df.year, df.month, 'week', df.day, 'hour', df.min, df.sec) ... ).show(truncate=False) +---------------------------------------------------------------+ |try_make_interval(year, month, week, day, hour, min, sec) | @@ -22515,9 +23842,9 @@ def try_make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_interval( - ... df.year, df.month, df.week, df.day, df.hour, df.min) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.try_make_interval(df.year, df.month, 'week', df.day, df.hour, df.min) ... 
).show(truncate=False) +-------------------------------------------------------+ |try_make_interval(year, month, week, day, hour, min, 0)| @@ -22529,9 +23856,9 @@ def try_make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_interval( - ... df.year, df.month, df.week, df.day, df.hour) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.try_make_interval(df.year, df.month, 'week', df.day, df.hour) ... ).show(truncate=False) +-----------------------------------------------------+ |try_make_interval(year, month, week, day, hour, 0, 0)| @@ -22543,8 +23870,8 @@ def try_make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_interval(df.year, df.month, df.week, df.day)).show(truncate=False) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select(sf.try_make_interval(df.year, 'month', df.week, df.day)).show(truncate=False) +--------------------------------------------------+ |try_make_interval(year, month, week, day, 0, 0, 0)| +--------------------------------------------------+ @@ -22555,8 +23882,8 @@ def try_make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_interval(df.year, df.month, df.week)).show(truncate=False) + ... 
['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select(sf.try_make_interval(df.year, 'month', df.week)).show(truncate=False) +------------------------------------------------+ |try_make_interval(year, month, week, 0, 0, 0, 0)| +------------------------------------------------+ @@ -22567,8 +23894,8 @@ def try_make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_interval(df.year, df.month)).show(truncate=False) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select(sf.try_make_interval(df.year, 'month')).show(truncate=False) +---------------------------------------------+ |try_make_interval(year, month, 0, 0, 0, 0, 0)| +---------------------------------------------+ @@ -22579,7 +23906,7 @@ def try_make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) >>> df.select(sf.try_make_interval(df.year)).show(truncate=False) +-----------------------------------------+ |try_make_interval(year, 0, 0, 0, 0, 0, 0)| @@ -22587,18 +23914,25 @@ def try_make_interval( |100 years | +-----------------------------------------+ - Example 8: Try make interval from years with overflow. + Example 8: Try make empty interval. >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([[2147483647, 11, 1, 1, 12, 30, 01.001001]], - ... 
["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_interval(df.year)).show(truncate=False) - +-----------------------------------------+ - |try_make_interval(year, 0, 0, 0, 0, 0, 0)| - +-----------------------------------------+ - |NULL | - +-----------------------------------------+ + >>> spark.range(1).select(sf.try_make_interval()).show(truncate=False) + +--------------------------------------+ + |try_make_interval(0, 0, 0, 0, 0, 0, 0)| + +--------------------------------------+ + |0 seconds | + +--------------------------------------+ + + Example 9: Try make interval from years with overflow. + >>> import pyspark.sql.functions as sf + >>> spark.range(1).select(sf.try_make_interval(sf.lit(2147483647))).show(truncate=False) + +-----------------------------------------------+ + |try_make_interval(2147483647, 0, 0, 0, 0, 0, 0)| + +-----------------------------------------------+ + |NULL | + +-----------------------------------------------+ """ _years = lit(0) if years is None else years _months = lit(0) if months is None else months @@ -22629,19 +23963,19 @@ def make_interval( Parameters ---------- - years : :class:`~pyspark.sql.Column` or str, optional + years : :class:`~pyspark.sql.Column` or column name, optional The number of years, positive or negative. - months : :class:`~pyspark.sql.Column` or str, optional + months : :class:`~pyspark.sql.Column` or column name, optional The number of months, positive or negative. - weeks : :class:`~pyspark.sql.Column` or str, optional + weeks : :class:`~pyspark.sql.Column` or column name, optional The number of weeks, positive or negative. - days : :class:`~pyspark.sql.Column` or str, optional + days : :class:`~pyspark.sql.Column` or column name, optional The number of days, positive or negative. - hours : :class:`~pyspark.sql.Column` or str, optional + hours : :class:`~pyspark.sql.Column` or column name, optional The number of hours, positive or negative. 
- mins : :class:`~pyspark.sql.Column` or str, optional + mins : :class:`~pyspark.sql.Column` or column name, optional The number of minutes, positive or negative. - secs : :class:`~pyspark.sql.Column` or str, optional + secs : :class:`~pyspark.sql.Column` or column name, optional The number of seconds with the fractional part in microsecond precision. Returns @@ -22649,16 +23983,21 @@ def make_interval( :class:`~pyspark.sql.Column` A new column that contains an interval. - Examples + See Also -------- + :meth:`pyspark.sql.functions.make_dt_interval` + :meth:`pyspark.sql.functions.make_ym_interval` + :meth:`pyspark.sql.functions.try_make_interval` + Examples + -------- Example 1: Make interval from years, months, weeks, days, hours, mins and secs. >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.make_interval( - ... df.year, df.month, df.week, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.make_interval(df.year, df.month, 'week', df.day, df.hour, df.min, df.sec) ... ).show(truncate=False) +---------------------------------------------------------------+ |make_interval(year, month, week, day, hour, min, sec) | @@ -22670,9 +24009,9 @@ def make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.make_interval( - ... df.year, df.month, df.week, df.day, df.hour, df.min) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.make_interval(df.year, df.month, 'week', df.day, df.hour, df.min) ... 
).show(truncate=False) +---------------------------------------------------+ |make_interval(year, month, week, day, hour, min, 0)| @@ -22684,9 +24023,9 @@ def make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.make_interval( - ... df.year, df.month, df.week, df.day, df.hour) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.make_interval(df.year, df.month, 'week', df.day, df.hour) ... ).show(truncate=False) +-------------------------------------------------+ |make_interval(year, month, week, day, hour, 0, 0)| @@ -22698,8 +24037,8 @@ def make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.make_interval(df.year, df.month, df.week, df.day)).show(truncate=False) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select(sf.make_interval(df.year, df.month, 'week', df.day)).show(truncate=False) +----------------------------------------------+ |make_interval(year, month, week, day, 0, 0, 0)| +----------------------------------------------+ @@ -22710,8 +24049,8 @@ def make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.make_interval(df.year, df.month, df.week)).show(truncate=False) + ... 
['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) + >>> df.select(sf.make_interval(df.year, df.month, 'week')).show(truncate=False) +--------------------------------------------+ |make_interval(year, month, week, 0, 0, 0, 0)| +--------------------------------------------+ @@ -22722,7 +24061,7 @@ def make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) >>> df.select(sf.make_interval(df.year, df.month)).show(truncate=False) +-----------------------------------------+ |make_interval(year, month, 0, 0, 0, 0, 0)| @@ -22734,7 +24073,7 @@ def make_interval( >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... ["year", "month", "week", "day", "hour", "min", "sec"]) + ... ['year', 'month', 'week', 'day', 'hour', 'min', 'sec']) >>> df.select(sf.make_interval(df.year)).show(truncate=False) +-------------------------------------+ |make_interval(year, 0, 0, 0, 0, 0, 0)| @@ -22742,12 +24081,10 @@ def make_interval( |100 years | +-------------------------------------+ - Example 8: Make interval. + Example 8: Make empty interval. >>> import pyspark.sql.functions as sf - >>> df = spark.createDataFrame([[100, 11, 1, 1, 12, 30, 01.001001]], - ... 
["year", "month", "week", "day", "hour", "min", "sec"]) - >>> df.select(sf.make_interval()).show(truncate=False) + >>> spark.range(1).select(sf.make_interval()).show(truncate=False) +----------------------------------+ |make_interval(0, 0, 0, 0, 0, 0, 0)| +----------------------------------+ @@ -22786,22 +24123,22 @@ def make_timestamp( Parameters ---------- - years : :class:`~pyspark.sql.Column` or str + years : :class:`~pyspark.sql.Column` or column name The year to represent, from 1 to 9999 - months : :class:`~pyspark.sql.Column` or str + months : :class:`~pyspark.sql.Column` or column name The month-of-year to represent, from 1 (January) to 12 (December) - days : :class:`~pyspark.sql.Column` or str + days : :class:`~pyspark.sql.Column` or column name The day-of-month to represent, from 1 to 31 - hours : :class:`~pyspark.sql.Column` or str + hours : :class:`~pyspark.sql.Column` or column name The hour-of-day to represent, from 0 to 23 - mins : :class:`~pyspark.sql.Column` or str + mins : :class:`~pyspark.sql.Column` or column name The minute-of-hour to represent, from 0 to 59 - secs : :class:`~pyspark.sql.Column` or str + secs : :class:`~pyspark.sql.Column` or column name The second-of-minute and its micro-fraction to represent, from 0 to 60. The value can be either an integer like 13 , or a fraction like 13.123. If the sec argument equals to 60, the seconds field is set to 0 and 1 minute is added to the final timestamp. - timezone : :class:`~pyspark.sql.Column` or str, optional + timezone : :class:`~pyspark.sql.Column` or column name, optional The time zone identifier. For example, CET, UTC and etc. Returns @@ -22809,38 +24146,48 @@ def make_timestamp( :class:`~pyspark.sql.Column` A new column that contains a timestamp. 
+ See Also + -------- + :meth:`pyspark.sql.functions.make_timestamp_ltz` + :meth:`pyspark.sql.functions.make_timestamp_ntz` + :meth:`pyspark.sql.functions.try_make_timestamp` + :meth:`pyspark.sql.functions.try_make_timestamp_ltz` + :meth:`pyspark.sql.functions.try_make_timestamp_ntz` + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.try_make_interval` + Examples -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") Example 1: Make timestamp from years, months, days, hours, mins and secs. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.make_timestamp( - ... df.year, df.month, df.day, df.hour, df.min, df.sec, df.timezone) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.make_timestamp(df.year, df.month, df.day, 'hour', df.min, df.sec, 'tz') ... ).show(truncate=False) - +----------------------------------------------------------+ - |make_timestamp(year, month, day, hour, min, sec, timezone)| - +----------------------------------------------------------+ - |2014-12-27 21:30:45.887 | - +----------------------------------------------------------+ + +----------------------------------------------------+ + |make_timestamp(year, month, day, hour, min, sec, tz)| + +----------------------------------------------------+ + |2014-12-27 21:30:45.887 | + +----------------------------------------------------+ Example 2: Make timestamp without timezone. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.make_timestamp( - ... 
df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.make_timestamp(df.year, df.month, df.day, 'hour', df.min, df.sec) ... ).show(truncate=False) +------------------------------------------------+ |make_timestamp(year, month, day, hour, min, sec)| +------------------------------------------------+ |2014-12-28 06:30:45.887 | +------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ if timezone is not None: @@ -22895,17 +24242,27 @@ def try_make_timestamp( :class:`~pyspark.sql.Column` A new column that contains a timestamp or NULL in case of an error. + See Also + -------- + :meth:`pyspark.sql.functions.make_timestamp` + :meth:`pyspark.sql.functions.make_timestamp_ltz` + :meth:`pyspark.sql.functions.make_timestamp_ntz` + :meth:`pyspark.sql.functions.try_make_timestamp_ltz` + :meth:`pyspark.sql.functions.try_make_timestamp_ntz` + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.try_make_interval` + Examples -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") Example 1: Make timestamp from years, months, days, hours, mins and secs. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.try_make_timestamp( - ... df.year, df.month, df.day, df.hour, df.min, df.sec, df.timezone) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.try_make_timestamp(df.year, df.month, df.day, 'hour', df.min, df.sec, 'tz') ... ).show(truncate=False) +----------------------------------------------------+ |try_make_timestamp(year, month, day, hour, min, sec)| @@ -22916,11 +24273,10 @@ def try_make_timestamp( Example 2: Make timestamp without timezone. 
>>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.try_make_timestamp( - ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.try_make_timestamp(df.year, df.month, df.day, 'hour', df.min, df.sec) ... ).show(truncate=False) +----------------------------------------------------+ |try_make_timestamp(year, month, day, hour, min, sec)| @@ -22932,17 +24288,17 @@ def try_make_timestamp( Example 3: Make timestamp with invalid input. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 13, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.try_make_timestamp( - ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.try_make_timestamp(df.year, df.month, df.day, 'hour', df.min, df.sec) ... ).show(truncate=False) +----------------------------------------------------+ |try_make_timestamp(year, month, day, hour, min, sec)| +----------------------------------------------------+ |NULL | +----------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ if timezone is not None: @@ -22997,38 +24353,48 @@ def make_timestamp_ltz( :class:`~pyspark.sql.Column` A new column that contains a current timestamp. 
+ See Also + -------- + :meth:`pyspark.sql.functions.make_timestamp` + :meth:`pyspark.sql.functions.make_timestamp_ntz` + :meth:`pyspark.sql.functions.try_make_timestamp` + :meth:`pyspark.sql.functions.try_make_timestamp_ltz` + :meth:`pyspark.sql.functions.try_make_timestamp_ntz` + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.try_make_interval` + Examples -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") Example 1: Make the current timestamp from years, months, days, hours, mins and secs. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.make_timestamp_ltz( - ... df.year, df.month, df.day, df.hour, df.min, df.sec, df.timezone) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.make_timestamp_ltz(df.year, df.month, 'day', df.hour, df.min, df.sec, 'tz') ... ).show(truncate=False) - +--------------------------------------------------------------+ - |make_timestamp_ltz(year, month, day, hour, min, sec, timezone)| - +--------------------------------------------------------------+ - |2014-12-27 21:30:45.887 | - +--------------------------------------------------------------+ + +--------------------------------------------------------+ + |make_timestamp_ltz(year, month, day, hour, min, sec, tz)| + +--------------------------------------------------------+ + |2014-12-27 21:30:45.887 | + +--------------------------------------------------------+ Example 2: Make the current timestamp without timezone. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], - ... 
["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.make_timestamp_ltz( - ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.make_timestamp_ltz(df.year, df.month, 'day', df.hour, df.min, df.sec) ... ).show(truncate=False) +----------------------------------------------------+ |make_timestamp_ltz(year, month, day, hour, min, sec)| +----------------------------------------------------+ |2014-12-28 06:30:45.887 | +----------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ if timezone is not None: @@ -23083,54 +24449,62 @@ def try_make_timestamp_ltz( :class:`~pyspark.sql.Column` A new column that contains a current timestamp, or NULL in case of an error. + See Also + -------- + :meth:`pyspark.sql.functions.make_timestamp` + :meth:`pyspark.sql.functions.make_timestamp_ltz` + :meth:`pyspark.sql.functions.make_timestamp_ntz` + :meth:`pyspark.sql.functions.try_make_timestamp` + :meth:`pyspark.sql.functions.try_make_timestamp_ntz` + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.try_make_interval` + Examples -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") Example 1: Make the current timestamp from years, months, days, hours, mins and secs. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.try_make_timestamp_ltz( - ... df.year, df.month, df.day, df.hour, df.min, df.sec, df.timezone) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.try_make_timestamp_ltz('year', 'month', df.day, df.hour, df.min, df.sec, 'tz') ... 
).show(truncate=False) - +------------------------------------------------------------------+ - |try_make_timestamp_ltz(year, month, day, hour, min, sec, timezone)| - +------------------------------------------------------------------+ - |2014-12-27 21:30:45.887 | - +------------------------------------------------------------------+ + +------------------------------------------------------------+ + |try_make_timestamp_ltz(year, month, day, hour, min, sec, tz)| + +------------------------------------------------------------+ + |2014-12-27 21:30:45.887 | + +------------------------------------------------------------+ Example 2: Make the current timestamp without timezone. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.try_make_timestamp_ltz( - ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.try_make_timestamp_ltz('year', 'month', df.day, df.hour, df.min, df.sec) ... ).show(truncate=False) +--------------------------------------------------------+ |try_make_timestamp_ltz(year, month, day, hour, min, sec)| +--------------------------------------------------------+ |2014-12-28 06:30:45.887 | +--------------------------------------------------------+ - >>> spark.conf.unset("spark.sql.session.timeZone") Example 3: Make the current timestamp with invalid input. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 13, 28, 6, 30, 45.887, 'CET']], - ... ["year", "month", "day", "hour", "min", "sec", "timezone"]) - >>> df.select(sf.try_make_timestamp_ltz( - ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... 
['year', 'month', 'day', 'hour', 'min', 'sec', 'tz']) + >>> df.select( + ... sf.try_make_timestamp_ltz('year', 'month', df.day, df.hour, df.min, df.sec) ... ).show(truncate=False) +--------------------------------------------------------+ |try_make_timestamp_ltz(year, month, day, hour, min, sec)| +--------------------------------------------------------+ |NULL | +--------------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ if timezone is not None: @@ -23161,17 +24535,17 @@ def make_timestamp_ntz( Parameters ---------- - years : :class:`~pyspark.sql.Column` or str + years : :class:`~pyspark.sql.Column` or column name The year to represent, from 1 to 9999 - months : :class:`~pyspark.sql.Column` or str + months : :class:`~pyspark.sql.Column` or column name The month-of-year to represent, from 1 (January) to 12 (December) - days : :class:`~pyspark.sql.Column` or str + days : :class:`~pyspark.sql.Column` or column name The day-of-month to represent, from 1 to 31 - hours : :class:`~pyspark.sql.Column` or str + hours : :class:`~pyspark.sql.Column` or column name The hour-of-day to represent, from 0 to 23 - mins : :class:`~pyspark.sql.Column` or str + mins : :class:`~pyspark.sql.Column` or column name The minute-of-hour to represent, from 0 to 59 - secs : :class:`~pyspark.sql.Column` or str + secs : :class:`~pyspark.sql.Column` or column name The second-of-minute and its micro-fraction to represent, from 0 to 60. The value can be either an integer like 13 , or a fraction like 13.123. If the sec argument equals to 60, the seconds field is set @@ -23182,23 +24556,32 @@ def make_timestamp_ntz( :class:`~pyspark.sql.Column` A new column that contains a local date-time. 
- Examples + See Also -------- + :meth:`pyspark.sql.functions.make_timestamp` + :meth:`pyspark.sql.functions.make_timestamp_ltz` + :meth:`pyspark.sql.functions.try_make_timestamp` + :meth:`pyspark.sql.functions.try_make_timestamp_ltz` + :meth:`pyspark.sql.functions.try_make_timestamp_ntz` + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.try_make_interval` - Example 1: Make local date-time from years, months, days, hours, mins, secs. + Examples + -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887]], - ... ["year", "month", "day", "hour", "min", "sec"]) - >>> df.select(sf.make_timestamp_ntz( - ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.make_timestamp_ntz('year', 'month', df.day, df.hour, df.min, df.sec) ... ).show(truncate=False) +----------------------------------------------------+ |make_timestamp_ntz(year, month, day, hour, min, sec)| +----------------------------------------------------+ |2014-12-28 06:30:45.887 | +----------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ return _invoke_function_over_columns( @@ -23244,39 +24627,48 @@ def try_make_timestamp_ntz( :class:`~pyspark.sql.Column` A new column that contains a local date-time, or NULL in case of an error. 
+ See Also + -------- + :meth:`pyspark.sql.functions.make_timestamp` + :meth:`pyspark.sql.functions.make_timestamp_ltz` + :meth:`pyspark.sql.functions.make_timestamp_ntz` + :meth:`pyspark.sql.functions.try_make_timestamp` + :meth:`pyspark.sql.functions.try_make_timestamp_ltz` + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.try_make_interval` + Examples -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") Example 1: Make local date-time from years, months, days, hours, mins, secs. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887]], - ... ["year", "month", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_timestamp_ntz( - ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.try_make_timestamp_ntz('year', 'month', df.day, df.hour, df.min, df.sec) ... ).show(truncate=False) +--------------------------------------------------------+ |try_make_timestamp_ntz(year, month, day, hour, min, sec)| +--------------------------------------------------------+ |2014-12-28 06:30:45.887 | +--------------------------------------------------------+ - >>> spark.conf.unset("spark.sql.session.timeZone") Example 2: Make local date-time with invalid input >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> df = spark.createDataFrame([[2014, 13, 28, 6, 30, 45.887]], - ... ["year", "month", "day", "hour", "min", "sec"]) - >>> df.select(sf.try_make_timestamp_ntz( - ... df.year, df.month, df.day, df.hour, df.min, df.sec) + ... ['year', 'month', 'day', 'hour', 'min', 'sec']) + >>> df.select( + ... sf.try_make_timestamp_ntz('year', 'month', df.day, df.hour, df.min, df.sec) ... 
).show(truncate=False) +--------------------------------------------------------+ |try_make_timestamp_ntz(year, month, day, hour, min, sec)| +--------------------------------------------------------+ |NULL | +--------------------------------------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ return _invoke_function_over_columns( @@ -23296,9 +24688,9 @@ def make_ym_interval( Parameters ---------- - years : :class:`~pyspark.sql.Column` or str, optional + years : :class:`~pyspark.sql.Column` or column name, optional The number of years, positive or negative - months : :class:`~pyspark.sql.Column` or str, optional + months : :class:`~pyspark.sql.Column` or column name, optional The number of months, positive or negative Returns @@ -23306,44 +24698,48 @@ def make_ym_interval( :class:`~pyspark.sql.Column` A new column that contains a year-month interval. + See Also + -------- + :meth:`pyspark.sql.functions.make_interval` + :meth:`pyspark.sql.functions.make_dt_interval` + :meth:`pyspark.sql.functions.try_make_interval` + Examples -------- + >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") Example 1: Make year-month interval from years, months. 
>>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") - >>> df = spark.createDataFrame([[2014, 12]], ["year", "month"]) - >>> df.select(sf.make_ym_interval(df.year, df.month)).show(truncate=False) - +-------------------------------+ - |make_ym_interval(year, month) | - +-------------------------------+ - |INTERVAL '2015-0' YEAR TO MONTH| - +-------------------------------+ + >>> df = spark.createDataFrame([[2014, 12]], ['year', 'month']) + >>> df.select('*', sf.make_ym_interval('year', df.month)).show(truncate=False) + +----+-----+-------------------------------+ + |year|month|make_ym_interval(year, month) | + +----+-----+-------------------------------+ + |2014|12 |INTERVAL '2015-0' YEAR TO MONTH| + +----+-----+-------------------------------+ Example 2: Make year-month interval from years. >>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") - >>> df = spark.createDataFrame([[2014, 12]], ["year", "month"]) - >>> df.select(sf.make_ym_interval(df.year)).show(truncate=False) - +-------------------------------+ - |make_ym_interval(year, 0) | - +-------------------------------+ - |INTERVAL '2014-0' YEAR TO MONTH| - +-------------------------------+ + >>> df = spark.createDataFrame([[2014, 12]], ['year', 'month']) + >>> df.select('*', sf.make_ym_interval(df.year)).show(truncate=False) + +----+-----+-------------------------------+ + |year|month|make_ym_interval(year, 0) | + +----+-----+-------------------------------+ + |2014|12 |INTERVAL '2014-0' YEAR TO MONTH| + +----+-----+-------------------------------+ - Example 3: Make year-month interval. + Example 3: Make empty interval. 
>>> import pyspark.sql.functions as sf - >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") - >>> df = spark.createDataFrame([[2014, 12]], ["year", "month"]) - >>> df.select(sf.make_ym_interval()).show(truncate=False) + >>> spark.range(1).select(sf.make_ym_interval()).show(truncate=False) +----------------------------+ |make_ym_interval(0, 0) | +----------------------------+ |INTERVAL '0-0' YEAR TO MONTH| +----------------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ _years = lit(0) if years is None else years @@ -23948,21 +25344,21 @@ def aes_encrypt( Parameters ---------- - input : :class:`~pyspark.sql.Column` or str + input : :class:`~pyspark.sql.Column` or column name The binary value to encrypt. - key : :class:`~pyspark.sql.Column` or str + key : :class:`~pyspark.sql.Column` or column name The passphrase to use to encrypt the data. mode : :class:`~pyspark.sql.Column` or str, optional Specifies which block cipher mode should be used to encrypt messages. Valid modes: ECB, GCM, CBC. - padding : :class:`~pyspark.sql.Column` or str, optional + padding : :class:`~pyspark.sql.Column` or column name, optional Specifies how to pad messages whose length is not a multiple of the block size. Valid values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS for CBC. - iv : :class:`~pyspark.sql.Column` or str, optional + iv : :class:`~pyspark.sql.Column` or column name, optional Optional initialization vector. Only supported for CBC and GCM modes. Valid values: None or "". 16-byte array for CBC mode. 12-byte array for GCM mode. - aad : :class:`~pyspark.sql.Column` or str, optional + aad : :class:`~pyspark.sql.Column` or column name, optional Optional additional authenticated data. Only supported for GCM mode. This can be any free-form input and must be provided for both encryption and decryption. 
@@ -23971,6 +25367,11 @@ def aes_encrypt( :class:`~pyspark.sql.Column` A new column that contains an encrypted value. + See Also + -------- + :meth:`pyspark.sql.functions.aes_decrypt` + :meth:`pyspark.sql.functions.try_aes_decrypt` + Examples -------- @@ -23983,7 +25384,7 @@ def aes_encrypt( ... ["input", "key", "mode", "padding", "iv", "aad"] ... ) >>> df.select(sf.base64(sf.aes_encrypt( - ... df.input, df.key, df.mode, df.padding, sf.to_binary(df.iv, sf.lit("hex")), df.aad) + ... df.input, df.key, "mode", df.padding, sf.to_binary(df.iv, sf.lit("hex")), df.aad) ... )).show(truncate=False) +-----------------------------------------------------------------------+ |base64(aes_encrypt(input, key, mode, padding, to_binary(iv, hex), aad))| @@ -24000,7 +25401,7 @@ def aes_encrypt( ... ["input", "key", "mode", "padding", "iv", "aad"] ... ) >>> df.select(sf.base64(sf.aes_encrypt( - ... df.input, df.key, df.mode, df.padding, sf.to_binary(df.iv, sf.lit("hex"))) + ... df.input, df.key, "mode", df.padding, sf.to_binary(df.iv, sf.lit("hex"))) ... )).show(truncate=False) +--------------------------------------------------------------------+ |base64(aes_encrypt(input, key, mode, padding, to_binary(iv, hex), ))| @@ -24015,7 +25416,7 @@ def aes_encrypt( ... "Spark SQL", "1234567890abcdef", "ECB", "PKCS",)], ... ["input", "key", "mode", "padding"] ... ) - >>> df.select(sf.aes_decrypt(sf.aes_encrypt(df.input, df.key, df.mode, df.padding), + >>> df.select(sf.aes_decrypt(sf.aes_encrypt(df.input, df.key, "mode", df.padding), ... df.key, df.mode, df.padding ... ).cast("STRING")).show(truncate=False) +---------------------------------------------------------------------------------------------+ @@ -24031,7 +25432,7 @@ def aes_encrypt( ... "Spark SQL", "0000111122223333", "ECB",)], ... ["input", "key", "mode"] ... ) - >>> df.select(sf.aes_decrypt(sf.aes_encrypt(df.input, df.key, df.mode), + >>> df.select(sf.aes_decrypt(sf.aes_encrypt(df.input, df.key, "mode"), ... df.key, df.mode ... 
).cast("STRING")).show(truncate=False) +---------------------------------------------------------------------------------------------+ @@ -24082,18 +25483,18 @@ def aes_decrypt( Parameters ---------- - input : :class:`~pyspark.sql.Column` or str + input : :class:`~pyspark.sql.Column` or column name The binary value to decrypt. - key : :class:`~pyspark.sql.Column` or str + key : :class:`~pyspark.sql.Column` or column name The passphrase to use to decrypt the data. - mode : :class:`~pyspark.sql.Column` or str, optional + mode : :class:`~pyspark.sql.Column` or column name, optional Specifies which block cipher mode should be used to decrypt messages. Valid modes: ECB, GCM, CBC. - padding : :class:`~pyspark.sql.Column` or str, optional + padding : :class:`~pyspark.sql.Column` or column name, optional Specifies how to pad messages whose length is not a multiple of the block size. Valid values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS for CBC. - aad : :class:`~pyspark.sql.Column` or str, optional + aad : :class:`~pyspark.sql.Column` or column name, optional Optional additional authenticated data. Only supported for GCM mode. This can be any free-form input and must be provided for both encryption and decryption. @@ -24102,6 +25503,11 @@ def aes_decrypt( :class:`~pyspark.sql.Column` A new column that contains a decrypted value. + See Also + -------- + :meth:`pyspark.sql.functions.aes_encrypt` + :meth:`pyspark.sql.functions.try_aes_decrypt` + Examples -------- @@ -24115,7 +25521,7 @@ def aes_decrypt( ... ["input", "key", "mode", "padding", "aad"] ... ) >>> df.select(sf.aes_decrypt( - ... sf.unbase64(df.input), df.key, df.mode, df.padding, df.aad + ... sf.unbase64(df.input), df.key, "mode", df.padding, df.aad ... 
).cast("STRING")).show(truncate=False) +---------------------------------------------------------------------+ |CAST(aes_decrypt(unbase64(input), key, mode, padding, aad) AS STRING)| @@ -24132,7 +25538,7 @@ def aes_decrypt( ... ["input", "key", "mode", "padding"] ... ) >>> df.select(sf.aes_decrypt( - ... sf.unbase64(df.input), df.key, df.mode, df.padding + ... sf.unbase64(df.input), df.key, "mode", df.padding ... ).cast("STRING")).show(truncate=False) +------------------------------------------------------------------+ |CAST(aes_decrypt(unbase64(input), key, mode, padding, ) AS STRING)| @@ -24149,7 +25555,7 @@ def aes_decrypt( ... ["input", "key", "mode", "padding"] ... ) >>> df.select(sf.aes_decrypt( - ... sf.unbase64(df.input), df.key, df.mode + ... sf.unbase64(df.input), df.key, "mode" ... ).cast("STRING")).show(truncate=False) +------------------------------------------------------------------+ |CAST(aes_decrypt(unbase64(input), key, mode, DEFAULT, ) AS STRING)| @@ -24201,18 +25607,18 @@ def try_aes_decrypt( Parameters ---------- - input : :class:`~pyspark.sql.Column` or str + input : :class:`~pyspark.sql.Column` or column name The binary value to decrypt. - key : :class:`~pyspark.sql.Column` or str + key : :class:`~pyspark.sql.Column` or column name The passphrase to use to decrypt the data. - mode : :class:`~pyspark.sql.Column` or str, optional + mode : :class:`~pyspark.sql.Column` or column name, optional Specifies which block cipher mode should be used to decrypt messages. Valid modes: ECB, GCM, CBC. - padding : :class:`~pyspark.sql.Column` or str, optional + padding : :class:`~pyspark.sql.Column` or column name, optional Specifies how to pad messages whose length is not a multiple of the block size. Valid values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS for CBC. - aad : :class:`~pyspark.sql.Column` or str, optional + aad : :class:`~pyspark.sql.Column` or column name, optional Optional additional authenticated data. 
Only supported for GCM mode. This can be any free-form input and must be provided for both encryption and decryption. @@ -24221,6 +25627,11 @@ def try_aes_decrypt( :class:`~pyspark.sql.Column` A new column that contains a decrypted value or a NULL value. + See Also + -------- + :meth:`pyspark.sql.functions.aes_encrypt` + :meth:`pyspark.sql.functions.aes_decrypt` + Examples -------- @@ -24234,7 +25645,7 @@ def try_aes_decrypt( ... ["input", "key", "mode", "padding", "aad"] ... ) >>> df.select(sf.try_aes_decrypt( - ... sf.unbase64(df.input), df.key, df.mode, df.padding, df.aad + ... sf.unbase64(df.input), df.key, "mode", df.padding, df.aad ... ).cast("STRING")).show(truncate=False) +-------------------------------------------------------------------------+ |CAST(try_aes_decrypt(unbase64(input), key, mode, padding, aad) AS STRING)| @@ -24252,7 +25663,7 @@ def try_aes_decrypt( ... ["input", "key", "mode", "padding", "aad"] ... ) >>> df.select(sf.try_aes_decrypt( - ... sf.unbase64(df.input), df.key, df.mode, df.padding, df.aad + ... sf.unbase64(df.input), df.key, "mode", df.padding, df.aad ... ).cast("STRING")).show(truncate=False) +-------------------------------------------------------------------------+ |CAST(try_aes_decrypt(unbase64(input), key, mode, padding, aad) AS STRING)| @@ -24269,7 +25680,7 @@ def try_aes_decrypt( ... ["input", "key", "mode", "padding"] ... ) >>> df.select(sf.try_aes_decrypt( - ... sf.unbase64(df.input), df.key, df.mode, df.padding + ... sf.unbase64(df.input), df.key, "mode", df.padding ... ).cast("STRING")).show(truncate=False) +----------------------------------------------------------------------+ |CAST(try_aes_decrypt(unbase64(input), key, mode, padding, ) AS STRING)| @@ -24286,7 +25697,7 @@ def try_aes_decrypt( ... ["input", "key", "mode", "padding"] ... ) >>> df.select(sf.try_aes_decrypt( - ... sf.unbase64(df.input), df.key, df.mode + ... sf.unbase64(df.input), df.key, "mode" ... 
).cast("STRING")).show(truncate=False) +----------------------------------------------------------------------+ |CAST(try_aes_decrypt(unbase64(input), key, mode, DEFAULT, ) AS STRING)| @@ -24326,7 +25737,12 @@ def sha(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name + + See Also + -------- + :meth:`pyspark.sql.functions.sha1` + :meth:`pyspark.sql.functions.sha2` Examples -------- @@ -24416,18 +25832,28 @@ def reflect(*cols: "ColumnOrName") -> Column: Parameters ---------- - cols : :class:`~pyspark.sql.Column` or str - the first element should be a literal string for the class name, - and the second element should be a literal string for the method name, - and the remaining are input arguments to the Java method. + cols : :class:`~pyspark.sql.Column` or column name + the first element should be a Column representing literal string for the class name, + and the second element should be a Column representing literal string for the method name, + and the remaining are input arguments (Columns or column names) to the Java method. + + See Also + -------- + :meth:`pyspark.sql.functions.java_method` + :meth:`pyspark.sql.functions.try_reflect` Examples -------- - >>> df = spark.createDataFrame([("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2",)], ["a"]) + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('a5cf6c42-0c85-418f-af6c-3e4e5b1328f2',)], ['a']) >>> df.select( - ... reflect(lit("java.util.UUID"), lit("fromString"), df.a).alias('r') - ... ).collect() - [Row(r='a5cf6c42-0c85-418f-af6c-3e4e5b1328f2')] + ... sf.reflect(sf.lit('java.util.UUID'), sf.lit('fromString'), 'a') + ... 
).show(truncate=False) + +--------------------------------------+ + |reflect(java.util.UUID, fromString, a)| + +--------------------------------------+ + |a5cf6c42-0c85-418f-af6c-3e4e5b1328f2 | + +--------------------------------------+ """ return _invoke_function_over_seq_of_columns("reflect", cols) @@ -24441,13 +25867,20 @@ def java_method(*cols: "ColumnOrName") -> Column: Parameters ---------- - cols : :class:`~pyspark.sql.Column` or str - the first element should be a literal string for the class name, - and the second element should be a literal string for the method name, - and the remaining are input arguments to the Java method. + cols : :class:`~pyspark.sql.Column` or column name + the first element should be a Column representing literal string for the class name, + and the second element should be a Column representing literal string for the method name, + and the remaining are input arguments (Columns or column names) to the Java method. + + See Also + -------- + :meth:`pyspark.sql.functions.reflect` + :meth:`pyspark.sql.functions.try_reflect` Examples -------- + Example 1: Reflecting a method call with a column argument + >>> import pyspark.sql.functions as sf >>> spark.range(1).select( ... sf.java_method( @@ -24461,6 +25894,19 @@ def java_method(*cols: "ColumnOrName") -> Column: +-----------------------------------------------------------------------------+ |a5cf6c42-0c85-418f-af6c-3e4e5b1328f2 | +-----------------------------------------------------------------------------+ + + Example 2: Reflecting a method call with a column name argument + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('a5cf6c42-0c85-418f-af6c-3e4e5b1328f2',)], ['a']) + >>> df.select( + ... sf.java_method(sf.lit('java.util.UUID'), sf.lit('fromString'), 'a') + ... 
).show(truncate=False) + +------------------------------------------+ + |java_method(java.util.UUID, fromString, a)| + +------------------------------------------+ + |a5cf6c42-0c85-418f-af6c-3e4e5b1328f2 | + +------------------------------------------+ """ return _invoke_function_over_seq_of_columns("java_method", cols) @@ -24476,10 +25922,15 @@ def try_reflect(*cols: "ColumnOrName") -> Column: Parameters ---------- - cols : :class:`~pyspark.sql.Column` or str - the first element should be a literal string for the class name, - and the second element should be a literal string for the method name, - and the remaining are input arguments to the Java method. + cols : :class:`~pyspark.sql.Column` or column name + the first element should be a Column representing literal string for the class name, + and the second element should be a Column representing literal string for the method name, + and the remaining are input arguments (Columns or column names) to the Java method. + + See Also + -------- + :meth:`pyspark.sql.functions.reflect` + :meth:`pyspark.sql.functions.java_method` Examples -------- @@ -24488,25 +25939,24 @@ def try_reflect(*cols: "ColumnOrName") -> Column: >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2",)], ["a"]) >>> df.select( - ... sf.try_reflect(sf.lit("java.util.UUID"), sf.lit("fromString"), df.a) - ... ).show() + ... sf.try_reflect(sf.lit("java.util.UUID"), sf.lit("fromString"), "a") + ... ).show(truncate=False) +------------------------------------------+ |try_reflect(java.util.UUID, fromString, a)| +------------------------------------------+ - | a5cf6c42-0c85-418...| + |a5cf6c42-0c85-418f-af6c-3e4e5b1328f2 | +------------------------------------------+ Example 2: Exception in the reflection call, resulting in null >>> from pyspark.sql import functions as sf - >>> df = spark.range(1) - >>> df.select( + >>> spark.range(1).select( ... 
sf.try_reflect(sf.lit("scala.Predef"), sf.lit("require"), sf.lit(False)) - ... ).show() + ... ).show(truncate=False) +-----------------------------------------+ |try_reflect(scala.Predef, require, false)| +-----------------------------------------+ - | NULL| + |NULL | +-----------------------------------------+ """ return _invoke_function_over_seq_of_columns("try_reflect", cols) @@ -24522,12 +25972,12 @@ def version() -> Column: Examples -------- - >>> df = spark.range(1) - >>> df.select(version()).show(truncate=False) # doctest: +SKIP + >>> from pyspark.sql import functions as sf + >>> spark.range(1).select(sf.version()).show(truncate=False) # doctest: +SKIP +----------------------------------------------+ |version() | +----------------------------------------------+ - |3.5.0 cafbea5b13623276517a9d716f75745eff91f616| + |4.0.0 4f8d1f575e99aeef8990c63a9614af0fc5479330| +----------------------------------------------+ """ return _invoke_function_over_columns("version") @@ -24542,13 +25992,18 @@ def typeof(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name Examples -------- - >>> df = spark.createDataFrame([(1,)], ["a"]) - >>> df.select(typeof(df.a).alias('r')).collect() - [Row(r='bigint')] + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(True, 1, 1.0, 'xyz',)], ['a', 'b', 'c', 'd']) + >>> df.select(sf.typeof(df.a), sf.typeof(df.b), sf.typeof('c'), sf.typeof('d')).show() + +---------+---------+---------+---------+ + |typeof(a)|typeof(b)|typeof(c)|typeof(d)| + +---------+---------+---------+---------+ + | boolean| bigint| double| string| + +---------+---------+---------+---------+ """ return _invoke_function_over_columns("typeof", col) @@ -24563,20 +26018,48 @@ def stack(*cols: "ColumnOrName") -> Column: Parameters ---------- - cols : :class:`~pyspark.sql.Column` or str + cols : :class:`~pyspark.sql.Column` or column name the first element 
should be a literal int for the number of rows to be separated, and the remaining are input elements to be separated. Examples -------- - >>> df = spark.createDataFrame([(1, 2, 3)], ["a", "b", "c"]) - >>> df.select(stack(lit(2), df.a, df.b, df.c)).show(truncate=False) - +----+----+ - |col0|col1| - +----+----+ - |1 |2 | - |3 |NULL| - +----+----+ + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(1, 2, 3)], ['a', 'b', 'c']) + >>> df.select('*', sf.stack(sf.lit(2), df.a, df.b, 'c')).show() + +---+---+---+----+----+ + | a| b| c|col0|col1| + +---+---+---+----+----+ + | 1| 2| 3| 1| 2| + | 1| 2| 3| 3|NULL| + +---+---+---+----+----+ + + >>> df.select('*', sf.stack(sf.lit(2), df.a, df.b, 'c').alias('x', 'y')).show() + +---+---+---+---+----+ + | a| b| c| x| y| + +---+---+---+---+----+ + | 1| 2| 3| 1| 2| + | 1| 2| 3| 3|NULL| + +---+---+---+---+----+ + + >>> df.select('*', sf.stack(sf.lit(3), df.a, df.b, 'c')).show() + +---+---+---+----+ + | a| b| c|col0| + +---+---+---+----+ + | 1| 2| 3| 1| + | 1| 2| 3| 2| + | 1| 2| 3| 3| + +---+---+---+----+ + + >>> df.select('*', sf.stack(sf.lit(4), df.a, df.b, 'c')).show() + +---+---+---+----+ + | a| b| c|col0| + +---+---+---+----+ + | 1| 2| 3| 1| + | 1| 2| 3| 2| + | 1| 2| 3| 3| + | 1| 2| 3|NULL| + +---+---+---+----+ """ return _invoke_function_over_seq_of_columns("stack", cols) @@ -24590,14 +26073,26 @@ def bitmap_bit_position(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name The input column. 
+ See Also + -------- + :meth:`pyspark.sql.functions.bitmap_bucket_number` + :meth:`pyspark.sql.functions.bitmap_construct_agg` + :meth:`pyspark.sql.functions.bitmap_count` + :meth:`pyspark.sql.functions.bitmap_or_agg` + Examples -------- - >>> df = spark.createDataFrame([(123,)], ["a"]) - >>> df.select(bitmap_bit_position(df.a).alias("r")).collect() - [Row(r=122)] + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(123,)], ['a']) + >>> df.select('*', sf.bitmap_bit_position('a')).show() + +---+----------------------+ + | a|bitmap_bit_position(a)| + +---+----------------------+ + |123| 122| + +---+----------------------+ """ return _invoke_function_over_columns("bitmap_bit_position", col) @@ -24611,14 +26106,26 @@ def bitmap_bucket_number(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name The input column. + See Also + -------- + :meth:`pyspark.sql.functions.bitmap_bit_position` + :meth:`pyspark.sql.functions.bitmap_construct_agg` + :meth:`pyspark.sql.functions.bitmap_count` + :meth:`pyspark.sql.functions.bitmap_or_agg` + Examples -------- - >>> df = spark.createDataFrame([(123,)], ["a"]) - >>> df.select(bitmap_bucket_number(df.a).alias("r")).collect() - [Row(r=1)] + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(123,)], ['a']) + >>> df.select('*', sf.bitmap_bucket_number('a')).show() + +---+-----------------------+ + | a|bitmap_bucket_number(a)| + +---+-----------------------+ + |123| 1| + +---+-----------------------+ """ return _invoke_function_over_columns("bitmap_bucket_number", col) @@ -24633,16 +26140,28 @@ def bitmap_construct_agg(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name The input column will most likely be bitmap_bit_position(). 
+ See Also + -------- + :meth:`pyspark.sql.functions.bitmap_bit_position` + :meth:`pyspark.sql.functions.bitmap_bucket_number` + :meth:`pyspark.sql.functions.bitmap_count` + :meth:`pyspark.sql.functions.bitmap_or_agg` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([(1,),(2,),(3,)], ["a"]) - >>> df.select(substring(hex( - ... bitmap_construct_agg(bitmap_bit_position(df.a)) - ... ), 0, 6).alias("r")).collect() - [Row(r='070000')] + >>> df.select( + ... sf.bitmap_construct_agg(sf.bitmap_bit_position('a')) + ... ).show() + +--------------------------------------------+ + |bitmap_construct_agg(bitmap_bit_position(a))| + +--------------------------------------------+ + | [07 00 00 00 00 0...| + +--------------------------------------------+ """ return _invoke_function_over_columns("bitmap_construct_agg", col) @@ -24656,14 +26175,26 @@ def bitmap_count(col: "ColumnOrName") -> Column: Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name The input bitmap. + See Also + -------- + :meth:`pyspark.sql.functions.bitmap_bit_position` + :meth:`pyspark.sql.functions.bitmap_bucket_number` + :meth:`pyspark.sql.functions.bitmap_construct_agg` + :meth:`pyspark.sql.functions.bitmap_or_agg` + Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("FFFF",)], ["a"]) - >>> df.select(bitmap_count(to_binary(df.a, lit("hex"))).alias('r')).collect() - [Row(r=16)] + >>> df.select(sf.bitmap_count(sf.to_binary(df.a, sf.lit("hex")))).show() + +-------------------------------+ + |bitmap_count(to_binary(a, hex))| + +-------------------------------+ + | 16| + +-------------------------------+ """ return _invoke_function_over_columns("bitmap_count", col) @@ -24676,18 +26207,28 @@ def bitmap_or_agg(col: "ColumnOrName") -> Column: .. 
versionadded:: 3.5.0 + See Also + -------- + :meth:`pyspark.sql.functions.bitmap_bit_position` + :meth:`pyspark.sql.functions.bitmap_bucket_number` + :meth:`pyspark.sql.functions.bitmap_construct_agg` + :meth:`pyspark.sql.functions.bitmap_count` + Parameters ---------- - col : :class:`~pyspark.sql.Column` or str + col : :class:`~pyspark.sql.Column` or column name The input column should be bitmaps created from bitmap_construct_agg(). Examples -------- + >>> from pyspark.sql import functions as sf >>> df = spark.createDataFrame([("10",),("20",),("40",)], ["a"]) - >>> df.select(substring(hex( - ... bitmap_or_agg(to_binary(df.a, lit("hex"))) - ... ), 0, 6).alias("r")).collect() - [Row(r='700000')] + >>> df.select(sf.bitmap_or_agg(sf.to_binary(df.a, sf.lit("hex")))).show() + +--------------------------------+ + |bitmap_or_agg(to_binary(a, hex))| + +--------------------------------+ + | [70 00 00 00 00 0...| + +--------------------------------+ """ return _invoke_function_over_columns("bitmap_or_agg", col) diff --git a/python/pyspark/sql/observation.py b/python/pyspark/sql/observation.py index 6ceb6bc90327a..09ae7a339a4cb 100644 --- a/python/pyspark/sql/observation.py +++ b/python/pyspark/sql/observation.py @@ -122,7 +122,7 @@ def _on(self, df: DataFrame, *exprs: Column) -> DataFrame: self._jvm = df._sc._jvm assert self._jvm is not None - cls = self._jvm.org.apache.spark.sql.Observation + cls = getattr(self._jvm, "org.apache.spark.sql.Observation") self._jo = cls(self._name) if self._name is not None else cls() observed_df = df._jdf.observe( self._jo, exprs[0]._jc, _to_seq(df._sc, [c._jc for c in exprs[1:]]) diff --git a/python/pyspark/sql/pandas/group_ops.py b/python/pyspark/sql/pandas/group_ops.py index 56efe0676c08f..343a68bf010bf 100644 --- a/python/pyspark/sql/pandas/group_ops.py +++ b/python/pyspark/sql/pandas/group_ops.py @@ -35,7 +35,8 @@ TimerValues, ) from pyspark.sql.streaming.stateful_processor import StatefulProcessor, StatefulProcessorHandle -from 
pyspark.sql.types import StructType, _parse_datatype_string +from pyspark.sql.streaming.stateful_processor_util import TransformWithStateInPandasFuncMode +from pyspark.sql.types import StructType if TYPE_CHECKING: from pyspark.sql.pandas._typing import ( @@ -347,9 +348,9 @@ def applyInPandasWithState( ] if isinstance(outputStructType, str): - outputStructType = cast(StructType, _parse_datatype_string(outputStructType)) + outputStructType = cast(StructType, self._df._session._parse_ddl(outputStructType)) if isinstance(stateStructType, str): - stateStructType = cast(StructType, _parse_datatype_string(stateStructType)) + stateStructType = cast(StructType, self._df._session._parse_ddl(stateStructType)) udf = pandas_udf( func, # type: ignore[call-overload] @@ -374,6 +375,7 @@ def transformWithStateInPandas( outputMode: str, timeMode: str, initialState: Optional["GroupedData"] = None, + eventTimeColumnName: str = "", ) -> DataFrame: """ Invokes methods defined in the stateful processor used in arbitrary state API v2. It @@ -500,63 +502,85 @@ def transformWithStateInPandas( if initialState is not None: assert isinstance(initialState, GroupedData) if isinstance(outputStructType, str): - outputStructType = cast(StructType, _parse_datatype_string(outputStructType)) + outputStructType = cast(StructType, self._df._session._parse_ddl(outputStructType)) - def handle_data_with_timers( + def handle_pre_init( + statefulProcessorApiClient: StatefulProcessorApiClient, + ) -> Iterator["PandasDataFrameLike"]: + # Driver handle is different from the handle used on executors; + # On JVM side, we will use `DriverStatefulProcessorHandleImpl` for driver handle which + # will only be used for handling init() and get the state schema on the driver. 
+ driver_handle = StatefulProcessorHandle(statefulProcessorApiClient) + statefulProcessorApiClient.set_handle_state(StatefulProcessorHandleState.PRE_INIT) + statefulProcessor.init(driver_handle) + + # This method is used for the driver-side stateful processor after we have collected + # all the necessary schemas. This instance of the DriverStatefulProcessorHandleImpl + # won't be used again on JVM. + statefulProcessor.close() + + # return a dummy results, no return value is needed for pre init + return iter([]) + + def handle_data_rows( statefulProcessorApiClient: StatefulProcessorApiClient, key: Any, - inputRows: Iterator["PandasDataFrameLike"], + inputRows: Optional[Iterator["PandasDataFrameLike"]] = None, ) -> Iterator["PandasDataFrameLike"]: statefulProcessorApiClient.set_implicit_key(key) - if timeMode != "none": - batch_timestamp = statefulProcessorApiClient.get_batch_timestamp() - watermark_timestamp = statefulProcessorApiClient.get_watermark_timestamp() + + batch_timestamp, watermark_timestamp = statefulProcessorApiClient.get_timestamps( + timeMode + ) + + # process with data rows + if inputRows is not None: + data_iter = statefulProcessor.handleInputRows( + key, inputRows, TimerValues(batch_timestamp, watermark_timestamp) + ) + return data_iter else: - batch_timestamp = -1 - watermark_timestamp = -1 - # process with invalid expiry timer info and emit data rows - data_iter = statefulProcessor.handleInputRows( - key, - inputRows, - TimerValues(batch_timestamp, watermark_timestamp), - ExpiredTimerInfo(False), + return iter([]) + + def handle_expired_timers( + statefulProcessorApiClient: StatefulProcessorApiClient, + ) -> Iterator["PandasDataFrameLike"]: + batch_timestamp, watermark_timestamp = statefulProcessorApiClient.get_timestamps( + timeMode ) - statefulProcessorApiClient.set_handle_state(StatefulProcessorHandleState.DATA_PROCESSED) - if timeMode == "processingtime": + if timeMode.lower() == "processingtime": expiry_list_iter = 
statefulProcessorApiClient.get_expiry_timers_iterator( batch_timestamp ) - elif timeMode == "eventtime": + elif timeMode.lower() == "eventtime": expiry_list_iter = statefulProcessorApiClient.get_expiry_timers_iterator( watermark_timestamp ) else: expiry_list_iter = iter([[]]) - result_iter_list = [data_iter] - # process with valid expiry time info and with empty input rows, - # only timer related rows will be emitted + # process with expiry timers, only timer related rows will be emitted for expiry_list in expiry_list_iter: for key_obj, expiry_timestamp in expiry_list: - result_iter_list.append( - statefulProcessor.handleInputRows( - key_obj, - iter([]), - TimerValues(batch_timestamp, watermark_timestamp), - ExpiredTimerInfo(True, expiry_timestamp), - ) - ) - # TODO(SPARK-49603) set the handle state in the lazily initialized iterator - - result = itertools.chain(*result_iter_list) - return result + statefulProcessorApiClient.set_implicit_key(key_obj) + for pd in statefulProcessor.handleExpiredTimer( + key=key_obj, + timer_values=TimerValues(batch_timestamp, watermark_timestamp), + expired_timer_info=ExpiredTimerInfo(expiry_timestamp), + ): + yield pd + statefulProcessorApiClient.delete_timer(expiry_timestamp) def transformWithStateUDF( statefulProcessorApiClient: StatefulProcessorApiClient, + mode: TransformWithStateInPandasFuncMode, key: Any, inputRows: Iterator["PandasDataFrameLike"], ) -> Iterator["PandasDataFrameLike"]: + if mode == TransformWithStateInPandasFuncMode.PRE_INIT: + return handle_pre_init(statefulProcessorApiClient) + handle = StatefulProcessorHandle(statefulProcessorApiClient) if statefulProcessorApiClient.handle_state == StatefulProcessorHandleState.CREATED: @@ -565,19 +589,28 @@ def transformWithStateUDF( StatefulProcessorHandleState.INITIALIZED ) - # Key is None when we have processed all the input data from the worker and ready to - # proceed with the cleanup steps. 
- if key is None: + if mode == TransformWithStateInPandasFuncMode.PROCESS_TIMER: + statefulProcessorApiClient.set_handle_state( + StatefulProcessorHandleState.DATA_PROCESSED + ) + result = handle_expired_timers(statefulProcessorApiClient) + return result + elif mode == TransformWithStateInPandasFuncMode.COMPLETE: + statefulProcessorApiClient.set_handle_state( + StatefulProcessorHandleState.TIMER_PROCESSED + ) statefulProcessorApiClient.remove_implicit_key() statefulProcessor.close() statefulProcessorApiClient.set_handle_state(StatefulProcessorHandleState.CLOSED) return iter([]) - - result = handle_data_with_timers(statefulProcessorApiClient, key, inputRows) - return result + else: + # mode == TransformWithStateInPandasFuncMode.PROCESS_DATA + result = handle_data_rows(statefulProcessorApiClient, key, inputRows) + return result def transformWithStateWithInitStateUDF( statefulProcessorApiClient: StatefulProcessorApiClient, + mode: TransformWithStateInPandasFuncMode, key: Any, inputRows: Iterator["PandasDataFrameLike"], initialStates: Optional[Iterator["PandasDataFrameLike"]] = None, @@ -594,6 +627,9 @@ def transformWithStateWithInitStateUDF( - `initialStates` is None, while `inputRows` is not empty. This is not first batch. `initialStates` is initialized to the positional value as None. """ + if mode == TransformWithStateInPandasFuncMode.PRE_INIT: + return handle_pre_init(statefulProcessorApiClient) + handle = StatefulProcessorHandle(statefulProcessorApiClient) if statefulProcessorApiClient.handle_state == StatefulProcessorHandleState.CREATED: @@ -602,20 +638,30 @@ def transformWithStateWithInitStateUDF( StatefulProcessorHandleState.INITIALIZED ) - # Key is None when we have processed all the input data from the worker and ready to - # proceed with the cleanup steps. 
- if key is None: + if mode == TransformWithStateInPandasFuncMode.PROCESS_TIMER: + statefulProcessorApiClient.set_handle_state( + StatefulProcessorHandleState.DATA_PROCESSED + ) + result = handle_expired_timers(statefulProcessorApiClient) + return result + elif mode == TransformWithStateInPandasFuncMode.COMPLETE: statefulProcessorApiClient.remove_implicit_key() statefulProcessor.close() statefulProcessorApiClient.set_handle_state(StatefulProcessorHandleState.CLOSED) return iter([]) + else: + # mode == TransformWithStateInPandasFuncMode.PROCESS_DATA + batch_timestamp, watermark_timestamp = statefulProcessorApiClient.get_timestamps( + timeMode + ) # only process initial state if first batch and initial state is not None if initialStates is not None: for cur_initial_state in initialStates: statefulProcessorApiClient.set_implicit_key(key) - # TODO(SPARK-50194) integration with new timer API with initial state - statefulProcessor.handleInitialState(key, cur_initial_state) + statefulProcessor.handleInitialState( + key, cur_initial_state, TimerValues(batch_timestamp, watermark_timestamp) + ) # if we don't have input rows for the given key but only have initial state # for the grouping key, the inputRows iterator could be empty @@ -628,14 +674,14 @@ def transformWithStateWithInitStateUDF( inputRows = itertools.chain([first], inputRows) if not input_rows_empty: - result = handle_data_with_timers(statefulProcessorApiClient, key, inputRows) + result = handle_data_rows(statefulProcessorApiClient, key, inputRows) else: result = iter([]) return result if isinstance(outputStructType, str): - outputStructType = cast(StructType, _parse_datatype_string(outputStructType)) + outputStructType = cast(StructType, self._df._session._parse_ddl(outputStructType)) df = self._df @@ -662,6 +708,7 @@ def transformWithStateWithInitStateUDF( outputMode, timeMode, initial_state_java_obj, + eventTimeColumnName, ) return DataFrame(jdf, self.session) diff --git a/python/pyspark/sql/pandas/map_ops.py 
b/python/pyspark/sql/pandas/map_ops.py index c11a8b9d8d4d2..424269035f7ee 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -94,7 +94,7 @@ def _build_java_profile( jvm = self.sparkSession.sparkContext._jvm assert jvm is not None - builder = jvm.org.apache.spark.resource.ResourceProfileBuilder() + builder = getattr(jvm, "org.apache.spark.resource.ResourceProfileBuilder")() ereqs = ExecutorResourceRequests(jvm, profile._executor_resource_requests) treqs = TaskResourceRequests(jvm, profile._task_resource_requests) builder.require(ereqs._java_executor_resource_requests) diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py index 5bf07b87400fe..536bf7307065c 100644 --- a/python/pyspark/sql/pandas/serializers.py +++ b/python/pyspark/sql/pandas/serializers.py @@ -36,6 +36,7 @@ _create_converter_from_pandas, _create_converter_to_pandas, ) +from pyspark.sql.streaming.stateful_processor_util import TransformWithStateInPandasFuncMode from pyspark.sql.types import ( DataType, StringType, @@ -1197,7 +1198,11 @@ def generate_data_batches(batches): data_batches = generate_data_batches(_batches) for k, g in groupby(data_batches, key=lambda x: x[0]): - yield (k, g) + yield (TransformWithStateInPandasFuncMode.PROCESS_DATA, k, g) + + yield (TransformWithStateInPandasFuncMode.PROCESS_TIMER, None, None) + + yield (TransformWithStateInPandasFuncMode.COMPLETE, None, None) def dump_stream(self, iterator, stream): """ @@ -1281,4 +1286,8 @@ def flatten_columns(cur_batch, col_name): data_batches = generate_data_batches(_batches) for k, g in groupby(data_batches, key=lambda x: x[0]): - yield (k, g) + yield (TransformWithStateInPandasFuncMode.PROCESS_DATA, k, g) + + yield (TransformWithStateInPandasFuncMode.PROCESS_TIMER, None, None) + + yield (TransformWithStateInPandasFuncMode.COMPLETE, None, None) diff --git a/python/pyspark/sql/pandas/types.py b/python/pyspark/sql/pandas/types.py index 
648af21502864..d65126bb3db9e 100644 --- a/python/pyspark/sql/pandas/types.py +++ b/python/pyspark/sql/pandas/types.py @@ -53,14 +53,11 @@ ) from pyspark.errors import PySparkTypeError, UnsupportedOperationException, PySparkValueError from pyspark.loose_version import LooseVersion -from pyspark.sql.utils import has_numpy - -if has_numpy: - import numpy as np if TYPE_CHECKING: import pandas as pd import pyarrow as pa + import numpy as np from pyspark.sql.pandas._typing import SeriesLike as PandasSeriesLike from pyspark.sql.pandas._typing import DataFrameLike as PandasDataFrameLike diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py index 5849ae0edd6d9..a351c13ff0a08 100644 --- a/python/pyspark/sql/pandas/utils.py +++ b/python/pyspark/sql/pandas/utils.py @@ -61,7 +61,7 @@ def require_minimum_pandas_version() -> None: def require_minimum_pyarrow_version() -> None: """Raise ImportError if minimum version of pyarrow is not installed""" # TODO(HyukjinKwon): Relocate and deduplicate the version specification. 
- minimum_pyarrow_version = "10.0.0" + minimum_pyarrow_version = "11.0.0" import os diff --git a/python/pyspark/sql/plot/core.py b/python/pyspark/sql/plot/core.py index f7133bdb70ed6..e565a5d1ebf32 100644 --- a/python/pyspark/sql/plot/core.py +++ b/python/pyspark/sql/plot/core.py @@ -19,11 +19,10 @@ from typing import Any, TYPE_CHECKING, List, Optional, Union, Sequence from types import ModuleType -from pyspark.errors import PySparkTypeError, PySparkValueError +from pyspark.errors import PySparkValueError from pyspark.sql import Column, functions as F from pyspark.sql.internal import InternalFunction as SF from pyspark.sql.pandas.utils import require_minimum_pandas_version -from pyspark.sql.types import NumericType from pyspark.sql.utils import NumpyHelper, require_minimum_plotly_version if TYPE_CHECKING: @@ -295,7 +294,7 @@ def area(self, x: str, y: Union[str, list[str]], **kwargs: Any) -> "Figure": """ return self(kind="area", x=x, y=y, **kwargs) - def pie(self, x: str, y: str, **kwargs: Any) -> "Figure": + def pie(self, x: str, y: Optional[str], **kwargs: Any) -> "Figure": """ Generate a pie plot. @@ -306,8 +305,8 @@ def pie(self, x: str, y: str, **kwargs: Any) -> "Figure": ---------- x : str Name of column to be used as the category labels for the pie plot. - y : str - Name of the column to plot. + y : str, optional + Name of the column to plot. If not provided, `subplots=True` must be passed at `kwargs`. **kwargs Additional keyword arguments. 
@@ -327,19 +326,8 @@ def pie(self, x: str, y: str, **kwargs: Any) -> "Figure": >>> columns = ["sales", "signups", "visits", "date"] >>> df = spark.createDataFrame(data, columns) >>> df.plot.pie(x='date', y='sales') # doctest: +SKIP + >>> df.plot.pie(x='date', subplots=True) # doctest: +SKIP """ - schema = self.data.schema - - # Check if 'y' is a numerical column - y_field = schema[y] if y in schema.names else None - if y_field is None or not isinstance(y_field.dataType, NumericType): - raise PySparkTypeError( - errorClass="PLOT_NOT_NUMERIC_COLUMN_ARGUMENT", - messageParameters={ - "arg_name": "y", - "arg_type": str(y_field.dataType.__class__.__name__) if y_field else "None", - }, - ) return self(kind="pie", x=x, y=y, **kwargs) def box(self, column: Optional[Union[str, List[str]]] = None, **kwargs: Any) -> "Figure": diff --git a/python/pyspark/sql/plot/plotly.py b/python/pyspark/sql/plot/plotly.py index 959562b43552a..526a36033e2fc 100644 --- a/python/pyspark/sql/plot/plotly.py +++ b/python/pyspark/sql/plot/plotly.py @@ -48,13 +48,34 @@ def plot_pyspark(data: "DataFrame", kind: str, **kwargs: Any) -> "Figure": def plot_pie(data: "DataFrame", **kwargs: Any) -> "Figure": - # TODO(SPARK-49530): Support pie subplots with plotly backend from plotly import express pdf = PySparkPlotAccessor.plot_data_map["pie"](data) x = kwargs.pop("x", None) y = kwargs.pop("y", None) - fig = express.pie(pdf, values=y, names=x, **kwargs) + subplots = kwargs.pop("subplots", False) + if y is None and not subplots: + raise PySparkValueError(errorClass="UNSUPPORTED_PIE_PLOT_PARAM", messageParameters={}) + + numeric_ys = process_column_param(y, data) + + if subplots: + # One pie chart per numeric column + from plotly.subplots import make_subplots + + fig = make_subplots( + rows=1, + cols=len(numeric_ys), + # To accommodate domain-based trace - pie chart + specs=[[{"type": "domain"}] * len(numeric_ys)], + ) + for i, y_col in enumerate(numeric_ys): + subplot_fig = express.pie(pdf, values=y_col, 
names=x, **kwargs) + fig.add_trace( + subplot_fig.data[0], row=1, col=i + 1 + ) # A single pie chart has only one trace + else: + fig = express.pie(pdf, values=numeric_ys[0], names=x, **kwargs) return fig @@ -130,7 +151,7 @@ def plot_box(data: "DataFrame", **kwargs: Any) -> "Figure": def plot_kde(data: "DataFrame", **kwargs: Any) -> "Figure": - from pyspark.sql.utils import has_numpy + from pyspark.testing.utils import have_numpy from pyspark.sql.pandas.utils import require_minimum_pandas_version require_minimum_pandas_version() @@ -145,7 +166,7 @@ def plot_kde(data: "DataFrame", **kwargs: Any) -> "Figure": colnames = process_column_param(kwargs.pop("column", None), data) ind = PySparkKdePlotBase.get_ind(data.select(*colnames), kwargs.pop("ind", None)) - if has_numpy: + if have_numpy: import numpy as np if isinstance(ind, np.ndarray): diff --git a/python/pyspark/sql/protobuf/functions.py b/python/pyspark/sql/protobuf/functions.py index 1e75874e75f9a..ece450a77f4f3 100644 --- a/python/pyspark/sql/protobuf/functions.py +++ b/python/pyspark/sql/protobuf/functions.py @@ -149,13 +149,13 @@ def from_protobuf( elif descFilePath is not None: binary_proto = _read_descriptor_set_file(descFilePath) if binary_proto is not None: - jc = cast(JVMView, sc._jvm).org.apache.spark.sql.protobuf.functions.from_protobuf( - _to_java_column(data), messageName, binary_proto, options or {} - ) + jc = getattr( + cast(JVMView, sc._jvm), "org.apache.spark.sql.protobuf.functions" + ).from_protobuf(_to_java_column(data), messageName, binary_proto, options or {}) else: - jc = cast(JVMView, sc._jvm).org.apache.spark.sql.protobuf.functions.from_protobuf( - _to_java_column(data), messageName, options or {} - ) + jc = getattr( + cast(JVMView, sc._jvm), "org.apache.spark.sql.protobuf.functions" + ).from_protobuf(_to_java_column(data), messageName, options or {}) except TypeError as e: if str(e) == "'JavaPackage' object is not callable": _print_missing_jar("Protobuf", "protobuf", "protobuf", 
sc.version) @@ -271,13 +271,13 @@ def to_protobuf( elif descFilePath is not None: binary_proto = _read_descriptor_set_file(descFilePath) if binary_proto is not None: - jc = cast(JVMView, sc._jvm).org.apache.spark.sql.protobuf.functions.to_protobuf( - _to_java_column(data), messageName, binary_proto, options or {} - ) + jc = getattr( + cast(JVMView, sc._jvm), "org.apache.spark.sql.protobuf.functions" + ).to_protobuf(_to_java_column(data), messageName, binary_proto, options or {}) else: - jc = cast(JVMView, sc._jvm).org.apache.spark.sql.protobuf.functions.to_protobuf( - _to_java_column(data), messageName, options or {} - ) + jc = getattr( + cast(JVMView, sc._jvm), "org.apache.spark.sql.protobuf.functions" + ).to_protobuf(_to_java_column(data), messageName, options or {}) except TypeError as e: if str(e) == "'JavaPackage' object is not callable": diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 4744bdf861d37..96c8f8a475b26 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -215,7 +215,7 @@ def options(self, **options: "OptionalPrimitiveType") -> "DataFrameReader": Parameters ---------- **options : dict - The dictionary of string keys and prmitive-type values. + The dictionary of string keys and primitive-type values. 
Examples -------- @@ -1174,7 +1174,9 @@ def jdbc( if predicates is not None: gateway = self._spark._sc._gateway assert gateway is not None - jpredicates = utils.to_java_array(gateway, gateway.jvm.java.lang.String, predicates) + jpredicates = utils.to_java_array( + gateway, getattr(gateway.jvm, "java.lang.String"), predicates + ) return self._df(self._jreader.jdbc(url, table, jpredicates, jprop)) return self._df(self._jreader.jdbc(url, table, jprop)) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index e97b844564100..f5bb269c23d6e 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -18,7 +18,7 @@ import sys import warnings from collections.abc import Sized -from functools import reduce +from functools import reduce, cached_property from threading import RLock from types import TracebackType from typing import ( @@ -58,7 +58,6 @@ _has_nulltype, _merge_type, _create_converter, - _parse_datatype_string, _from_numpy_type, ) from pyspark.errors.exceptions.captured import install_exception_handler @@ -90,12 +89,6 @@ from pyspark.sql.connect.client import SparkConnectClient from pyspark.sql.connect.shell.progress import ProgressHandler -try: - import memory_profiler # noqa: F401 - - has_memory_profiler = True -except Exception: - has_memory_profiler = False __all__ = ["SparkSession"] @@ -779,7 +772,7 @@ def sparkContext(self) -> "SparkContext": """ return self._sc - @property + @cached_property def version(self) -> str: """ The version of Spark on which this application is running. @@ -800,7 +793,7 @@ def version(self) -> str: """ return self._jsparkSession.version() - @property + @cached_property def conf(self) -> RuntimeConfig: """Runtime configuration interface for Spark. 
@@ -828,11 +821,9 @@ def conf(self) -> RuntimeConfig: >>> spark.conf.get("key") 'value' """ - if not hasattr(self, "_conf"): - self._conf = RuntimeConfig(self._jsparkSession.conf()) - return self._conf + return RuntimeConfig(self._jsparkSession.conf()) - @property + @cached_property def catalog(self) -> "Catalog": """Interface through which the user may create, drop, alter or query underlying databases, tables, functions, etc. @@ -860,9 +851,7 @@ def catalog(self) -> "Catalog": """ from pyspark.sql.catalog import Catalog - if not hasattr(self, "_catalog"): - self._catalog = Catalog(self) - return self._catalog + return Catalog(self) @property def udf(self) -> "UDFRegistration": @@ -1478,7 +1467,9 @@ def createDataFrame( # type: ignore[misc] +-----+---+ |Alice| 1| +-----+---+ - >>> spark.createDataFrame(pandas.DataFrame([[1, 2]])).collect() # doctest: +SKIP + + >>> pdf = pandas.DataFrame([[1, 2]]) # doctest: +SKIP + >>> spark.createDataFrame(pdf).show() # doctest: +SKIP +---+---+ | 0| 1| +---+---+ @@ -1493,8 +1484,9 @@ def createDataFrame( # type: ignore[misc] +-----+---+ |Alice| 1| +-----+---+ + >>> table = pyarrow.table({'0': [1], '1': [2]}) # doctest: +SKIP - >>> spark.createDataFrame(table).collect() # doctest: +SKIP + >>> spark.createDataFrame(table).show() # doctest: +SKIP +---+---+ | 0| 1| +---+---+ @@ -1511,7 +1503,7 @@ def createDataFrame( # type: ignore[misc] ) if isinstance(schema, str): - schema = cast(Union[AtomicType, StructType, str], _parse_datatype_string(schema)) + schema = cast(Union[AtomicType, StructType, str], self._parse_ddl(schema)) elif isinstance(schema, (list, tuple)): # Must re-encode any unicode strings to be consistent with StructField names schema = [x.encode("utf-8") if not isinstance(x, str) else x for x in schema] @@ -1913,7 +1905,7 @@ def readStream(self) -> DataStreamReader: """ return DataStreamReader(self) - @property + @cached_property def streams(self) -> "StreamingQueryManager": """Returns a :class:`StreamingQueryManager` 
that allows managing all the :class:`StreamingQuery` instances active on `this` context. @@ -1947,10 +1939,7 @@ def streams(self) -> "StreamingQueryManager": """ from pyspark.sql.streaming import StreamingQueryManager - if hasattr(self, "_sqm"): - return self._sqm - self._sqm: StreamingQueryManager = StreamingQueryManager(self._jsparkSession.streams()) - return self._sqm + return StreamingQueryManager(self._jsparkSession.streams()) @property def tvf(self) -> "TableValuedFunction": @@ -2211,13 +2200,15 @@ def copyFromLocalToFs(self, local_path: str, dest_path: str) -> None: messageParameters={"feature": "SparkSession.copyFromLocalToFs"}, ) - @remote_only def interruptAll(self) -> List[str]: """ Interrupt all operations of this session currently running on the connected server. .. versionadded:: 3.5.0 + .. versionchanged:: 4.0.0 + Supports Spark Classic. + Returns ------- list of str @@ -2227,18 +2218,25 @@ def interruptAll(self) -> List[str]: ----- There is still a possibility of operation finishing just as it is interrupted. """ - raise PySparkRuntimeError( - errorClass="ONLY_SUPPORTED_WITH_SPARK_CONNECT", - messageParameters={"feature": "SparkSession.interruptAll"}, - ) + java_list = self._jsparkSession.interruptAll() + python_list = list() + + # Use iterator to manually iterate through Java list + java_iterator = java_list.iterator() + while java_iterator.hasNext(): + python_list.append(str(java_iterator.next())) + + return python_list - @remote_only def interruptTag(self, tag: str) -> List[str]: """ Interrupt all operations of this session with the given operation tag. .. versionadded:: 3.5.0 + .. versionchanged:: 4.0.0 + Supports Spark Classic. + Returns ------- list of str @@ -2248,18 +2246,25 @@ def interruptTag(self, tag: str) -> List[str]: ----- There is still a possibility of operation finishing just as it is interrupted. 
""" - raise PySparkRuntimeError( - errorClass="ONLY_SUPPORTED_WITH_SPARK_CONNECT", - messageParameters={"feature": "SparkSession.interruptTag"}, - ) + java_list = self._jsparkSession.interruptTag(tag) + python_list = list() + + # Use iterator to manually iterate through Java list + java_iterator = java_list.iterator() + while java_iterator.hasNext(): + python_list.append(str(java_iterator.next())) + + return python_list - @remote_only def interruptOperation(self, op_id: str) -> List[str]: """ Interrupt an operation of this session with the given operationId. .. versionadded:: 3.5.0 + .. versionchanged:: 4.0.0 + Supports Spark Classic. + Returns ------- list of str @@ -2269,12 +2274,16 @@ def interruptOperation(self, op_id: str) -> List[str]: ----- There is still a possibility of operation finishing just as it is interrupted. """ - raise PySparkRuntimeError( - errorClass="ONLY_SUPPORTED_WITH_SPARK_CONNECT", - messageParameters={"feature": "SparkSession.interruptOperation"}, - ) + java_list = self._jsparkSession.interruptOperation(op_id) + python_list = list() + + # Use iterator to manually iterate through Java list + java_iterator = java_list.iterator() + while java_iterator.hasNext(): + python_list.append(str(java_iterator.next())) + + return python_list - @remote_only def addTag(self, tag: str) -> None: """ Add a tag to be assigned to all the operations started by this thread in this session. @@ -2289,17 +2298,16 @@ def addTag(self, tag: str) -> None: .. versionadded:: 3.5.0 + .. versionchanged:: 4.0.0 + Supports Spark Classic. + Parameters ---------- tag : str The tag to be added. Cannot contain ',' (comma) character or be an empty string. 
""" - raise PySparkRuntimeError( - errorClass="ONLY_SUPPORTED_WITH_SPARK_CONNECT", - messageParameters={"feature": "SparkSession.addTag"}, - ) + self._jsparkSession.addTag(tag) - @remote_only def removeTag(self, tag: str) -> None: """ Remove a tag previously added to be assigned to all the operations started by this thread in @@ -2307,17 +2315,16 @@ def removeTag(self, tag: str) -> None: .. versionadded:: 3.5.0 + .. versionchanged:: 4.0.0 + Supports Spark Classic. + Parameters ---------- tag : list of str The tag to be removed. Cannot contain ',' (comma) character or be an empty string. """ - raise PySparkRuntimeError( - errorClass="ONLY_SUPPORTED_WITH_SPARK_CONNECT", - messageParameters={"feature": "SparkSession.removeTag"}, - ) + self._jsparkSession.removeTag(tag) - @remote_only def getTags(self) -> Set[str]: """ Get the tags that are currently set to be assigned to all the operations started by this @@ -2325,27 +2332,40 @@ def getTags(self) -> Set[str]: .. versionadded:: 3.5.0 + .. versionchanged:: 4.0.0 + Supports Spark Classic. + Returns ------- set of str Set of tags of interrupted operations. """ - raise PySparkRuntimeError( - errorClass="ONLY_SUPPORTED_WITH_SPARK_CONNECT", - messageParameters={"feature": "SparkSession.getTags"}, - ) + java_set = self._jsparkSession.getTags() + python_set = set() + + # Use iterator to manually iterate through Java Set + java_iterator = java_set.iterator() + while java_iterator.hasNext(): + python_set.add(str(java_iterator.next())) + + return python_set - @remote_only def clearTags(self) -> None: """ Clear the current thread's operation tags. .. versionadded:: 3.5.0 + + .. versionchanged:: 4.0.0 + Supports Spark Classic. 
""" - raise PySparkRuntimeError( - errorClass="ONLY_SUPPORTED_WITH_SPARK_CONNECT", - messageParameters={"feature": "SparkSession.clearTags"}, - ) + self._jsparkSession.clearTags() + + def _to_ddl(self, struct: StructType) -> str: + return self._sc._to_ddl(struct) + + def _parse_ddl(self, ddl: str) -> DataType: + return self._sc._parse_ddl(ddl) def _test() -> None: diff --git a/python/pyspark/sql/streaming/list_state_client.py b/python/pyspark/sql/streaming/list_state_client.py index d2152842819a5..cb618d1a691b3 100644 --- a/python/pyspark/sql/streaming/list_state_client.py +++ b/python/pyspark/sql/streaming/list_state_client.py @@ -14,10 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from typing import Dict, Iterator, List, Union, cast, Tuple +from typing import Dict, Iterator, List, Union, Tuple from pyspark.sql.streaming.stateful_processor_api_client import StatefulProcessorApiClient -from pyspark.sql.types import StructType, TYPE_CHECKING, _parse_datatype_string +from pyspark.sql.types import StructType, TYPE_CHECKING from pyspark.errors import PySparkRuntimeError import uuid @@ -28,8 +28,16 @@ class ListStateClient: - def __init__(self, stateful_processor_api_client: StatefulProcessorApiClient) -> None: + def __init__( + self, + stateful_processor_api_client: StatefulProcessorApiClient, + schema: Union[StructType, str], + ) -> None: self._stateful_processor_api_client = stateful_processor_api_client + if isinstance(schema, str): + self.schema = self._stateful_processor_api_client._parse_string_schema(schema) + else: + self.schema = schema # A dictionary to store the mapping between list state name and a tuple of pandas DataFrame # and the index of the last row that was read. 
self.pandas_df_dict: Dict[str, Tuple["PandasDataFrameLike", int]] = {} @@ -105,12 +113,10 @@ def get(self, state_name: str, iterator_id: str) -> Tuple: pandas_row = pandas_df.iloc[index] return tuple(pandas_row) - def append_value(self, state_name: str, schema: Union[StructType, str], value: Tuple) -> None: + def append_value(self, state_name: str, value: Tuple) -> None: import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage - if isinstance(schema, str): - schema = cast(StructType, _parse_datatype_string(schema)) - bytes = self._stateful_processor_api_client._serialize_to_bytes(schema, value) + bytes = self._stateful_processor_api_client._serialize_to_bytes(self.schema, value) append_value_call = stateMessage.AppendValue(value=bytes) list_state_call = stateMessage.ListStateCall( stateName=state_name, appendValue=append_value_call @@ -125,13 +131,9 @@ def append_value(self, state_name: str, schema: Union[StructType, str], value: T # TODO(SPARK-49233): Classify user facing errors. 
raise PySparkRuntimeError(f"Error updating value state: " f"{response_message[1]}") - def append_list( - self, state_name: str, schema: Union[StructType, str], values: List[Tuple] - ) -> None: + def append_list(self, state_name: str, values: List[Tuple]) -> None: import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage - if isinstance(schema, str): - schema = cast(StructType, _parse_datatype_string(schema)) append_list_call = stateMessage.AppendList() list_state_call = stateMessage.ListStateCall( stateName=state_name, appendList=append_list_call @@ -141,18 +143,16 @@ def append_list( self._stateful_processor_api_client._send_proto_message(message.SerializeToString()) - self._stateful_processor_api_client._send_arrow_state(schema, values) + self._stateful_processor_api_client._send_arrow_state(self.schema, values) response_message = self._stateful_processor_api_client._receive_proto_message() status = response_message[0] if status != 0: # TODO(SPARK-49233): Classify user facing errors. 
raise PySparkRuntimeError(f"Error updating value state: " f"{response_message[1]}") - def put(self, state_name: str, schema: Union[StructType, str], values: List[Tuple]) -> None: + def put(self, state_name: str, values: List[Tuple]) -> None: import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage - if isinstance(schema, str): - schema = cast(StructType, _parse_datatype_string(schema)) put_call = stateMessage.ListStatePut() list_state_call = stateMessage.ListStateCall(stateName=state_name, listStatePut=put_call) state_variable_request = stateMessage.StateVariableRequest(listStateCall=list_state_call) @@ -160,7 +160,7 @@ def put(self, state_name: str, schema: Union[StructType, str], values: List[Tupl self._stateful_processor_api_client._send_proto_message(message.SerializeToString()) - self._stateful_processor_api_client._send_arrow_state(schema, values) + self._stateful_processor_api_client._send_arrow_state(self.schema, values) response_message = self._stateful_processor_api_client._receive_proto_message() status = response_message[0] if status != 0: diff --git a/python/pyspark/sql/streaming/map_state_client.py b/python/pyspark/sql/streaming/map_state_client.py index 6ec7448b48634..c4761ddd48a16 100644 --- a/python/pyspark/sql/streaming/map_state_client.py +++ b/python/pyspark/sql/streaming/map_state_client.py @@ -14,10 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -from typing import Dict, Iterator, Union, cast, Tuple, Optional +from typing import Dict, Iterator, Union, Tuple, Optional from pyspark.sql.streaming.stateful_processor_api_client import StatefulProcessorApiClient -from pyspark.sql.types import StructType, TYPE_CHECKING, _parse_datatype_string +from pyspark.sql.types import StructType, TYPE_CHECKING from pyspark.errors import PySparkRuntimeError import uuid @@ -36,11 +36,15 @@ def __init__( ) -> None: self._stateful_processor_api_client = stateful_processor_api_client if isinstance(user_key_schema, str): - self.user_key_schema = cast(StructType, _parse_datatype_string(user_key_schema)) + self.user_key_schema = self._stateful_processor_api_client._parse_string_schema( + user_key_schema + ) else: self.user_key_schema = user_key_schema if isinstance(value_schema, str): - self.value_schema = cast(StructType, _parse_datatype_string(value_schema)) + self.value_schema = self._stateful_processor_api_client._parse_string_schema( + value_schema + ) else: self.value_schema = value_schema # Dictionaries to store the mapping between iterator id and a tuple of pandas DataFrame diff --git a/python/pyspark/sql/streaming/proto/StateMessage_pb2.py b/python/pyspark/sql/streaming/proto/StateMessage_pb2.py index 0a54690513a39..20af541f307cd 100644 --- a/python/pyspark/sql/streaming/proto/StateMessage_pb2.py +++ b/python/pyspark/sql/streaming/proto/StateMessage_pb2.py @@ -40,7 +40,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n;org/apache/spark/sql/execution/streaming/StateMessage.proto\x12.org.apache.spark.sql.execution.streaming.state"\xa0\x04\n\x0cStateRequest\x12\x18\n\x07version\x18\x01 \x01(\x05R\x07version\x12}\n\x15statefulProcessorCall\x18\x02 \x01(\x0b\x32\x45.org.apache.spark.sql.execution.streaming.state.StatefulProcessorCallH\x00R\x15statefulProcessorCall\x12z\n\x14stateVariableRequest\x18\x03 
\x01(\x0b\x32\x44.org.apache.spark.sql.execution.streaming.state.StateVariableRequestH\x00R\x14stateVariableRequest\x12\x8c\x01\n\x1aimplicitGroupingKeyRequest\x18\x04 \x01(\x0b\x32J.org.apache.spark.sql.execution.streaming.state.ImplicitGroupingKeyRequestH\x00R\x1aimplicitGroupingKeyRequest\x12\x62\n\x0ctimerRequest\x18\x05 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.TimerRequestH\x00R\x0ctimerRequestB\x08\n\x06method"i\n\rStateResponse\x12\x1e\n\nstatusCode\x18\x01 \x01(\x05R\nstatusCode\x12"\n\x0c\x65rrorMessage\x18\x02 \x01(\tR\x0c\x65rrorMessage\x12\x14\n\x05value\x18\x03 \x01(\x0cR\x05value"x\n\x1cStateResponseWithLongTypeVal\x12\x1e\n\nstatusCode\x18\x01 \x01(\x05R\nstatusCode\x12"\n\x0c\x65rrorMessage\x18\x02 \x01(\tR\x0c\x65rrorMessage\x12\x14\n\x05value\x18\x03 \x01(\x03R\x05value"\xa0\x05\n\x15StatefulProcessorCall\x12h\n\x0esetHandleState\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.SetHandleStateH\x00R\x0esetHandleState\x12h\n\rgetValueState\x18\x02 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\rgetValueState\x12\x66\n\x0cgetListState\x18\x03 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0cgetListState\x12\x64\n\x0bgetMapState\x18\x04 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0bgetMapState\x12o\n\x0etimerStateCall\x18\x05 \x01(\x0b\x32\x45.org.apache.spark.sql.execution.streaming.state.TimerStateCallCommandH\x00R\x0etimerStateCall\x12j\n\x0e\x64\x65leteIfExists\x18\x06 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0e\x64\x65leteIfExistsB\x08\n\x06method"\xd5\x02\n\x14StateVariableRequest\x12h\n\x0evalueStateCall\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.ValueStateCallH\x00R\x0evalueStateCall\x12\x65\n\rlistStateCall\x18\x02 
\x01(\x0b\x32=.org.apache.spark.sql.execution.streaming.state.ListStateCallH\x00R\rlistStateCall\x12\x62\n\x0cmapStateCall\x18\x03 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.MapStateCallH\x00R\x0cmapStateCallB\x08\n\x06method"\x83\x02\n\x1aImplicitGroupingKeyRequest\x12h\n\x0esetImplicitKey\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.SetImplicitKeyH\x00R\x0esetImplicitKey\x12q\n\x11removeImplicitKey\x18\x02 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.RemoveImplicitKeyH\x00R\x11removeImplicitKeyB\x08\n\x06method"\x81\x02\n\x0cTimerRequest\x12q\n\x11timerValueRequest\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.TimerValueRequestH\x00R\x11timerValueRequest\x12t\n\x12\x65xpiryTimerRequest\x18\x02 \x01(\x0b\x32\x42.org.apache.spark.sql.execution.streaming.state.ExpiryTimerRequestH\x00R\x12\x65xpiryTimerRequestB\x08\n\x06method"\xf6\x01\n\x11TimerValueRequest\x12s\n\x12getProcessingTimer\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.GetProcessingTimeH\x00R\x12getProcessingTimer\x12\x62\n\x0cgetWatermark\x18\x02 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.GetWatermarkH\x00R\x0cgetWatermarkB\x08\n\x06method"B\n\x12\x45xpiryTimerRequest\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs"\x13\n\x11GetProcessingTime"\x0e\n\x0cGetWatermark"\xc7\x01\n\x10StateCallCommand\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12\x16\n\x06schema\x18\x02 \x01(\tR\x06schema\x12\x30\n\x13mapStateValueSchema\x18\x03 \x01(\tR\x13mapStateValueSchema\x12K\n\x03ttl\x18\x04 \x01(\x0b\x32\x39.org.apache.spark.sql.execution.streaming.state.TTLConfigR\x03ttl"\xa7\x02\n\x15TimerStateCallCommand\x12[\n\x08register\x18\x01 \x01(\x0b\x32=.org.apache.spark.sql.execution.streaming.state.RegisterTimerH\x00R\x08register\x12U\n\x06\x64\x65lete\x18\x02 
\x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.DeleteTimerH\x00R\x06\x64\x65lete\x12P\n\x04list\x18\x03 \x01(\x0b\x32:.org.apache.spark.sql.execution.streaming.state.ListTimersH\x00R\x04listB\x08\n\x06method"\x92\x03\n\x0eValueStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12G\n\x03get\x18\x03 \x01(\x0b\x32\x33.org.apache.spark.sql.execution.streaming.state.GetH\x00R\x03get\x12n\n\x10valueStateUpdate\x18\x04 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.ValueStateUpdateH\x00R\x10valueStateUpdate\x12M\n\x05\x63lear\x18\x05 \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method"\xdf\x04\n\rListStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12\x62\n\x0clistStateGet\x18\x03 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.ListStateGetH\x00R\x0clistStateGet\x12\x62\n\x0clistStatePut\x18\x04 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.ListStatePutH\x00R\x0clistStatePut\x12_\n\x0b\x61ppendValue\x18\x05 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.AppendValueH\x00R\x0b\x61ppendValue\x12\\\n\nappendList\x18\x06 \x01(\x0b\x32:.org.apache.spark.sql.execution.streaming.state.AppendListH\x00R\nappendList\x12M\n\x05\x63lear\x18\x07 \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method"\xc2\x06\n\x0cMapStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12V\n\x08getValue\x18\x03 \x01(\x0b\x32\x38.org.apache.spark.sql.execution.streaming.state.GetValueH\x00R\x08getValue\x12_\n\x0b\x63ontainsKey\x18\x04 
\x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.ContainsKeyH\x00R\x0b\x63ontainsKey\x12_\n\x0bupdateValue\x18\x05 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.UpdateValueH\x00R\x0bupdateValue\x12V\n\x08iterator\x18\x06 \x01(\x0b\x32\x38.org.apache.spark.sql.execution.streaming.state.IteratorH\x00R\x08iterator\x12J\n\x04keys\x18\x07 \x01(\x0b\x32\x34.org.apache.spark.sql.execution.streaming.state.KeysH\x00R\x04keys\x12P\n\x06values\x18\x08 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ValuesH\x00R\x06values\x12Y\n\tremoveKey\x18\t \x01(\x0b\x32\x39.org.apache.spark.sql.execution.streaming.state.RemoveKeyH\x00R\tremoveKey\x12M\n\x05\x63lear\x18\n \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method""\n\x0eSetImplicitKey\x12\x10\n\x03key\x18\x01 \x01(\x0cR\x03key"\x13\n\x11RemoveImplicitKey"\x08\n\x06\x45xists"\x05\n\x03Get"=\n\rRegisterTimer\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs";\n\x0b\x44\x65leteTimer\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs",\n\nListTimers\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"(\n\x10ValueStateUpdate\x12\x14\n\x05value\x18\x01 \x01(\x0cR\x05value"\x07\n\x05\x43lear".\n\x0cListStateGet\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"\x0e\n\x0cListStatePut"#\n\x0b\x41ppendValue\x12\x14\n\x05value\x18\x01 \x01(\x0cR\x05value"\x0c\n\nAppendList"$\n\x08GetValue\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey"\'\n\x0b\x43ontainsKey\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey"=\n\x0bUpdateValue\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey\x12\x14\n\x05value\x18\x02 \x01(\x0cR\x05value"*\n\x08Iterator\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"&\n\x04Keys\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"(\n\x06Values\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"%\n\tRemoveKey\x12\x18\n\x07userKey\x18\x01 
\x01(\x0cR\x07userKey"c\n\x0eSetHandleState\x12Q\n\x05state\x18\x01 \x01(\x0e\x32;.org.apache.spark.sql.execution.streaming.state.HandleStateR\x05state"+\n\tTTLConfig\x12\x1e\n\ndurationMs\x18\x01 \x01(\x05R\ndurationMs*`\n\x0bHandleState\x12\x0b\n\x07\x43REATED\x10\x00\x12\x0f\n\x0bINITIALIZED\x10\x01\x12\x12\n\x0e\x44\x41TA_PROCESSED\x10\x02\x12\x13\n\x0fTIMER_PROCESSED\x10\x03\x12\n\n\x06\x43LOSED\x10\x04\x62\x06proto3' + b'\n;org/apache/spark/sql/execution/streaming/StateMessage.proto\x12.org.apache.spark.sql.execution.streaming.state"\x84\x05\n\x0cStateRequest\x12\x18\n\x07version\x18\x01 \x01(\x05R\x07version\x12}\n\x15statefulProcessorCall\x18\x02 \x01(\x0b\x32\x45.org.apache.spark.sql.execution.streaming.state.StatefulProcessorCallH\x00R\x15statefulProcessorCall\x12z\n\x14stateVariableRequest\x18\x03 \x01(\x0b\x32\x44.org.apache.spark.sql.execution.streaming.state.StateVariableRequestH\x00R\x14stateVariableRequest\x12\x8c\x01\n\x1aimplicitGroupingKeyRequest\x18\x04 \x01(\x0b\x32J.org.apache.spark.sql.execution.streaming.state.ImplicitGroupingKeyRequestH\x00R\x1aimplicitGroupingKeyRequest\x12\x62\n\x0ctimerRequest\x18\x05 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.TimerRequestH\x00R\x0ctimerRequest\x12\x62\n\x0cutilsRequest\x18\x06 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.UtilsRequestH\x00R\x0cutilsRequestB\x08\n\x06method"i\n\rStateResponse\x12\x1e\n\nstatusCode\x18\x01 \x01(\x05R\nstatusCode\x12"\n\x0c\x65rrorMessage\x18\x02 \x01(\tR\x0c\x65rrorMessage\x12\x14\n\x05value\x18\x03 \x01(\x0cR\x05value"x\n\x1cStateResponseWithLongTypeVal\x12\x1e\n\nstatusCode\x18\x01 \x01(\x05R\nstatusCode\x12"\n\x0c\x65rrorMessage\x18\x02 \x01(\tR\x0c\x65rrorMessage\x12\x14\n\x05value\x18\x03 \x01(\x03R\x05value"z\n\x1eStateResponseWithStringTypeVal\x12\x1e\n\nstatusCode\x18\x01 \x01(\x05R\nstatusCode\x12"\n\x0c\x65rrorMessage\x18\x02 \x01(\tR\x0c\x65rrorMessage\x12\x14\n\x05value\x18\x03 
\x01(\tR\x05value"\xa0\x05\n\x15StatefulProcessorCall\x12h\n\x0esetHandleState\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.SetHandleStateH\x00R\x0esetHandleState\x12h\n\rgetValueState\x18\x02 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\rgetValueState\x12\x66\n\x0cgetListState\x18\x03 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0cgetListState\x12\x64\n\x0bgetMapState\x18\x04 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0bgetMapState\x12o\n\x0etimerStateCall\x18\x05 \x01(\x0b\x32\x45.org.apache.spark.sql.execution.streaming.state.TimerStateCallCommandH\x00R\x0etimerStateCall\x12j\n\x0e\x64\x65leteIfExists\x18\x06 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0e\x64\x65leteIfExistsB\x08\n\x06method"\xd5\x02\n\x14StateVariableRequest\x12h\n\x0evalueStateCall\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.ValueStateCallH\x00R\x0evalueStateCall\x12\x65\n\rlistStateCall\x18\x02 \x01(\x0b\x32=.org.apache.spark.sql.execution.streaming.state.ListStateCallH\x00R\rlistStateCall\x12\x62\n\x0cmapStateCall\x18\x03 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.MapStateCallH\x00R\x0cmapStateCallB\x08\n\x06method"\x83\x02\n\x1aImplicitGroupingKeyRequest\x12h\n\x0esetImplicitKey\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.SetImplicitKeyH\x00R\x0esetImplicitKey\x12q\n\x11removeImplicitKey\x18\x02 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.RemoveImplicitKeyH\x00R\x11removeImplicitKeyB\x08\n\x06method"\x81\x02\n\x0cTimerRequest\x12q\n\x11timerValueRequest\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.TimerValueRequestH\x00R\x11timerValueRequest\x12t\n\x12\x65xpiryTimerRequest\x18\x02 
\x01(\x0b\x32\x42.org.apache.spark.sql.execution.streaming.state.ExpiryTimerRequestH\x00R\x12\x65xpiryTimerRequestB\x08\n\x06method"\xf6\x01\n\x11TimerValueRequest\x12s\n\x12getProcessingTimer\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.GetProcessingTimeH\x00R\x12getProcessingTimer\x12\x62\n\x0cgetWatermark\x18\x02 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.GetWatermarkH\x00R\x0cgetWatermarkB\x08\n\x06method"B\n\x12\x45xpiryTimerRequest\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs"\x13\n\x11GetProcessingTime"\x0e\n\x0cGetWatermark"\x8b\x01\n\x0cUtilsRequest\x12q\n\x11parseStringSchema\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.ParseStringSchemaH\x00R\x11parseStringSchemaB\x08\n\x06method"+\n\x11ParseStringSchema\x12\x16\n\x06schema\x18\x01 \x01(\tR\x06schema"\xc7\x01\n\x10StateCallCommand\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12\x16\n\x06schema\x18\x02 \x01(\tR\x06schema\x12\x30\n\x13mapStateValueSchema\x18\x03 \x01(\tR\x13mapStateValueSchema\x12K\n\x03ttl\x18\x04 \x01(\x0b\x32\x39.org.apache.spark.sql.execution.streaming.state.TTLConfigR\x03ttl"\xa7\x02\n\x15TimerStateCallCommand\x12[\n\x08register\x18\x01 \x01(\x0b\x32=.org.apache.spark.sql.execution.streaming.state.RegisterTimerH\x00R\x08register\x12U\n\x06\x64\x65lete\x18\x02 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.DeleteTimerH\x00R\x06\x64\x65lete\x12P\n\x04list\x18\x03 \x01(\x0b\x32:.org.apache.spark.sql.execution.streaming.state.ListTimersH\x00R\x04listB\x08\n\x06method"\x92\x03\n\x0eValueStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12G\n\x03get\x18\x03 \x01(\x0b\x32\x33.org.apache.spark.sql.execution.streaming.state.GetH\x00R\x03get\x12n\n\x10valueStateUpdate\x18\x04 
\x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.ValueStateUpdateH\x00R\x10valueStateUpdate\x12M\n\x05\x63lear\x18\x05 \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method"\xdf\x04\n\rListStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12\x62\n\x0clistStateGet\x18\x03 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.ListStateGetH\x00R\x0clistStateGet\x12\x62\n\x0clistStatePut\x18\x04 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.ListStatePutH\x00R\x0clistStatePut\x12_\n\x0b\x61ppendValue\x18\x05 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.AppendValueH\x00R\x0b\x61ppendValue\x12\\\n\nappendList\x18\x06 \x01(\x0b\x32:.org.apache.spark.sql.execution.streaming.state.AppendListH\x00R\nappendList\x12M\n\x05\x63lear\x18\x07 \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method"\xc2\x06\n\x0cMapStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12V\n\x08getValue\x18\x03 \x01(\x0b\x32\x38.org.apache.spark.sql.execution.streaming.state.GetValueH\x00R\x08getValue\x12_\n\x0b\x63ontainsKey\x18\x04 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.ContainsKeyH\x00R\x0b\x63ontainsKey\x12_\n\x0bupdateValue\x18\x05 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.UpdateValueH\x00R\x0bupdateValue\x12V\n\x08iterator\x18\x06 \x01(\x0b\x32\x38.org.apache.spark.sql.execution.streaming.state.IteratorH\x00R\x08iterator\x12J\n\x04keys\x18\x07 \x01(\x0b\x32\x34.org.apache.spark.sql.execution.streaming.state.KeysH\x00R\x04keys\x12P\n\x06values\x18\x08 
\x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ValuesH\x00R\x06values\x12Y\n\tremoveKey\x18\t \x01(\x0b\x32\x39.org.apache.spark.sql.execution.streaming.state.RemoveKeyH\x00R\tremoveKey\x12M\n\x05\x63lear\x18\n \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method""\n\x0eSetImplicitKey\x12\x10\n\x03key\x18\x01 \x01(\x0cR\x03key"\x13\n\x11RemoveImplicitKey"\x08\n\x06\x45xists"\x05\n\x03Get"=\n\rRegisterTimer\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs";\n\x0b\x44\x65leteTimer\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs",\n\nListTimers\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"(\n\x10ValueStateUpdate\x12\x14\n\x05value\x18\x01 \x01(\x0cR\x05value"\x07\n\x05\x43lear".\n\x0cListStateGet\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"\x0e\n\x0cListStatePut"#\n\x0b\x41ppendValue\x12\x14\n\x05value\x18\x01 \x01(\x0cR\x05value"\x0c\n\nAppendList"$\n\x08GetValue\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey"\'\n\x0b\x43ontainsKey\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey"=\n\x0bUpdateValue\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey\x12\x14\n\x05value\x18\x02 \x01(\x0cR\x05value"*\n\x08Iterator\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"&\n\x04Keys\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"(\n\x06Values\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"%\n\tRemoveKey\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey"c\n\x0eSetHandleState\x12Q\n\x05state\x18\x01 \x01(\x0e\x32;.org.apache.spark.sql.execution.streaming.state.HandleStateR\x05state"+\n\tTTLConfig\x12\x1e\n\ndurationMs\x18\x01 \x01(\x05R\ndurationMs*n\n\x0bHandleState\x12\x0c\n\x08PRE_INIT\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\x0f\n\x0bINITIALIZED\x10\x02\x12\x12\n\x0e\x44\x41TA_PROCESSED\x10\x03\x12\x13\n\x0fTIMER_PROCESSED\x10\x04\x12\n\n\x06\x43LOSED\x10\x05\x62\x06proto3' ) _globals = globals() @@ -50,82 +50,88 @@ ) if not 
_descriptor._USE_C_DESCRIPTORS: DESCRIPTOR._loaded_options = None - _globals["_HANDLESTATE"]._serialized_start = 5997 - _globals["_HANDLESTATE"]._serialized_end = 6093 + _globals["_HANDLESTATE"]._serialized_start = 6408 + _globals["_HANDLESTATE"]._serialized_end = 6518 _globals["_STATEREQUEST"]._serialized_start = 112 - _globals["_STATEREQUEST"]._serialized_end = 656 - _globals["_STATERESPONSE"]._serialized_start = 658 - _globals["_STATERESPONSE"]._serialized_end = 763 - _globals["_STATERESPONSEWITHLONGTYPEVAL"]._serialized_start = 765 - _globals["_STATERESPONSEWITHLONGTYPEVAL"]._serialized_end = 885 - _globals["_STATEFULPROCESSORCALL"]._serialized_start = 888 - _globals["_STATEFULPROCESSORCALL"]._serialized_end = 1560 - _globals["_STATEVARIABLEREQUEST"]._serialized_start = 1563 - _globals["_STATEVARIABLEREQUEST"]._serialized_end = 1904 - _globals["_IMPLICITGROUPINGKEYREQUEST"]._serialized_start = 1907 - _globals["_IMPLICITGROUPINGKEYREQUEST"]._serialized_end = 2166 - _globals["_TIMERREQUEST"]._serialized_start = 2169 - _globals["_TIMERREQUEST"]._serialized_end = 2426 - _globals["_TIMERVALUEREQUEST"]._serialized_start = 2429 - _globals["_TIMERVALUEREQUEST"]._serialized_end = 2675 - _globals["_EXPIRYTIMERREQUEST"]._serialized_start = 2677 - _globals["_EXPIRYTIMERREQUEST"]._serialized_end = 2743 - _globals["_GETPROCESSINGTIME"]._serialized_start = 2745 - _globals["_GETPROCESSINGTIME"]._serialized_end = 2764 - _globals["_GETWATERMARK"]._serialized_start = 2766 - _globals["_GETWATERMARK"]._serialized_end = 2780 - _globals["_STATECALLCOMMAND"]._serialized_start = 2783 - _globals["_STATECALLCOMMAND"]._serialized_end = 2982 - _globals["_TIMERSTATECALLCOMMAND"]._serialized_start = 2985 - _globals["_TIMERSTATECALLCOMMAND"]._serialized_end = 3280 - _globals["_VALUESTATECALL"]._serialized_start = 3283 - _globals["_VALUESTATECALL"]._serialized_end = 3685 - _globals["_LISTSTATECALL"]._serialized_start = 3688 - _globals["_LISTSTATECALL"]._serialized_end = 4295 - 
_globals["_MAPSTATECALL"]._serialized_start = 4298 - _globals["_MAPSTATECALL"]._serialized_end = 5132 - _globals["_SETIMPLICITKEY"]._serialized_start = 5134 - _globals["_SETIMPLICITKEY"]._serialized_end = 5168 - _globals["_REMOVEIMPLICITKEY"]._serialized_start = 5170 - _globals["_REMOVEIMPLICITKEY"]._serialized_end = 5189 - _globals["_EXISTS"]._serialized_start = 5191 - _globals["_EXISTS"]._serialized_end = 5199 - _globals["_GET"]._serialized_start = 5201 - _globals["_GET"]._serialized_end = 5206 - _globals["_REGISTERTIMER"]._serialized_start = 5208 - _globals["_REGISTERTIMER"]._serialized_end = 5269 - _globals["_DELETETIMER"]._serialized_start = 5271 - _globals["_DELETETIMER"]._serialized_end = 5330 - _globals["_LISTTIMERS"]._serialized_start = 5332 - _globals["_LISTTIMERS"]._serialized_end = 5376 - _globals["_VALUESTATEUPDATE"]._serialized_start = 5378 - _globals["_VALUESTATEUPDATE"]._serialized_end = 5418 - _globals["_CLEAR"]._serialized_start = 5420 - _globals["_CLEAR"]._serialized_end = 5427 - _globals["_LISTSTATEGET"]._serialized_start = 5429 - _globals["_LISTSTATEGET"]._serialized_end = 5475 - _globals["_LISTSTATEPUT"]._serialized_start = 5477 - _globals["_LISTSTATEPUT"]._serialized_end = 5491 - _globals["_APPENDVALUE"]._serialized_start = 5493 - _globals["_APPENDVALUE"]._serialized_end = 5528 - _globals["_APPENDLIST"]._serialized_start = 5530 - _globals["_APPENDLIST"]._serialized_end = 5542 - _globals["_GETVALUE"]._serialized_start = 5544 - _globals["_GETVALUE"]._serialized_end = 5580 - _globals["_CONTAINSKEY"]._serialized_start = 5582 - _globals["_CONTAINSKEY"]._serialized_end = 5621 - _globals["_UPDATEVALUE"]._serialized_start = 5623 - _globals["_UPDATEVALUE"]._serialized_end = 5684 - _globals["_ITERATOR"]._serialized_start = 5686 - _globals["_ITERATOR"]._serialized_end = 5728 - _globals["_KEYS"]._serialized_start = 5730 - _globals["_KEYS"]._serialized_end = 5768 - _globals["_VALUES"]._serialized_start = 5770 - _globals["_VALUES"]._serialized_end = 5810 - 
_globals["_REMOVEKEY"]._serialized_start = 5812 - _globals["_REMOVEKEY"]._serialized_end = 5849 - _globals["_SETHANDLESTATE"]._serialized_start = 5851 - _globals["_SETHANDLESTATE"]._serialized_end = 5950 - _globals["_TTLCONFIG"]._serialized_start = 5952 - _globals["_TTLCONFIG"]._serialized_end = 5995 + _globals["_STATEREQUEST"]._serialized_end = 756 + _globals["_STATERESPONSE"]._serialized_start = 758 + _globals["_STATERESPONSE"]._serialized_end = 863 + _globals["_STATERESPONSEWITHLONGTYPEVAL"]._serialized_start = 865 + _globals["_STATERESPONSEWITHLONGTYPEVAL"]._serialized_end = 985 + _globals["_STATERESPONSEWITHSTRINGTYPEVAL"]._serialized_start = 987 + _globals["_STATERESPONSEWITHSTRINGTYPEVAL"]._serialized_end = 1109 + _globals["_STATEFULPROCESSORCALL"]._serialized_start = 1112 + _globals["_STATEFULPROCESSORCALL"]._serialized_end = 1784 + _globals["_STATEVARIABLEREQUEST"]._serialized_start = 1787 + _globals["_STATEVARIABLEREQUEST"]._serialized_end = 2128 + _globals["_IMPLICITGROUPINGKEYREQUEST"]._serialized_start = 2131 + _globals["_IMPLICITGROUPINGKEYREQUEST"]._serialized_end = 2390 + _globals["_TIMERREQUEST"]._serialized_start = 2393 + _globals["_TIMERREQUEST"]._serialized_end = 2650 + _globals["_TIMERVALUEREQUEST"]._serialized_start = 2653 + _globals["_TIMERVALUEREQUEST"]._serialized_end = 2899 + _globals["_EXPIRYTIMERREQUEST"]._serialized_start = 2901 + _globals["_EXPIRYTIMERREQUEST"]._serialized_end = 2967 + _globals["_GETPROCESSINGTIME"]._serialized_start = 2969 + _globals["_GETPROCESSINGTIME"]._serialized_end = 2988 + _globals["_GETWATERMARK"]._serialized_start = 2990 + _globals["_GETWATERMARK"]._serialized_end = 3004 + _globals["_UTILSREQUEST"]._serialized_start = 3007 + _globals["_UTILSREQUEST"]._serialized_end = 3146 + _globals["_PARSESTRINGSCHEMA"]._serialized_start = 3148 + _globals["_PARSESTRINGSCHEMA"]._serialized_end = 3191 + _globals["_STATECALLCOMMAND"]._serialized_start = 3194 + _globals["_STATECALLCOMMAND"]._serialized_end = 3393 + 
_globals["_TIMERSTATECALLCOMMAND"]._serialized_start = 3396 + _globals["_TIMERSTATECALLCOMMAND"]._serialized_end = 3691 + _globals["_VALUESTATECALL"]._serialized_start = 3694 + _globals["_VALUESTATECALL"]._serialized_end = 4096 + _globals["_LISTSTATECALL"]._serialized_start = 4099 + _globals["_LISTSTATECALL"]._serialized_end = 4706 + _globals["_MAPSTATECALL"]._serialized_start = 4709 + _globals["_MAPSTATECALL"]._serialized_end = 5543 + _globals["_SETIMPLICITKEY"]._serialized_start = 5545 + _globals["_SETIMPLICITKEY"]._serialized_end = 5579 + _globals["_REMOVEIMPLICITKEY"]._serialized_start = 5581 + _globals["_REMOVEIMPLICITKEY"]._serialized_end = 5600 + _globals["_EXISTS"]._serialized_start = 5602 + _globals["_EXISTS"]._serialized_end = 5610 + _globals["_GET"]._serialized_start = 5612 + _globals["_GET"]._serialized_end = 5617 + _globals["_REGISTERTIMER"]._serialized_start = 5619 + _globals["_REGISTERTIMER"]._serialized_end = 5680 + _globals["_DELETETIMER"]._serialized_start = 5682 + _globals["_DELETETIMER"]._serialized_end = 5741 + _globals["_LISTTIMERS"]._serialized_start = 5743 + _globals["_LISTTIMERS"]._serialized_end = 5787 + _globals["_VALUESTATEUPDATE"]._serialized_start = 5789 + _globals["_VALUESTATEUPDATE"]._serialized_end = 5829 + _globals["_CLEAR"]._serialized_start = 5831 + _globals["_CLEAR"]._serialized_end = 5838 + _globals["_LISTSTATEGET"]._serialized_start = 5840 + _globals["_LISTSTATEGET"]._serialized_end = 5886 + _globals["_LISTSTATEPUT"]._serialized_start = 5888 + _globals["_LISTSTATEPUT"]._serialized_end = 5902 + _globals["_APPENDVALUE"]._serialized_start = 5904 + _globals["_APPENDVALUE"]._serialized_end = 5939 + _globals["_APPENDLIST"]._serialized_start = 5941 + _globals["_APPENDLIST"]._serialized_end = 5953 + _globals["_GETVALUE"]._serialized_start = 5955 + _globals["_GETVALUE"]._serialized_end = 5991 + _globals["_CONTAINSKEY"]._serialized_start = 5993 + _globals["_CONTAINSKEY"]._serialized_end = 6032 + 
_globals["_UPDATEVALUE"]._serialized_start = 6034 + _globals["_UPDATEVALUE"]._serialized_end = 6095 + _globals["_ITERATOR"]._serialized_start = 6097 + _globals["_ITERATOR"]._serialized_end = 6139 + _globals["_KEYS"]._serialized_start = 6141 + _globals["_KEYS"]._serialized_end = 6179 + _globals["_VALUES"]._serialized_start = 6181 + _globals["_VALUES"]._serialized_end = 6221 + _globals["_REMOVEKEY"]._serialized_start = 6223 + _globals["_REMOVEKEY"]._serialized_end = 6260 + _globals["_SETHANDLESTATE"]._serialized_start = 6262 + _globals["_SETHANDLESTATE"]._serialized_end = 6361 + _globals["_TTLCONFIG"]._serialized_start = 6363 + _globals["_TTLCONFIG"]._serialized_end = 6406 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/streaming/proto/StateMessage_pb2.pyi b/python/pyspark/sql/streaming/proto/StateMessage_pb2.pyi index 52f66928294cb..ac4b03b820349 100644 --- a/python/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +++ b/python/pyspark/sql/streaming/proto/StateMessage_pb2.pyi @@ -56,19 +56,21 @@ class _HandleStateEnumTypeWrapper( builtins.type, ): # noqa: F821 DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor - CREATED: _HandleState.ValueType # 0 - INITIALIZED: _HandleState.ValueType # 1 - DATA_PROCESSED: _HandleState.ValueType # 2 - TIMER_PROCESSED: _HandleState.ValueType # 3 - CLOSED: _HandleState.ValueType # 4 + PRE_INIT: _HandleState.ValueType # 0 + CREATED: _HandleState.ValueType # 1 + INITIALIZED: _HandleState.ValueType # 2 + DATA_PROCESSED: _HandleState.ValueType # 3 + TIMER_PROCESSED: _HandleState.ValueType # 4 + CLOSED: _HandleState.ValueType # 5 class HandleState(_HandleState, metaclass=_HandleStateEnumTypeWrapper): ... 
-CREATED: HandleState.ValueType # 0 -INITIALIZED: HandleState.ValueType # 1 -DATA_PROCESSED: HandleState.ValueType # 2 -TIMER_PROCESSED: HandleState.ValueType # 3 -CLOSED: HandleState.ValueType # 4 +PRE_INIT: HandleState.ValueType # 0 +CREATED: HandleState.ValueType # 1 +INITIALIZED: HandleState.ValueType # 2 +DATA_PROCESSED: HandleState.ValueType # 3 +TIMER_PROCESSED: HandleState.ValueType # 4 +CLOSED: HandleState.ValueType # 5 global___HandleState = HandleState class StateRequest(google.protobuf.message.Message): @@ -79,6 +81,7 @@ class StateRequest(google.protobuf.message.Message): STATEVARIABLEREQUEST_FIELD_NUMBER: builtins.int IMPLICITGROUPINGKEYREQUEST_FIELD_NUMBER: builtins.int TIMERREQUEST_FIELD_NUMBER: builtins.int + UTILSREQUEST_FIELD_NUMBER: builtins.int version: builtins.int @property def statefulProcessorCall(self) -> global___StatefulProcessorCall: ... @@ -88,6 +91,8 @@ class StateRequest(google.protobuf.message.Message): def implicitGroupingKeyRequest(self) -> global___ImplicitGroupingKeyRequest: ... @property def timerRequest(self) -> global___TimerRequest: ... + @property + def utilsRequest(self) -> global___UtilsRequest: ... def __init__( self, *, @@ -96,6 +101,7 @@ class StateRequest(google.protobuf.message.Message): stateVariableRequest: global___StateVariableRequest | None = ..., implicitGroupingKeyRequest: global___ImplicitGroupingKeyRequest | None = ..., timerRequest: global___TimerRequest | None = ..., + utilsRequest: global___UtilsRequest | None = ..., ) -> None: ... def HasField( self, @@ -110,6 +116,8 @@ class StateRequest(google.protobuf.message.Message): b"statefulProcessorCall", "timerRequest", b"timerRequest", + "utilsRequest", + b"utilsRequest", ], ) -> builtins.bool: ... 
def ClearField( @@ -125,6 +133,8 @@ class StateRequest(google.protobuf.message.Message): b"statefulProcessorCall", "timerRequest", b"timerRequest", + "utilsRequest", + b"utilsRequest", "version", b"version", ], @@ -137,6 +147,7 @@ class StateRequest(google.protobuf.message.Message): "stateVariableRequest", "implicitGroupingKeyRequest", "timerRequest", + "utilsRequest", ] | None ): ... @@ -193,6 +204,31 @@ class StateResponseWithLongTypeVal(google.protobuf.message.Message): global___StateResponseWithLongTypeVal = StateResponseWithLongTypeVal +class StateResponseWithStringTypeVal(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STATUSCODE_FIELD_NUMBER: builtins.int + ERRORMESSAGE_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + statusCode: builtins.int + errorMessage: builtins.str + value: builtins.str + def __init__( + self, + *, + statusCode: builtins.int = ..., + errorMessage: builtins.str = ..., + value: builtins.str = ..., + ) -> None: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "errorMessage", b"errorMessage", "statusCode", b"statusCode", "value", b"value" + ], + ) -> None: ... + +global___StateResponseWithStringTypeVal = StateResponseWithStringTypeVal + class StatefulProcessorCall(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -492,6 +528,49 @@ class GetWatermark(google.protobuf.message.Message): global___GetWatermark = GetWatermark +class UtilsRequest(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + PARSESTRINGSCHEMA_FIELD_NUMBER: builtins.int + @property + def parseStringSchema(self) -> global___ParseStringSchema: ... + def __init__( + self, + *, + parseStringSchema: global___ParseStringSchema | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "method", b"method", "parseStringSchema", b"parseStringSchema" + ], + ) -> builtins.bool: ... 
+ def ClearField( + self, + field_name: typing_extensions.Literal[ + "method", b"method", "parseStringSchema", b"parseStringSchema" + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> typing_extensions.Literal["parseStringSchema"] | None: ... + +global___UtilsRequest = UtilsRequest + +class ParseStringSchema(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + SCHEMA_FIELD_NUMBER: builtins.int + schema: builtins.str + def __init__( + self, + *, + schema: builtins.str = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["schema", b"schema"]) -> None: ... + +global___ParseStringSchema = ParseStringSchema + class StateCallCommand(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor diff --git a/python/pyspark/sql/streaming/readwriter.py b/python/pyspark/sql/streaming/readwriter.py index 6aa01d2f83a42..69282dce37afe 100644 --- a/python/pyspark/sql/streaming/readwriter.py +++ b/python/pyspark/sql/streaming/readwriter.py @@ -1317,9 +1317,9 @@ def trigger( }, ) interval = processingTime.strip() - jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.ProcessingTime( - interval - ) + jTrigger = getattr( + self._spark._sc._jvm, "org.apache.spark.sql.streaming.Trigger" + ).ProcessingTime(interval) elif once is not None: if once is not True: @@ -1328,7 +1328,9 @@ def trigger( messageParameters={"arg_name": "once", "arg_value": str(once)}, ) - jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.Once() + jTrigger = getattr( + self._spark._sc._jvm, "org.apache.spark.sql.streaming.Trigger" + ).Once() elif continuous is not None: if type(continuous) != str or len(continuous.strip()) == 0: @@ -1337,16 +1339,18 @@ def trigger( messageParameters={"arg_name": "continuous", "arg_value": str(continuous)}, ) interval = continuous.strip() - jTrigger = 
self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.Continuous( - interval - ) + jTrigger = getattr( + self._spark._sc._jvm, "org.apache.spark.sql.streaming.Trigger" + ).Continuous(interval) else: if availableNow is not True: raise PySparkValueError( errorClass="VALUE_NOT_TRUE", messageParameters={"arg_name": "availableNow", "arg_value": str(availableNow)}, ) - jTrigger = self._spark._sc._jvm.org.apache.spark.sql.streaming.Trigger.AvailableNow() + jTrigger = getattr( + self._spark._sc._jvm, "org.apache.spark.sql.streaming.Trigger" + ).AvailableNow() self._jwrite = self._jwrite.trigger(jTrigger) return self @@ -1557,11 +1561,9 @@ def foreach(self, f: Union[Callable[[Row], None], "SupportsProcess"]) -> "DataSt serializer = AutoBatchedSerializer(CPickleSerializer()) wrapped_func = _wrap_function(self._spark._sc, func, serializer, serializer) assert self._spark._sc._jvm is not None - jForeachWriter = ( - self._spark._sc._jvm.org.apache.spark.sql.execution.python.PythonForeachWriter( - wrapped_func, self._df._jdf.schema() - ) - ) + jForeachWriter = getattr( + self._spark._sc._jvm, "org.apache.spark.sql.execution.python.PythonForeachWriter" + )(wrapped_func, self._df._jdf.schema()) self._jwrite.foreach(jForeachWriter) return self diff --git a/python/pyspark/sql/streaming/state.py b/python/pyspark/sql/streaming/state.py index 0ea5590ef2e65..cd067a8413e1c 100644 --- a/python/pyspark/sql/streaming/state.py +++ b/python/pyspark/sql/streaming/state.py @@ -19,7 +19,6 @@ from typing import Tuple, Optional from pyspark.sql.types import Row, StructType, TimestampType -from pyspark.sql.utils import has_numpy from pyspark.errors import PySparkTypeError, PySparkValueError, PySparkRuntimeError __all__ = ["GroupState", "GroupStateTimeout"] @@ -132,6 +131,8 @@ def update(self, newValue: Tuple) -> None: """ Update the value of the state. The value of the state cannot be null. 
""" + from pyspark.testing.utils import have_numpy + if newValue is None: raise PySparkTypeError( errorClass="CANNOT_BE_NONE", @@ -139,7 +140,7 @@ def update(self, newValue: Tuple) -> None: ) converted = [] - if has_numpy: + if have_numpy: import numpy as np # In order to convert NumPy types to Python primitive types. diff --git a/python/pyspark/sql/streaming/stateful_processor.py b/python/pyspark/sql/streaming/stateful_processor.py index 20078c215bace..b04bb955488ab 100644 --- a/python/pyspark/sql/streaming/stateful_processor.py +++ b/python/pyspark/sql/streaming/stateful_processor.py @@ -45,12 +45,9 @@ class ValueState: .. versionadded:: 4.0.0 """ - def __init__( - self, value_state_client: ValueStateClient, state_name: str, schema: Union[StructType, str] - ) -> None: + def __init__(self, value_state_client: ValueStateClient, state_name: str) -> None: self._value_state_client = value_state_client self._state_name = state_name - self.schema = schema def exists(self) -> bool: """ @@ -68,7 +65,7 @@ def update(self, new_value: Tuple) -> None: """ Update the value of the state. """ - self._value_state_client.update(self._state_name, self.schema, new_value) + self._value_state_client.update(self._state_name, new_value) def clear(self) -> None: """ @@ -105,21 +102,13 @@ def get_current_watermark_in_ms(self) -> int: class ExpiredTimerInfo: """ - Class used for arbitrary stateful operations with transformWithState to access expired timer - info. When is_valid is false, the expiry timestamp is invalid. + Class used to provide access to expired timer's expiry time. .. versionadded:: 4.0.0 """ - def __init__(self, is_valid: bool, expiry_time_in_ms: int = -1) -> None: - self._is_valid = is_valid + def __init__(self, expiry_time_in_ms: int = -1) -> None: self._expiry_time_in_ms = expiry_time_in_ms - def is_valid(self) -> bool: - """ - Whether the expiry info is valid. 
- """ - return self._is_valid - def get_expiry_time_in_ms(self) -> int: """ Get the timestamp for expired timer, return timestamp in millisecond. @@ -135,12 +124,9 @@ class ListState: .. versionadded:: 4.0.0 """ - def __init__( - self, list_state_client: ListStateClient, state_name: str, schema: Union[StructType, str] - ) -> None: + def __init__(self, list_state_client: ListStateClient, state_name: str) -> None: self._list_state_client = list_state_client self._state_name = state_name - self.schema = schema def exists(self) -> bool: """ @@ -158,19 +144,19 @@ def put(self, new_state: List[Tuple]) -> None: """ Update the values of the list state. """ - self._list_state_client.put(self._state_name, self.schema, new_state) + self._list_state_client.put(self._state_name, new_state) def append_value(self, new_state: Tuple) -> None: """ Append a new value to the list state. """ - self._list_state_client.append_value(self._state_name, self.schema, new_state) + self._list_state_client.append_value(self._state_name, new_state) def append_list(self, new_state: List[Tuple]) -> None: """ Append a list of new values to the list state. """ - self._list_state_client.append_list(self._state_name, self.schema, new_state) + self._list_state_client.append_list(self._state_name, new_state) def clear(self) -> None: """ @@ -283,7 +269,7 @@ def getValueState( If ttl is not specified the state will never expire. """ self.stateful_processor_api_client.get_value_state(state_name, schema, ttl_duration_ms) - return ValueState(ValueStateClient(self.stateful_processor_api_client), state_name, schema) + return ValueState(ValueStateClient(self.stateful_processor_api_client, schema), state_name) def getListState( self, state_name: str, schema: Union[StructType, str], ttl_duration_ms: Optional[int] = None @@ -307,7 +293,7 @@ def getListState( If ttl is not specified the state will never expire. 
""" self.stateful_processor_api_client.get_list_state(state_name, schema, ttl_duration_ms) - return ListState(ListStateClient(self.stateful_processor_api_client), state_name, schema) + return ListState(ListStateClient(self.stateful_processor_api_client, schema), state_name) def getMapState( self, @@ -398,7 +384,6 @@ def handleInputRows( key: Any, rows: Iterator["PandasDataFrameLike"], timer_values: TimerValues, - expired_timer_info: ExpiredTimerInfo, ) -> Iterator["PandasDataFrameLike"]: """ Function that will allow users to interact with input data rows along with the grouping key. @@ -420,11 +405,29 @@ def handleInputRows( timer_values: TimerValues Timer value for the current batch that process the input rows. Users can get the processing or event time timestamp from TimerValues. - expired_timer_info: ExpiredTimerInfo - Timestamp of expired timers on the grouping key. """ ... + def handleExpiredTimer( + self, key: Any, timer_values: TimerValues, expired_timer_info: ExpiredTimerInfo + ) -> Iterator["PandasDataFrameLike"]: + """ + Optional to implement. Will act return an empty iterator if not defined. + Function that will be invoked when a timer is fired for a given key. Users can choose to + evict state, register new timers and optionally provide output rows. + + Parameters + ---------- + key : Any + grouping key. + timer_values: TimerValues + Timer value for the current batch that process the input rows. + Users can get the processing or event time timestamp from TimerValues. + expired_timer_info: ExpiredTimerInfo + Instance of ExpiredTimerInfo that provides access to expired timer. + """ + return iter([]) + @abstractmethod def close(self) -> None: """ @@ -433,9 +436,21 @@ def close(self) -> None: """ ... - def handleInitialState(self, key: Any, initialState: "PandasDataFrameLike") -> None: + def handleInitialState( + self, key: Any, initialState: "PandasDataFrameLike", timer_values: TimerValues + ) -> None: """ Optional to implement. 
Will act as no-op if not defined or no initial state input. Function that will be invoked only in the first batch for users to process initial states. + + Parameters + ---------- + key : Any + grouping key. + initialState: :class:`pandas.DataFrame` + One dataframe in the initial state associated with the key. + timer_values: TimerValues + Timer value for the current batch that process the input rows. + Users can get the processing or event time timestamp from TimerValues. """ pass diff --git a/python/pyspark/sql/streaming/stateful_processor_api_client.py b/python/pyspark/sql/streaming/stateful_processor_api_client.py index 353f75e267962..6fd56481bc612 100644 --- a/python/pyspark/sql/streaming/stateful_processor_api_client.py +++ b/python/pyspark/sql/streaming/stateful_processor_api_client.py @@ -15,20 +15,19 @@ # limitations under the License. # from enum import Enum +import json import os import socket -from typing import Any, Dict, List, Union, Optional, cast, Tuple, Iterator +from typing import Any, Dict, List, Union, Optional, Tuple, Iterator from pyspark.serializers import write_int, read_int, UTF8Deserializer from pyspark.sql.pandas.serializers import ArrowStreamSerializer from pyspark.sql.types import ( StructType, TYPE_CHECKING, - _parse_datatype_string, Row, ) from pyspark.sql.pandas.types import convert_pandas_using_numpy_type -from pyspark.sql.utils import has_numpy from pyspark.serializers import CPickleSerializer from pyspark.errors import PySparkRuntimeError import uuid @@ -40,6 +39,7 @@ class StatefulProcessorHandleState(Enum): + PRE_INIT = 0 CREATED = 1 INITIALIZED = 2 DATA_PROCESSED = 3 @@ -48,25 +48,36 @@ class StatefulProcessorHandleState(Enum): class StatefulProcessorApiClient: - def __init__(self, state_server_port: int, key_schema: StructType) -> None: + def __init__( + self, state_server_port: int, key_schema: StructType, is_driver: bool = False + ) -> None: self.key_schema = key_schema self._client_socket = socket.socket() 
self._client_socket.connect(("localhost", state_server_port)) self.sockfile = self._client_socket.makefile( "rwb", int(os.environ.get("SPARK_BUFFER_SIZE", 65536)) ) - self.handle_state = StatefulProcessorHandleState.CREATED + if is_driver: + self.handle_state = StatefulProcessorHandleState.PRE_INIT + else: + self.handle_state = StatefulProcessorHandleState.CREATED self.utf8_deserializer = UTF8Deserializer() self.pickleSer = CPickleSerializer() self.serializer = ArrowStreamSerializer() # Dictionaries to store the mapping between iterator id and a tuple of pandas DataFrame # and the index of the last row that was read. self.list_timer_iterator_cursors: Dict[str, Tuple["PandasDataFrameLike", int]] = {} + # statefulProcessorApiClient is initialized per batch per partition, + # so we will have new timestamps for a new batch + self._batch_timestamp = -1 + self._watermark_timestamp = -1 def set_handle_state(self, state: StatefulProcessorHandleState) -> None: import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage - if state == StatefulProcessorHandleState.CREATED: + if state == StatefulProcessorHandleState.PRE_INIT: + proto_state = stateMessage.PRE_INIT + elif state == StatefulProcessorHandleState.CREATED: proto_state = stateMessage.CREATED elif state == StatefulProcessorHandleState.INITIALIZED: proto_state = stateMessage.INITIALIZED @@ -125,7 +136,7 @@ def get_value_state( import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage if isinstance(schema, str): - schema = cast(StructType, _parse_datatype_string(schema)) + schema = self._parse_string_schema(schema) state_call_command = stateMessage.StateCallCommand() state_call_command.stateName = state_name @@ -148,7 +159,7 @@ def get_list_state( import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage if isinstance(schema, str): - schema = cast(StructType, _parse_datatype_string(schema)) + schema = self._parse_string_schema(schema) state_call_command = stateMessage.StateCallCommand() 
state_call_command.stateName = state_name @@ -266,47 +277,15 @@ def get_expiry_timers_iterator( # TODO(SPARK-49233): Classify user facing errors. raise PySparkRuntimeError(f"Error getting expiry timers: " f"{response_message[1]}") - def get_batch_timestamp(self) -> int: - import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage - - get_processing_time_call = stateMessage.GetProcessingTime() - timer_value_call = stateMessage.TimerValueRequest( - getProcessingTimer=get_processing_time_call - ) - timer_request = stateMessage.TimerRequest(timerValueRequest=timer_value_call) - message = stateMessage.StateRequest(timerRequest=timer_request) - - self._send_proto_message(message.SerializeToString()) - response_message = self._receive_proto_message_with_long_value() - status = response_message[0] - if status != 0: - # TODO(SPARK-49233): Classify user facing errors. - raise PySparkRuntimeError( - f"Error getting processing timestamp: " f"{response_message[1]}" - ) + def get_timestamps(self, time_mode: str) -> Tuple[int, int]: + if time_mode.lower() == "none": + return -1, -1 else: - timestamp = response_message[2] - return timestamp - - def get_watermark_timestamp(self) -> int: - import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage - - get_watermark_call = stateMessage.GetWatermark() - timer_value_call = stateMessage.TimerValueRequest(getWatermark=get_watermark_call) - timer_request = stateMessage.TimerRequest(timerValueRequest=timer_value_call) - message = stateMessage.StateRequest(timerRequest=timer_request) - - self._send_proto_message(message.SerializeToString()) - response_message = self._receive_proto_message_with_long_value() - status = response_message[0] - if status != 0: - # TODO(SPARK-49233): Classify user facing errors. 
- raise PySparkRuntimeError( - f"Error getting eventtime timestamp: " f"{response_message[1]}" - ) - else: - timestamp = response_message[2] - return timestamp + if self._batch_timestamp == -1: + self._batch_timestamp = self._get_batch_timestamp() + if self._watermark_timestamp == -1: + self._watermark_timestamp = self._get_watermark_timestamp() + return self._batch_timestamp, self._watermark_timestamp def get_map_state( self, @@ -318,9 +297,9 @@ def get_map_state( import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage if isinstance(user_key_schema, str): - user_key_schema = cast(StructType, _parse_datatype_string(user_key_schema)) + user_key_schema = self._parse_string_schema(user_key_schema) if isinstance(value_schema, str): - value_schema = cast(StructType, _parse_datatype_string(value_schema)) + value_schema = self._parse_string_schema(value_schema) state_call_command = stateMessage.StateCallCommand() state_call_command.stateName = state_name @@ -353,6 +332,48 @@ def delete_if_exists(self, state_name: str) -> None: # TODO(SPARK-49233): Classify user facing errors. raise PySparkRuntimeError(f"Error deleting state: " f"{response_message[1]}") + def _get_batch_timestamp(self) -> int: + import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage + + get_processing_time_call = stateMessage.GetProcessingTime() + timer_value_call = stateMessage.TimerValueRequest( + getProcessingTimer=get_processing_time_call + ) + timer_request = stateMessage.TimerRequest(timerValueRequest=timer_value_call) + message = stateMessage.StateRequest(timerRequest=timer_request) + + self._send_proto_message(message.SerializeToString()) + response_message = self._receive_proto_message_with_long_value() + status = response_message[0] + if status != 0: + # TODO(SPARK-49233): Classify user facing errors. 
+ raise PySparkRuntimeError( + f"Error getting processing timestamp: " f"{response_message[1]}" + ) + else: + timestamp = response_message[2] + return timestamp + + def _get_watermark_timestamp(self) -> int: + import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage + + get_watermark_call = stateMessage.GetWatermark() + timer_value_call = stateMessage.TimerValueRequest(getWatermark=get_watermark_call) + timer_request = stateMessage.TimerRequest(timerValueRequest=timer_value_call) + message = stateMessage.StateRequest(timerRequest=timer_request) + + self._send_proto_message(message.SerializeToString()) + response_message = self._receive_proto_message_with_long_value() + status = response_message[0] + if status != 0: + # TODO(SPARK-49233): Classify user facing errors. + raise PySparkRuntimeError( + f"Error getting eventtime timestamp: " f"{response_message[1]}" + ) + else: + timestamp = response_message[2] + return timestamp + def _send_proto_message(self, message: bytes) -> None: # Writing zero here to indicate message version. This allows us to evolve the message # format or even changing the message protocol in the future. 
@@ -379,12 +400,24 @@ def _receive_proto_message_with_long_value(self) -> Tuple[int, str, int]: message.ParseFromString(bytes) return message.statusCode, message.errorMessage, message.value + def _receive_proto_message_with_string_value(self) -> Tuple[int, str, str]: + import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage + + length = read_int(self.sockfile) + bytes = self.sockfile.read(length) + message = stateMessage.StateResponseWithStringTypeVal() + message.ParseFromString(bytes) + return message.statusCode, message.errorMessage, message.value + def _receive_str(self) -> str: return self.utf8_deserializer.loads(self.sockfile) def _serialize_to_bytes(self, schema: StructType, data: Tuple) -> bytes: + from pyspark.testing.utils import have_numpy + converted = [] - if has_numpy: + + if have_numpy: import numpy as np # In order to convert NumPy types to Python primitive types. @@ -422,6 +455,24 @@ def _send_arrow_state(self, schema: StructType, state: List[Tuple]) -> None: def _read_arrow_state(self) -> Any: return self.serializer.load_stream(self.sockfile) + # Parse a string schema into a StructType schema. This method will perform an API call to + # JVM side to parse the schema string. + def _parse_string_schema(self, schema: str) -> StructType: + import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage + + parse_string_schema_call = stateMessage.ParseStringSchema(schema=schema) + utils_request = stateMessage.UtilsRequest(parseStringSchema=parse_string_schema_call) + message = stateMessage.StateRequest(utilsRequest=utils_request) + + self._send_proto_message(message.SerializeToString()) + response_message = self._receive_proto_message_with_string_value() + status = response_message[0] + if status != 0: + # TODO(SPARK-49233): Classify user facing errors. 
+ raise PySparkRuntimeError(f"Error parsing string schema: " f"{response_message[1]}") + else: + return StructType.fromJson(json.loads(response_message[2])) + class ListTimerIterator: def __init__(self, stateful_processor_api_client: StatefulProcessorApiClient): diff --git a/python/pyspark/sql/streaming/stateful_processor_util.py b/python/pyspark/sql/streaming/stateful_processor_util.py new file mode 100644 index 0000000000000..d69c1a943862c --- /dev/null +++ b/python/pyspark/sql/streaming/stateful_processor_util.py @@ -0,0 +1,28 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from enum import Enum + +# This file places the utilities for transformWithStateInPandas; we have a separate file to avoid +# putting internal classes to the stateful_processor.py file which contains public APIs. 
+ + +class TransformWithStateInPandasFuncMode(Enum): + PROCESS_DATA = 1 + PROCESS_TIMER = 2 + COMPLETE = 3 + PRE_INIT = 4 diff --git a/python/pyspark/sql/streaming/transform_with_state_driver_worker.py b/python/pyspark/sql/streaming/transform_with_state_driver_worker.py new file mode 100644 index 0000000000000..99d386f07b5b6 --- /dev/null +++ b/python/pyspark/sql/streaming/transform_with_state_driver_worker.py @@ -0,0 +1,102 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import json +from typing import Any, Iterator, TYPE_CHECKING + +from pyspark.util import local_connect_and_auth +from pyspark.serializers import ( + write_int, + read_int, + UTF8Deserializer, + CPickleSerializer, +) +from pyspark import worker +from pyspark.util import handle_worker_exception +from typing import IO +from pyspark.worker_util import check_python_version +from pyspark.sql.streaming.stateful_processor_api_client import StatefulProcessorApiClient +from pyspark.sql.streaming.stateful_processor_util import TransformWithStateInPandasFuncMode +from pyspark.sql.types import StructType + +if TYPE_CHECKING: + from pyspark.sql.pandas._typing import ( + DataFrameLike as PandasDataFrameLike, + ) + +pickle_ser = CPickleSerializer() +utf8_deserializer = UTF8Deserializer() + + +def main(infile: IO, outfile: IO) -> None: + check_python_version(infile) + + log_name = "Streaming TransformWithStateInPandas Python worker" + print(f"Starting {log_name}.\n") + + def process( + processor: StatefulProcessorApiClient, + mode: TransformWithStateInPandasFuncMode, + key: Any, + input: Iterator["PandasDataFrameLike"], + ) -> None: + print(f"{log_name} Starting execution of UDF: {func}.\n") + func(processor, mode, key, input) + print(f"{log_name} Completed execution of UDF: {func}.\n") + + try: + func, return_type = worker.read_command(pickle_ser, infile) + print( + f"{log_name} finish init stage of Python runner. Received UDF from JVM: {func}, " + f"received return type of UDF: {return_type}.\n" + ) + # send signal for getting args + write_int(0, outfile) + outfile.flush() + + # This driver runner will only be used on the first batch of a query, + # and the following code block should be only run once for each query run + state_server_port = read_int(infile) + key_schema = StructType.fromJson(json.loads(utf8_deserializer.loads(infile))) + print( + f"{log_name} received parameters for UDF. 
State server port: {state_server_port}, " + f"key schema: {key_schema}.\n" + ) + + stateful_processor_api_client = StatefulProcessorApiClient(state_server_port, key_schema) + process( + stateful_processor_api_client, + TransformWithStateInPandasFuncMode.PRE_INIT, + None, + iter([]), + ) + write_int(0, outfile) + outfile.flush() + except Exception as e: + handle_worker_exception(e, outfile) + outfile.flush() + + +if __name__ == "__main__": + # Read information about how to connect back to the JVM from the environment. + java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) + auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] + (sock_file, sock) = local_connect_and_auth(java_port, auth_secret) + write_int(os.getpid(), sock_file) + sock_file.flush() + main(sock_file, sock_file) diff --git a/python/pyspark/sql/streaming/value_state_client.py b/python/pyspark/sql/streaming/value_state_client.py index fd783af7931da..532a89cf92d22 100644 --- a/python/pyspark/sql/streaming/value_state_client.py +++ b/python/pyspark/sql/streaming/value_state_client.py @@ -14,18 +14,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -from typing import Union, cast, Tuple, Optional +from typing import Union, Tuple, Optional from pyspark.sql.streaming.stateful_processor_api_client import StatefulProcessorApiClient -from pyspark.sql.types import StructType, _parse_datatype_string +from pyspark.sql.types import StructType from pyspark.errors import PySparkRuntimeError __all__ = ["ValueStateClient"] class ValueStateClient: - def __init__(self, stateful_processor_api_client: StatefulProcessorApiClient) -> None: + def __init__( + self, + stateful_processor_api_client: StatefulProcessorApiClient, + schema: Union[StructType, str], + ) -> None: self._stateful_processor_api_client = stateful_processor_api_client + if isinstance(schema, str): + self.schema = self._stateful_processor_api_client._parse_string_schema(schema) + else: + self.schema = schema def exists(self, state_name: str) -> bool: import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage @@ -69,12 +77,10 @@ def get(self, state_name: str) -> Optional[Tuple]: # TODO(SPARK-49233): Classify user facing errors. 
raise PySparkRuntimeError(f"Error getting value state: " f"{response_message[1]}") - def update(self, state_name: str, schema: Union[StructType, str], value: Tuple) -> None: + def update(self, state_name: str, value: Tuple) -> None: import pyspark.sql.streaming.proto.StateMessage_pb2 as stateMessage - if isinstance(schema, str): - schema = cast(StructType, _parse_datatype_string(schema)) - bytes = self._stateful_processor_api_client._serialize_to_bytes(schema, value) + bytes = self._stateful_processor_api_client._serialize_to_bytes(self.schema, value) update_call = stateMessage.ValueStateUpdate(value=bytes) value_state_call = stateMessage.ValueStateCall( stateName=state_name, valueStateUpdate=update_call diff --git a/python/pyspark/sql/table_arg.py b/python/pyspark/sql/table_arg.py new file mode 100644 index 0000000000000..cacfd24b2f1ba --- /dev/null +++ b/python/pyspark/sql/table_arg.py @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import TYPE_CHECKING + +from pyspark.sql.tvf_argument import TableValuedFunctionArgument +from pyspark.sql.utils import get_active_spark_context + + +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject + from pyspark.sql._typing import ColumnOrName + + +class TableArg(TableValuedFunctionArgument): + def __init__(self, j_table_arg: "JavaObject"): + self._j_table_arg = j_table_arg + + def partitionBy(self, *cols: "ColumnOrName") -> "TableArg": + from pyspark.sql.classic.column import _to_java_column, _to_seq + + sc = get_active_spark_context() + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] + j_cols = _to_seq(sc, cols, _to_java_column) + new_j_table_arg = self._j_table_arg.partitionBy(j_cols) + return TableArg(new_j_table_arg) + + def orderBy(self, *cols: "ColumnOrName") -> "TableArg": + from pyspark.sql.classic.column import _to_java_column, _to_seq + + sc = get_active_spark_context() + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] + j_cols = _to_seq(sc, cols, _to_java_column) + new_j_table_arg = self._j_table_arg.orderBy(j_cols) + return TableArg(new_j_table_arg) + + def withSinglePartition(self) -> "TableArg": + new_j_table_arg = self._j_table_arg.withSinglePartition() + return TableArg(new_j_table_arg) diff --git a/python/pyspark/sql/tests/arrow/__init__.py b/python/pyspark/sql/tests/arrow/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/python/pyspark/sql/tests/arrow/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/arrow/test_arrow.py similarity index 99% rename from python/pyspark/sql/tests/test_arrow.py rename to python/pyspark/sql/tests/arrow/test_arrow.py index b71bdb1eece28..a2ee113b6386e 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/arrow/test_arrow.py @@ -1778,7 +1778,7 @@ def conf(cls): if __name__ == "__main__": - from pyspark.sql.tests.test_arrow import * # noqa: F401 + from pyspark.sql.tests.arrow.test_arrow import * # noqa: F401 try: import xmlrunner # type: ignore diff --git a/python/pyspark/sql/tests/test_arrow_cogrouped_map.py b/python/pyspark/sql/tests/arrow/test_arrow_cogrouped_map.py similarity index 96% rename from python/pyspark/sql/tests/test_arrow_cogrouped_map.py rename to python/pyspark/sql/tests/arrow/test_arrow_cogrouped_map.py index a90574b7f1928..80b12d3a7798b 100644 --- a/python/pyspark/sql/tests/test_arrow_cogrouped_map.py +++ b/python/pyspark/sql/tests/arrow/test_arrow_cogrouped_map.py @@ -299,6 +299,16 @@ def summarize(left, right): "+---------+------------+----------+-------------+\n", ) + def test_self_join(self): + df = self.spark.createDataFrame([(1, 1)], ("k", "v")) + + def arrow_func(key, left, right): + return pa.Table.from_pydict({"x": [2], "y": [2]}) + + df2 = df.groupby("k").cogroup(df.groupby("k")).applyInArrow(arrow_func, "x long, y long") + + self.assertEqual(df2.join(df2).count(), 1) + class CogroupedMapInArrowTests(CogroupedMapInArrowTestsMixin, ReusedSQLTestCase): @classmethod @@ -324,7 +334,7 @@ def 
tearDownClass(cls): if __name__ == "__main__": - from pyspark.sql.tests.test_arrow_cogrouped_map import * # noqa: F401 + from pyspark.sql.tests.arrow.test_arrow_cogrouped_map import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_arrow_grouped_map.py b/python/pyspark/sql/tests/arrow/test_arrow_grouped_map.py similarity index 96% rename from python/pyspark/sql/tests/test_arrow_grouped_map.py rename to python/pyspark/sql/tests/arrow/test_arrow_grouped_map.py index f9947d0788b87..c9ad602edfd27 100644 --- a/python/pyspark/sql/tests/test_arrow_grouped_map.py +++ b/python/pyspark/sql/tests/arrow/test_arrow_grouped_map.py @@ -255,6 +255,16 @@ def foo(_): self.assertEqual(r.a, "hi") self.assertEqual(r.b, 1) + def test_self_join(self): + df = self.spark.createDataFrame([(1, 1)], ("k", "v")) + + def arrow_func(key, table): + return pa.Table.from_pydict({"x": [2], "y": [2]}) + + df2 = df.groupby("k").applyInArrow(arrow_func, schema="x long, y long") + + self.assertEqual(df2.join(df2).count(), 1) + class GroupedMapInArrowTests(GroupedMapInArrowTestsMixin, ReusedSQLTestCase): @classmethod @@ -280,7 +290,7 @@ def tearDownClass(cls): if __name__ == "__main__": - from pyspark.sql.tests.test_arrow_grouped_map import * # noqa: F401 + from pyspark.sql.tests.arrow.test_arrow_grouped_map import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/test_arrow_map.py b/python/pyspark/sql/tests/arrow/test_arrow_map.py similarity index 98% rename from python/pyspark/sql/tests/test_arrow_map.py rename to python/pyspark/sql/tests/arrow/test_arrow_map.py index 2e82869230db4..71bb36a902e3e 100644 --- a/python/pyspark/sql/tests/test_arrow_map.py +++ b/python/pyspark/sql/tests/arrow/test_arrow_map.py @@ -195,7 +195,7 @@ def tearDownClass(cls): if __name__ == "__main__": - from pyspark.sql.tests.test_arrow_map import * # noqa: F401 + from pyspark.sql.tests.arrow.test_arrow_map import * # noqa: F401 
try: import xmlrunner diff --git a/python/pyspark/sql/tests/test_arrow_python_udf.py b/python/pyspark/sql/tests/arrow/test_arrow_python_udf.py similarity index 94% rename from python/pyspark/sql/tests/test_arrow_python_udf.py rename to python/pyspark/sql/tests/arrow/test_arrow_python_udf.py index 095414334848b..1f430d1ab00d9 100644 --- a/python/pyspark/sql/tests/test_arrow_python_udf.py +++ b/python/pyspark/sql/tests/arrow/test_arrow_python_udf.py @@ -238,8 +238,22 @@ def tearDownClass(cls): super(PythonUDFArrowTests, cls).tearDownClass() +class AsyncPythonUDFArrowTests(PythonUDFArrowTests): + @classmethod + def setUpClass(cls): + super(AsyncPythonUDFArrowTests, cls).setUpClass() + cls.spark.conf.set("spark.sql.execution.pythonUDF.arrow.concurrency.level", "4") + + @classmethod + def tearDownClass(cls): + try: + cls.spark.conf.unset("spark.sql.execution.pythonUDF.arrow.concurrency.level") + finally: + super(AsyncPythonUDFArrowTests, cls).tearDownClass() + + if __name__ == "__main__": - from pyspark.sql.tests.test_arrow_python_udf import * # noqa: F401 + from pyspark.sql.tests.arrow.test_arrow_python_udf import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/connect/arrow/__init__.py b/python/pyspark/sql/tests/connect/arrow/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/python/pyspark/sql/tests/connect/arrow/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow.py b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow.py similarity index 97% rename from python/pyspark/sql/tests/connect/test_parity_arrow.py rename to python/pyspark/sql/tests/connect/arrow/test_parity_arrow.py index 885b3001b1db1..fa8cf286b9bd6 100644 --- a/python/pyspark/sql/tests/connect/test_parity_arrow.py +++ b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow.py @@ -17,7 +17,7 @@ import unittest -from pyspark.sql.tests.test_arrow import ArrowTestsMixin +from pyspark.sql.tests.arrow.test_arrow import ArrowTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.testing.pandasutils import PandasOnSparkTestUtils @@ -139,7 +139,7 @@ def test_create_dataframe_namedtuples(self): if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_arrow import * # noqa: F401 + from pyspark.sql.tests.connect.arrow.test_parity_arrow import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow_cogrouped_map.py b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow_cogrouped_map.py similarity index 86% rename from python/pyspark/sql/tests/connect/test_parity_arrow_cogrouped_map.py rename to python/pyspark/sql/tests/connect/arrow/test_parity_arrow_cogrouped_map.py index 90c5f2c9b0613..c14c69b5ed4ec 100644 --- a/python/pyspark/sql/tests/connect/test_parity_arrow_cogrouped_map.py +++ b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow_cogrouped_map.py @@ -17,7 +17,7 @@ 
import unittest -from pyspark.sql.tests.test_arrow_cogrouped_map import CogroupedMapInArrowTestsMixin +from pyspark.sql.tests.arrow.test_arrow_cogrouped_map import CogroupedMapInArrowTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase @@ -26,7 +26,7 @@ class CogroupedMapInArrowParityTests(CogroupedMapInArrowTestsMixin, ReusedConnec if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_arrow_cogrouped_map import * # noqa: F401 + from pyspark.sql.tests.connect.arrow.test_parity_arrow_cogrouped_map import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow_grouped_map.py b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow_grouped_map.py similarity index 87% rename from python/pyspark/sql/tests/connect/test_parity_arrow_grouped_map.py rename to python/pyspark/sql/tests/connect/arrow/test_parity_arrow_grouped_map.py index 0fb96ba13b838..ca12a8b06fdbf 100644 --- a/python/pyspark/sql/tests/connect/test_parity_arrow_grouped_map.py +++ b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow_grouped_map.py @@ -17,7 +17,7 @@ import unittest -from pyspark.sql.tests.test_arrow_grouped_map import GroupedMapInArrowTestsMixin +from pyspark.sql.tests.arrow.test_arrow_grouped_map import GroupedMapInArrowTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase @@ -26,7 +26,7 @@ class GroupedApplyInArrowParityTests(GroupedMapInArrowTestsMixin, ReusedConnectT if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_arrow_grouped_map import * # noqa: F401 + from pyspark.sql.tests.connect.arrow.test_parity_arrow_grouped_map import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow_map.py b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow_map.py similarity index 88% rename from python/pyspark/sql/tests/connect/test_parity_arrow_map.py rename to 
python/pyspark/sql/tests/connect/arrow/test_parity_arrow_map.py index ed51d0d3d1996..1da356f524f31 100644 --- a/python/pyspark/sql/tests/connect/test_parity_arrow_map.py +++ b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow_map.py @@ -17,7 +17,7 @@ import unittest -from pyspark.sql.tests.test_arrow_map import MapInArrowTestsMixin +from pyspark.sql.tests.arrow.test_arrow_map import MapInArrowTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase @@ -26,7 +26,7 @@ class ArrowMapParityTests(MapInArrowTestsMixin, ReusedConnectTestCase): if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_arrow_map import * # noqa: F401 + from pyspark.sql.tests.connect.arrow.test_parity_arrow_map import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow_python_udf.py similarity index 90% rename from python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py rename to python/pyspark/sql/tests/connect/arrow/test_parity_arrow_python_udf.py index 732008eb05a35..fe81513f005f9 100644 --- a/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +++ b/python/pyspark/sql/tests/connect/arrow/test_parity_arrow_python_udf.py @@ -16,7 +16,7 @@ # from pyspark.sql.tests.connect.test_parity_udf import UDFParityTests -from pyspark.sql.tests.test_arrow_python_udf import PythonUDFArrowTestsMixin +from pyspark.sql.tests.arrow.test_arrow_python_udf import PythonUDFArrowTestsMixin class ArrowPythonUDFParityTests(UDFParityTests, PythonUDFArrowTestsMixin): @@ -35,7 +35,7 @@ def tearDownClass(cls): if __name__ == "__main__": import unittest - from pyspark.sql.tests.connect.test_parity_arrow_python_udf import * # noqa: F401 + from pyspark.sql.tests.connect.arrow.test_parity_arrow_python_udf import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git 
a/python/pyspark/sql/tests/connect/pandas/__init__.py b/python/pyspark/sql/tests/connect/pandas/__init__.py new file mode 100644 index 0000000000000..cce3acad34a49 --- /dev/null +++ b/python/pyspark/sql/tests/connect/pandas/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_cogrouped_map.py similarity index 93% rename from python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py rename to python/pyspark/sql/tests/connect/pandas/test_parity_pandas_cogrouped_map.py index 00d71bda2d938..a71e6369f5e2f 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_cogrouped_map.py @@ -28,7 +28,7 @@ class CogroupedApplyInPandasTests( if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_pandas_cogrouped_map import * # noqa: F401 + from pyspark.sql.tests.connect.pandas.test_parity_pandas_cogrouped_map import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_grouped_map.py similarity index 94% rename from python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py rename to python/pyspark/sql/tests/connect/pandas/test_parity_pandas_grouped_map.py index 8c76313c5c96b..52110718808be 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_grouped_map.py @@ -28,7 +28,7 @@ def test_supported_types(self): if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_pandas_grouped_map import * # noqa: F401 + from pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_grouped_map_with_state.py similarity index 92% rename from python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py rename to 
python/pyspark/sql/tests/connect/pandas/test_parity_pandas_grouped_map_with_state.py index 67d42a7c86138..2da8b4aa3be8a 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +++ b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_grouped_map_with_state.py @@ -29,7 +29,7 @@ class GroupedApplyInPandasWithStateTests( if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_pandas_grouped_map_with_state import * # noqa: F401 + from pyspark.sql.tests.connect.pandas.test_parity_pandas_grouped_map_with_state import * # noqa: F401,E501 try: import xmlrunner diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_map.py b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_map.py similarity index 93% rename from python/pyspark/sql/tests/connect/test_parity_pandas_map.py rename to python/pyspark/sql/tests/connect/pandas/test_parity_pandas_map.py index 999afd24c6528..965ef5dcf8949 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_map.py +++ b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_map.py @@ -28,7 +28,7 @@ class MapInPandasParityTests( if __name__ == "__main__": import unittest - from pyspark.sql.tests.connect.test_parity_pandas_map import * # noqa: F401 + from pyspark.sql.tests.connect.pandas.test_parity_pandas_map import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf.py similarity index 93% rename from python/pyspark/sql/tests/connect/test_parity_pandas_udf.py rename to python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf.py index 364e41716474b..aa2b4748ff3f7 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +++ b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf.py @@ -25,7 +25,7 @@ class PandasUDFParityTests(PandasUDFTestsMixin, ReusedConnectTestCase): if __name__ == 
"__main__": import unittest - from pyspark.sql.tests.connect.test_parity_pandas_udf import * # noqa: F401 + from pyspark.sql.tests.connect.pandas.test_parity_pandas_udf import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_grouped_agg.py similarity index 93% rename from python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py rename to python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_grouped_agg.py index fdb81bffbce12..dfcb2b94c1bcc 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_grouped_agg.py @@ -28,7 +28,7 @@ class PandasUDFGroupedAggParityTests( if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_pandas_udf_grouped_agg import * # noqa: F401 + from pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_grouped_agg import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_scalar.py similarity index 93% rename from python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py rename to python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_scalar.py index 451f0f68d6ee5..9cab05f569d46 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_scalar.py @@ -24,7 +24,7 @@ class PandasUDFScalarParityTests(ScalarPandasUDFTestsMixin, ReusedConnectTestCas if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_pandas_udf_scalar import * # noqa: F401 + from pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_scalar import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git 
a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_window.py similarity index 93% rename from python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py rename to python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_window.py index b2288c9d949e5..08da1b4648b22 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_window.py +++ b/python/pyspark/sql/tests/connect/pandas/test_parity_pandas_udf_window.py @@ -28,7 +28,7 @@ class PandasUDFWindowParityTests( if __name__ == "__main__": - from pyspark.sql.tests.connect.test_parity_pandas_udf_window import * # noqa: F401 + from pyspark.sql.tests.connect.pandas.test_parity_pandas_udf_window import * # noqa: F401 try: import xmlrunner # type: ignore[import] diff --git a/python/pyspark/sql/tests/connect/test_connect_dataframe_property.py b/python/pyspark/sql/tests/connect/test_connect_dataframe_property.py index 1a8c7190e31a6..c4c10c963a48b 100644 --- a/python/pyspark/sql/tests/connect/test_connect_dataframe_property.py +++ b/python/pyspark/sql/tests/connect/test_connect_dataframe_property.py @@ -110,6 +110,12 @@ def func(iterator): cdf1 = cdf.mapInPandas(func, schema) self.assertEqual(cdf1._cached_schema, schema) + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": "1"}): + self.assertTrue(is_remote()) + cdf1 = cdf.mapInPandas(func, "a int, b string") + # Properly cache the parsed schema + self.assertEqual(cdf1._cached_schema, schema) + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": None}): # 'mapInPandas' depends on the method 'pandas_udf', which is dispatched # based on 'is_remote'. 
However, in SparkConnectSQLTestCase, the remote @@ -180,6 +186,12 @@ def normalize(pdf): cdf1 = cdf.groupby("id").applyInPandas(normalize, schema) self.assertEqual(cdf1._cached_schema, schema) + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": "1"}): + self.assertTrue(is_remote()) + cdf1 = cdf.groupby("id").applyInPandas(normalize, "id long, v double") + # Properly cache the parsed schema + self.assertEqual(cdf1._cached_schema, schema) + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": None}): self.assertFalse(is_remote()) sdf1 = sdf.groupby("id").applyInPandas(normalize, schema) diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py index e29873173cc3a..d1e2558305291 100644 --- a/python/pyspark/sql/tests/connect/test_connect_function.py +++ b/python/pyspark/sql/tests/connect/test_connect_function.py @@ -54,7 +54,7 @@ def setUpClass(cls): # Disable the shared namespace so pyspark.sql.functions, etc point the regular # PySpark libraries. os.environ["PYSPARK_NO_NAMESPACE_SHARE"] = "1" - cls.connect = cls.spark # Switch Spark Connect session and regular PySpark sesion. + cls.connect = cls.spark # Switch Spark Connect session and regular PySpark session. 
cls.spark = PySparkSession._instantiatedSession assert cls.spark is not None @@ -590,6 +590,10 @@ def test_aggregation_functions(self): (CF.avg, SF.avg), (CF.collect_list, SF.collect_list), (CF.collect_set, SF.collect_set), + (CF.listagg, SF.listagg), + (CF.listagg_distinct, SF.listagg_distinct), + (CF.string_agg, SF.string_agg), + (CF.string_agg_distinct, SF.string_agg_distinct), (CF.count, SF.count), (CF.first, SF.first), (CF.kurtosis, SF.kurtosis), diff --git a/python/pyspark/sql/tests/connect/test_connect_readwriter.py b/python/pyspark/sql/tests/connect/test_connect_readwriter.py index db1e94cb6863e..06266b86de3ff 100644 --- a/python/pyspark/sql/tests/connect/test_connect_readwriter.py +++ b/python/pyspark/sql/tests/connect/test_connect_readwriter.py @@ -146,6 +146,16 @@ def test_parquet(self): self.connect.read.parquet(d).toPandas(), self.spark.read.parquet(d).toPandas() ) + def test_parquet_compression_option(self): + # SPARK-50537: Fix compression option being overwritten in df.write.parquet + with tempfile.TemporaryDirectory(prefix="test_parquet") as d: + self.connect.range(10).write.mode("overwrite").option("compression", "gzip").parquet(d) + self.assertTrue(any(file.endswith(".gz.parquet") for file in os.listdir(d))) + # Read the Parquet file as a DataFrame. + self.assert_eq( + self.connect.read.parquet(d).toPandas(), self.spark.read.parquet(d).toPandas() + ) + def test_text(self): # SPARK-41849: Implement DataFrameReader.text with tempfile.TemporaryDirectory(prefix="test_text") as d: diff --git a/python/pyspark/sql/tests/connect/test_parity_job_cancellation.py b/python/pyspark/sql/tests/connect/test_parity_job_cancellation.py new file mode 100644 index 0000000000000..ddb4554afa55a --- /dev/null +++ b/python/pyspark/sql/tests/connect/test_parity_job_cancellation.py @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import threading + +from pyspark import inheritable_thread_target +from pyspark.sql.tests.test_job_cancellation import JobCancellationTestsMixin +from pyspark.testing.connectutils import ReusedConnectTestCase + + +class JobCancellationParityTests(JobCancellationTestsMixin, ReusedConnectTestCase): + def test_inheritable_tags_with_deco(self): + @inheritable_thread_target(self.spark) + def func(target): + return target() + + self.check_inheritable_tags( + create_thread=lambda target, session: threading.Thread(target=func, args=(target,)) + ) + + +if __name__ == "__main__": + import unittest + from pyspark.sql.tests.connect.test_parity_job_cancellation import * # noqa: F401 + + try: + import xmlrunner # type: ignore[import] + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/tests/connect/test_parity_subquery.py b/python/pyspark/sql/tests/connect/test_parity_subquery.py index 1cba3a7d49956..f3225fcb7f2dd 100644 --- a/python/pyspark/sql/tests/connect/test_parity_subquery.py +++ b/python/pyspark/sql/tests/connect/test_parity_subquery.py @@ -17,13 +17,33 @@ import unittest +from pyspark.sql import functions as sf from 
pyspark.sql.tests.test_subquery import SubqueryTestsMixin +from pyspark.testing import assertDataFrameEqual from pyspark.testing.connectutils import ReusedConnectTestCase -@unittest.skip("TODO(SPARK-50134): Support subquery in connect") class SubqueryParityTests(SubqueryTestsMixin, ReusedConnectTestCase): - pass + def test_scalar_subquery_with_missing_outer_reference(self): + with self.tempView("l", "r"): + self.df1.createOrReplaceTempView("l") + self.df2.createOrReplaceTempView("r") + + assertDataFrameEqual( + self.spark.table("l").select( + "a", + ( + self.spark.table("r") + .where(sf.col("c") == sf.col("a")) + .select(sf.sum("d")) + .scalar() + ), + ), + self.spark.sql("""SELECT a, (SELECT sum(d) FROM r WHERE c = a) FROM l"""), + ) + + def test_subquery_in_unpivot(self): + self.check_subquery_in_unpivot(None, None) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py b/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py index 274364b181441..5c46130c5b50d 100644 --- a/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py +++ b/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py @@ -21,9 +21,9 @@ from pyspark.sql.tests.test_udf_profiler import ( UDFProfiler2TestsMixin, _do_computation, - has_flameprof, ) from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.testing.utils import have_flameprof class UDFProfilerParityTests(UDFProfiler2TestsMixin, ReusedConnectTestCase): @@ -65,7 +65,7 @@ def action(df): io.getvalue(), f"10.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) diff --git a/python/pyspark/sql/tests/connect/test_parity_udtf.py b/python/pyspark/sql/tests/connect/test_parity_udtf.py index 6955e7377b4c4..6f4e4133335eb 100644 --- a/python/pyspark/sql/tests/connect/test_parity_udtf.py +++ b/python/pyspark/sql/tests/connect/test_parity_udtf.py @@ -76,6 +76,14 @@ def 
test_udtf_with_analyze_using_file(self): def test_udtf_access_spark_session(self): super().test_udtf_access_spark_session() + @unittest.skip("TODO(SPARK-50393): support df.asTable() in Spark Connect") + def test_df_asTable(self): + super().test_df_asTable() + + @unittest.skip("TODO(SPARK-50393): support df.asTable() in Spark Connect") + def test_df_asTable_chaining_methods(self): + super().test_df_asTable_chaining_methods() + def _add_pyfile(self, path): self.spark.addArtifacts(path, pyfile=True) diff --git a/python/pyspark/sql/tests/connect/test_session.py b/python/pyspark/sql/tests/connect/test_session.py index 6f0e4aaad3f89..e327c868895f4 100644 --- a/python/pyspark/sql/tests/connect/test_session.py +++ b/python/pyspark/sql/tests/connect/test_session.py @@ -14,18 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import threading -import time + import unittest from typing import Optional -from pyspark import InheritableThread, inheritable_thread_target from pyspark.sql.connect.client import DefaultChannelBuilder from pyspark.sql.connect.session import SparkSession as RemoteSparkSession -from pyspark.testing.connectutils import should_test_connect - -if should_test_connect: - from pyspark.testing.connectutils import ReusedConnectTestCase class CustomChannelBuilder(DefaultChannelBuilder): @@ -104,178 +98,3 @@ def test_default_session_expires_when_client_closes(self): s3 = RemoteSparkSession.builder.remote("sc://other").getOrCreate() self.assertIsNot(s1, s3) - - -class JobCancellationTests(ReusedConnectTestCase): - def test_tags(self): - self.spark.clearTags() - self.spark.addTag("a") - self.assertEqual(self.spark.getTags(), {"a"}) - self.spark.addTag("b") - self.spark.removeTag("a") - self.assertEqual(self.spark.getTags(), {"b"}) - self.spark.addTag("c") - self.spark.clearTags() - self.assertEqual(self.spark.getTags(), set()) - self.spark.clearTags() - - def test_tags_multithread(self): - output1 = 
None - output2 = None - - def tag1(): - nonlocal output1 - - self.spark.addTag("tag1") - output1 = self.spark.getTags() - - def tag2(): - nonlocal output2 - - self.spark.addTag("tag2") - output2 = self.spark.getTags() - - t1 = threading.Thread(target=tag1) - t1.start() - t1.join() - t2 = threading.Thread(target=tag2) - t2.start() - t2.join() - - self.assertIsNotNone(output1) - self.assertEquals(output1, {"tag1"}) - self.assertIsNotNone(output2) - self.assertEquals(output2, {"tag2"}) - - def test_interrupt_tag(self): - thread_ids = range(4) - self.check_job_cancellation( - lambda job_group: self.spark.addTag(job_group), - lambda job_group: self.spark.interruptTag(job_group), - thread_ids, - [i for i in thread_ids if i % 2 == 0], - [i for i in thread_ids if i % 2 != 0], - ) - self.spark.clearTags() - - def test_interrupt_all(self): - thread_ids = range(4) - self.check_job_cancellation( - lambda job_group: None, - lambda job_group: self.spark.interruptAll(), - thread_ids, - thread_ids, - [], - ) - self.spark.clearTags() - - def check_job_cancellation( - self, setter, canceller, thread_ids, thread_ids_to_cancel, thread_ids_to_run - ): - job_id_a = "job_ids_to_cancel" - job_id_b = "job_ids_to_run" - threads = [] - - # A list which records whether job is cancelled. - # The index of the array is the thread index which job run in. - is_job_cancelled = [False for _ in thread_ids] - - def run_job(job_id, index): - """ - Executes a job with the group ``job_group``. Each job waits for 3 seconds - and then exits. - """ - try: - setter(job_id) - - def func(itr): - for pdf in itr: - time.sleep(pdf._1.iloc[0]) - yield pdf - - self.spark.createDataFrame([[20]]).repartition(1).mapInPandas( - func, schema="_1 LONG" - ).collect() - is_job_cancelled[index] = False - except Exception: - # Assume that exception means job cancellation. - is_job_cancelled[index] = True - - # Test if job succeeded when not cancelled. 
- run_job(job_id_a, 0) - self.assertFalse(is_job_cancelled[0]) - self.spark.clearTags() - - # Run jobs - for i in thread_ids_to_cancel: - t = threading.Thread(target=run_job, args=(job_id_a, i)) - t.start() - threads.append(t) - - for i in thread_ids_to_run: - t = threading.Thread(target=run_job, args=(job_id_b, i)) - t.start() - threads.append(t) - - # Wait to make sure all jobs are executed. - time.sleep(10) - # And then, cancel one job group. - canceller(job_id_a) - - # Wait until all threads launching jobs are finished. - for t in threads: - t.join() - - for i in thread_ids_to_cancel: - self.assertTrue( - is_job_cancelled[i], "Thread {i}: Job in group A was not cancelled.".format(i=i) - ) - - for i in thread_ids_to_run: - self.assertFalse( - is_job_cancelled[i], "Thread {i}: Job in group B did not succeeded.".format(i=i) - ) - - def test_inheritable_tags(self): - self.check_inheritable_tags( - create_thread=lambda target, session: InheritableThread(target, session=session) - ) - self.check_inheritable_tags( - create_thread=lambda target, session: threading.Thread( - target=inheritable_thread_target(session)(target) - ) - ) - - # Test decorator usage - @inheritable_thread_target(self.spark) - def func(target): - return target() - - self.check_inheritable_tags( - create_thread=lambda target, session: threading.Thread(target=func, args=(target,)) - ) - - def check_inheritable_tags(self, create_thread): - spark = self.spark - spark.addTag("a") - first = set() - second = set() - - def get_inner_local_prop(): - spark.addTag("c") - second.update(spark.getTags()) - - def get_outer_local_prop(): - spark.addTag("b") - first.update(spark.getTags()) - t2 = create_thread(target=get_inner_local_prop, session=spark) - t2.start() - t2.join() - - t1 = create_thread(target=get_outer_local_prop, session=spark) - t1.start() - t1.join() - - self.assertEqual(spark.getTags(), {"a"}) - self.assertEqual(first, {"a", "b"}) - self.assertEqual(second, {"a", "b", "c"}) diff --git 
a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py index f85a7b03eddab..1f9532352679a 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py @@ -154,7 +154,7 @@ def merge_pandas(lft, _): ): (left.groupby("id", "k").cogroup(right.groupby("id"))).applyInPandas( merge_pandas, "id long, k int, v int" - ).schema + ).count() def test_apply_in_pandas_not_returning_pandas_dataframe(self): with self.quiet(): diff --git a/python/pyspark/sql/tests/test_pandas_sqlmetrics.py b/python/pyspark/sql/tests/pandas/test_pandas_sqlmetrics.py similarity index 96% rename from python/pyspark/sql/tests/test_pandas_sqlmetrics.py rename to python/pyspark/sql/tests/pandas/test_pandas_sqlmetrics.py index 22a0e92e818db..cb2f2ff285684 100644 --- a/python/pyspark/sql/tests/test_pandas_sqlmetrics.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_sqlmetrics.py @@ -57,7 +57,7 @@ def test_pandas(col1): if __name__ == "__main__": - from pyspark.sql.tests.test_pandas_sqlmetrics import * # noqa: F401 + from pyspark.sql.tests.pandas.test_pandas_sqlmetrics import * # noqa: F401 try: import xmlrunner diff --git a/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py b/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py index 8901f09e9272d..516a95a91a5e7 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_transform_with_state.py @@ -15,6 +15,7 @@ # limitations under the License. 
# +import json import os import time import tempfile @@ -26,14 +27,8 @@ from pyspark import SparkConf from pyspark.errors import PySparkRuntimeError -from pyspark.sql.functions import split -from pyspark.sql.types import ( - StringType, - StructType, - StructField, - Row, - IntegerType, -) +from pyspark.sql.functions import array_sort, col, explode, split +from pyspark.sql.types import StringType, StructType, StructField, Row, IntegerType, TimestampType from pyspark.testing import assertDataFrameEqual from pyspark.testing.sqlutils import ( ReusedSQLTestCase, @@ -61,6 +56,7 @@ def conf(cls): "org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider", ) cfg.set("spark.sql.execution.arrow.transformWithStateInPandas.maxRecordsPerBatch", "2") + cfg.set("spark.sql.session.timeZone", "UTC") return cfg def _prepare_input_data(self, input_path, col1, col2): @@ -104,9 +100,17 @@ def build_test_df_with_3_cols(self, input_path): return df_final def _test_transform_with_state_in_pandas_basic( - self, stateful_processor, check_results, single_batch=False, timeMode="None" + self, + stateful_processor, + check_results, + single_batch=False, + timeMode="None", + checkpoint_path=None, + initial_state=None, ): input_path = tempfile.mkdtemp() + if checkpoint_path is None: + checkpoint_path = tempfile.mkdtemp() self._prepare_test_resource1(input_path) if not single_batch: time.sleep(2) @@ -132,8 +136,10 @@ def _test_transform_with_state_in_pandas_basic( outputStructType=output_schema, outputMode="Update", timeMode=timeMode, + initialState=initial_state, ) .writeStream.queryName("this_query") + .option("checkpointLocation", checkpoint_path) .foreachBatch(check_results) .outputMode("update") .start() @@ -247,11 +253,15 @@ def check_results(batch_df, _): # test list state with ttl has the same behavior as list state when state doesn't expire. 
def test_transform_with_state_in_pandas_list_state_large_ttl(self): - def check_results(batch_df, _): - assert set(batch_df.sort("id").collect()) == { - Row(id="0", countAsString="2"), - Row(id="1", countAsString="2"), - } + def check_results(batch_df, batch_id): + if batch_id == 0: + assert set(batch_df.sort("id").collect()) == { + Row(id="0", countAsString="2"), + Row(id="1", countAsString="2"), + } + else: + for q in self.spark.streams.active: + q.stop() self._test_transform_with_state_in_pandas_basic( ListStateLargeTTLProcessor(), check_results, True, "processingTime" @@ -268,11 +278,15 @@ def check_results(batch_df, _): # test map state with ttl has the same behavior as map state when state doesn't expire. def test_transform_with_state_in_pandas_map_state_large_ttl(self): - def check_results(batch_df, _): - assert set(batch_df.sort("id").collect()) == { - Row(id="0", countAsString="2"), - Row(id="1", countAsString="2"), - } + def check_results(batch_df, batch_id): + if batch_id == 0: + assert set(batch_df.sort("id").collect()) == { + Row(id="0", countAsString="2"), + Row(id="1", countAsString="2"), + } + else: + for q in self.spark.streams.active: + q.stop() self._test_transform_with_state_in_pandas_basic( MapStateLargeTTLProcessor(), check_results, True, "processingTime" @@ -287,16 +301,22 @@ def check_results(batch_df, batch_id): Row(id="0", countAsString="2"), Row(id="1", countAsString="2"), } - else: + elif batch_id == 1: assert set(batch_df.sort("id").collect()) == { Row(id="0", countAsString="3"), Row(id="1", countAsString="2"), } + else: + for q in self.spark.streams.active: + q.stop() self._test_transform_with_state_in_pandas_basic( SimpleTTLStatefulProcessor(), check_results, False, "processingTime" ) + @unittest.skipIf( + "COVERAGE_PROCESS_START" in os.environ, "Flaky with coverage enabled, skipping for now." 
+ ) def test_value_state_ttl_expiration(self): def check_results(batch_df, batch_id): if batch_id == 0: @@ -348,8 +368,11 @@ def check_results(batch_df, batch_id): Row(id="ttl-map-state-count-1", count=3), ], ) + else: + for q in self.spark.streams.active: + q.stop() if batch_id == 0 or batch_id == 1: - time.sleep(6) + time.sleep(4) input_dir = tempfile.TemporaryDirectory() input_path = input_dir.name @@ -466,7 +489,7 @@ def check_results(batch_df, batch_id): ).first()["timeValues"] check_timestamp(batch_df) - else: + elif batch_id == 2: assert set(batch_df.sort("id").select("id", "countAsString").collect()) == { Row(id="0", countAsString="3"), Row(id="0", countAsString="-1"), @@ -480,6 +503,10 @@ def check_results(batch_df, batch_id): ).first()["timeValues"] assert current_batch_expired_timestamp > self.first_expired_timestamp + else: + for q in self.spark.streams.active: + q.stop() + self._test_transform_with_state_in_pandas_proc_timer( ProcTimeStatefulProcessor(), check_results ) @@ -546,31 +573,56 @@ def prepare_batch3(input_path): def test_transform_with_state_in_pandas_event_time(self): def check_results(batch_df, batch_id): if batch_id == 0: - assert set(batch_df.sort("id").collect()) == {Row(id="a", timestamp="20")} - elif batch_id == 1: + # watermark for late event = 0 + # watermark for eviction = 0 + # timer is registered with expiration time = 0, hence expired at the same batch assert set(batch_df.sort("id").collect()) == { Row(id="a", timestamp="20"), Row(id="a-expired", timestamp="0"), } + elif batch_id == 1: + # watermark for late event = 0 + # watermark for eviction = 10 (20 - 10) + # timer is registered with expiration time = 10, hence expired at the same batch + assert set(batch_df.sort("id").collect()) == { + Row(id="a", timestamp="4"), + Row(id="a-expired", timestamp="10000"), + } + elif batch_id == 2: + # watermark for late event = 10 + # watermark for eviction = 10 (unchanged as 4 < 10) + # timer is registered with expiration time = 10, hence 
expired at the same batch + assert set(batch_df.sort("id").collect()) == { + Row(id="a", timestamp="15"), + Row(id="a-expired", timestamp="10000"), + } else: - # watermark has not progressed, so timer registered in batch 1(watermark = 10) - # has not yet expired - assert set(batch_df.sort("id").collect()) == {Row(id="a", timestamp="15")} + for q in self.spark.streams.active: + q.stop() self._test_transform_with_state_in_pandas_event_time( EventTimeStatefulProcessor(), check_results ) - def _test_transform_with_state_init_state_in_pandas(self, stateful_processor, check_results): + def _test_transform_with_state_init_state_in_pandas( + self, + stateful_processor, + check_results, + time_mode="None", + checkpoint_path=None, + initial_state=None, + ): input_path = tempfile.mkdtemp() + if checkpoint_path is None: + checkpoint_path = tempfile.mkdtemp() self._prepare_test_resource1(input_path) time.sleep(2) self._prepare_input_data(input_path + "/text-test2.txt", [0, 3], [67, 12]) - df = self._build_test_df(input_path) - for q in self.spark.streams.active: q.stop() + + df = self._build_test_df(input_path) self.assertTrue(df.isStreaming) output_schema = StructType( @@ -580,8 +632,9 @@ def _test_transform_with_state_init_state_in_pandas(self, stateful_processor, ch ] ) - data = [("0", 789), ("3", 987)] - initial_state = self.spark.createDataFrame(data, "id string, initVal int").groupBy("id") + if initial_state is None: + data = [("0", 789), ("3", 987)] + initial_state = self.spark.createDataFrame(data, "id string, initVal int").groupBy("id") q = ( df.groupBy("id") @@ -589,10 +642,11 @@ def _test_transform_with_state_init_state_in_pandas(self, stateful_processor, ch statefulProcessor=stateful_processor, outputStructType=output_schema, outputMode="Update", - timeMode="None", + timeMode=time_mode, initialState=initial_state, ) .writeStream.queryName("this_query") + .option("checkpointLocation", checkpoint_path) .foreachBatch(check_results) .outputMode("update") .start() @@ 
-677,6 +731,9 @@ def check_results(batch_df, batch_id): Row(id1="0", id2="1", value=str(123 + 46)), Row(id1="1", id2="2", value=str(146 + 346)), } + else: + for q in self.spark.streams.active: + q.stop() self._test_transform_with_state_non_contiguous_grouping_cols( SimpleStatefulProcessorWithInitialState(), check_results @@ -690,6 +747,9 @@ def check_results(batch_df, batch_id): Row(id1="0", id2="1", value=str(789 + 123 + 46)), Row(id1="1", id2="2", value=str(146 + 346)), } + else: + for q in self.spark.streams.active: + q.stop() # grouping key of initial state is also not starting from the beginning of attributes data = [(789, "0", "1"), (987, "3", "2")] @@ -701,6 +761,539 @@ def check_results(batch_df, batch_id): SimpleStatefulProcessorWithInitialState(), check_results, initial_state ) + def _test_transform_with_state_in_pandas_chaining_ops( + self, stateful_processor, check_results, timeMode="None", grouping_cols=["outputTimestamp"] + ): + import pyspark.sql.functions as f + + input_path = tempfile.mkdtemp() + self._prepare_input_data(input_path + "/text-test3.txt", ["a", "b"], [10, 15]) + time.sleep(2) + self._prepare_input_data(input_path + "/text-test4.txt", ["a", "c"], [11, 25]) + time.sleep(2) + self._prepare_input_data(input_path + "/text-test1.txt", ["a"], [5]) + + df = self._build_test_df(input_path) + df = df.select( + "id", f.from_unixtime(f.col("temperature")).alias("eventTime").cast("timestamp") + ).withWatermark("eventTime", "5 seconds") + + for q in self.spark.streams.active: + q.stop() + self.assertTrue(df.isStreaming) + + output_schema = StructType( + [ + StructField("id", StringType(), True), + StructField("outputTimestamp", TimestampType(), True), + ] + ) + + q = ( + df.groupBy("id") + .transformWithStateInPandas( + statefulProcessor=stateful_processor, + outputStructType=output_schema, + outputMode="Append", + timeMode=timeMode, + eventTimeColumnName="outputTimestamp", + ) + .groupBy(grouping_cols) + .count() + 
.writeStream.queryName("chaining_ops_query") + .foreachBatch(check_results) + .outputMode("append") + .start() + ) + + self.assertEqual(q.name, "chaining_ops_query") + self.assertTrue(q.isActive) + q.processAllAvailable() + q.awaitTermination(10) + + def test_transform_with_state_in_pandas_chaining_ops(self): + def check_results(batch_df, batch_id): + import datetime + + if batch_id == 0: + assert batch_df.isEmpty() + elif batch_id == 1: + # eviction watermark = 15 - 5 = 10 (max event time from batch 0), + # late event watermark = 0 (eviction event time from batch 0) + assert set( + batch_df.sort("outputTimestamp").select("outputTimestamp", "count").collect() + ) == { + Row(outputTimestamp=datetime.datetime(1970, 1, 1, 0, 0, 10), count=1), + } + elif batch_id == 2: + # eviction watermark = 25 - 5 = 20, late event watermark = 10; + # row with watermark=5<10 is dropped so it does not show up in the results; + # row with eventTime<=20 are finalized and emitted + assert set( + batch_df.sort("outputTimestamp").select("outputTimestamp", "count").collect() + ) == { + Row(outputTimestamp=datetime.datetime(1970, 1, 1, 0, 0, 11), count=1), + Row(outputTimestamp=datetime.datetime(1970, 1, 1, 0, 0, 15), count=1), + } + + self._test_transform_with_state_in_pandas_chaining_ops( + StatefulProcessorChainingOps(), check_results, "eventTime" + ) + self._test_transform_with_state_in_pandas_chaining_ops( + StatefulProcessorChainingOps(), check_results, "eventTime", ["outputTimestamp", "id"] + ) + + def test_transform_with_state_init_state_with_timers(self): + def check_results(batch_df, batch_id): + if batch_id == 0: + # timers are registered and handled in the first batch for + # rows in initial state; For key=0 and key=3 which contains + # expired timers, both should be handled by handleExpiredTimers + # regardless of whether key exists in the data rows or not + expired_df = batch_df.filter(batch_df["id"].contains("expired")) + data_df = 
batch_df.filter(~batch_df["id"].contains("expired")) + assert set(expired_df.sort("id").select("id").collect()) == { + Row(id="0-expired"), + Row(id="3-expired"), + } + assert set(data_df.sort("id").collect()) == { + Row(id="0", value=str(789 + 123 + 46)), + Row(id="1", value=str(146 + 346)), + } + elif batch_id == 1: + # handleInitialState is only processed in the first batch, + # no more timer is registered so no more expired timers + assert set(batch_df.sort("id").collect()) == { + Row(id="0", value=str(789 + 123 + 46 + 67)), + Row(id="3", value=str(987 + 12)), + } + else: + for q in self.spark.streams.active: + q.stop() + + self._test_transform_with_state_init_state_in_pandas( + StatefulProcessorWithInitialStateTimers(), check_results, "processingTime" + ) + + def test_transform_with_state_in_pandas_batch_query(self): + data = [("0", 123), ("0", 46), ("1", 146), ("1", 346)] + df = self.spark.createDataFrame(data, "id string, temperature int") + + output_schema = StructType( + [ + StructField("id", StringType(), True), + StructField("countAsString", StringType(), True), + ] + ) + batch_result = df.groupBy("id").transformWithStateInPandas( + statefulProcessor=MapStateProcessor(), + outputStructType=output_schema, + outputMode="Update", + timeMode="None", + ) + assert set(batch_result.sort("id").collect()) == { + Row(id="0", countAsString="2"), + Row(id="1", countAsString="2"), + } + + def test_transform_with_state_in_pandas_batch_query_initial_state(self): + data = [("0", 123), ("0", 46), ("1", 146), ("1", 346)] + df = self.spark.createDataFrame(data, "id string, temperature int") + + init_data = [("0", 789), ("3", 987)] + initial_state = self.spark.createDataFrame(init_data, "id string, initVal int").groupBy( + "id" + ) + + output_schema = StructType( + [ + StructField("id", StringType(), True), + StructField("value", StringType(), True), + ] + ) + batch_result = df.groupBy("id").transformWithStateInPandas( + 
statefulProcessor=SimpleStatefulProcessorWithInitialState(), + outputStructType=output_schema, + outputMode="Update", + timeMode="None", + initialState=initial_state, + ) + assert set(batch_result.sort("id").collect()) == { + Row(id="0", value=str(789 + 123 + 46)), + Row(id="1", value=str(146 + 346)), + } + + # This test covers mapState with TTL, an empty state variable + # and additional test against initial state python runner + @unittest.skipIf( + "COVERAGE_PROCESS_START" in os.environ, "Flaky with coverage enabled, skipping for now." + ) + def test_transform_with_map_state_metadata(self): + checkpoint_path = tempfile.mktemp() + + def check_results(batch_df, batch_id): + if batch_id == 0: + assert set(batch_df.sort("id").collect()) == { + Row(id="0", countAsString="2"), + Row(id="1", countAsString="2"), + } + else: + # check for state metadata source + metadata_df = self.spark.read.format("state-metadata").load(checkpoint_path) + assert set( + metadata_df.select( + "operatorId", + "operatorName", + "stateStoreName", + "numPartitions", + "minBatchId", + "maxBatchId", + ).collect() + ) == { + Row( + operatorId=0, + operatorName="transformWithStateInPandasExec", + stateStoreName="default", + numPartitions=5, + minBatchId=0, + maxBatchId=0, + ) + } + operator_properties_json_obj = json.loads( + metadata_df.select("operatorProperties").collect()[0][0] + ) + assert operator_properties_json_obj["timeMode"] == "ProcessingTime" + assert operator_properties_json_obj["outputMode"] == "Update" + + state_var_list = operator_properties_json_obj["stateVariables"] + assert len(state_var_list) == 3 + for state_var in state_var_list: + if state_var["stateName"] == "mapState": + assert state_var["stateVariableType"] == "MapState" + assert state_var["ttlEnabled"] + elif state_var["stateName"] == "listState": + assert state_var["stateVariableType"] == "ListState" + assert not state_var["ttlEnabled"] + else: + assert state_var["stateName"] == "$procTimers_keyToTimestamp" + assert 
state_var["stateVariableType"] == "TimerState" + + # check for state data source + map_state_df = ( + self.spark.read.format("statestore") + .option("path", checkpoint_path) + .option("stateVarName", "mapState") + .load() + ) + assert map_state_df.selectExpr( + "key.id AS groupingKey", + "user_map_key.name AS mapKey", + "user_map_value.value.count AS mapValue", + ).sort("groupingKey").collect() == [ + Row(groupingKey="0", mapKey="key2", mapValue=2), + Row(groupingKey="1", mapKey="key2", mapValue=2), + ] + + # check for map state with flatten option + map_state_df_non_flatten = ( + self.spark.read.format("statestore") + .option("path", checkpoint_path) + .option("stateVarName", "mapState") + .option("flattenCollectionTypes", False) + .load() + ) + assert map_state_df_non_flatten.select( + "key.id", explode(col("map_value")).alias("map_key", "map_value") + ).selectExpr( + "id AS groupingKey", + "map_key.name AS mapKey", + "map_value.value.count AS mapValue", + ).sort( + "groupingKey" + ).collect() == [ + Row(groupingKey="0", mapKey="key2", mapValue=2), + Row(groupingKey="1", mapKey="key2", mapValue=2), + ] + + ttl_df = map_state_df.selectExpr( + "user_map_value.ttlExpirationMs AS TTLVal" + ).collect() + # check if there are two rows containing TTL value in map state dataframe + assert len(ttl_df) == 2 + # check if two rows are of the same TTL value + assert len(set(ttl_df)) == 1 + + list_state_df = ( + self.spark.read.format("statestore") + .option("path", checkpoint_path) + .option("stateVarName", "listState") + .load() + ) + assert list_state_df.isEmpty() + + for q in self.spark.streams.active: + q.stop() + + self._test_transform_with_state_in_pandas_basic( + MapStateLargeTTLProcessor(), + check_results, + True, + "processingTime", + checkpoint_path=checkpoint_path, + initial_state=None, + ) + + # run the same test suite again but with no-op initial state + # TWS with initial state is using a different python runner + init_data = [("0", 789), ("3", 987)] + 
initial_state = self.spark.createDataFrame(init_data, "id string, temperature int").groupBy( + "id" + ) + self._test_transform_with_state_in_pandas_basic( + MapStateLargeTTLProcessor(), + check_results, + True, + "processingTime", + checkpoint_path=checkpoint_path, + initial_state=initial_state, + ) + + # This test covers multiple list state variables and flatten option + def test_transform_with_list_state_metadata(self): + checkpoint_path = tempfile.mktemp() + + def check_results(batch_df, batch_id): + if batch_id == 0: + assert set(batch_df.sort("id").collect()) == { + Row(id="0", countAsString="2"), + Row(id="1", countAsString="2"), + } + else: + # check for state metadata source + metadata_df = self.spark.read.format("state-metadata").load(checkpoint_path) + operator_properties_json_obj = json.loads( + metadata_df.select("operatorProperties").collect()[0][0] + ) + state_var_list = operator_properties_json_obj["stateVariables"] + assert len(state_var_list) == 3 + for state_var in state_var_list: + if state_var["stateName"] in ["listState1", "listState2"]: + state_var["stateVariableType"] == "ListState" + else: + assert state_var["stateName"] == "$procTimers_keyToTimestamp" + assert state_var["stateVariableType"] == "TimerState" + + # check for state data source and flatten option + list_state_1_df = ( + self.spark.read.format("statestore") + .option("path", checkpoint_path) + .option("stateVarName", "listState1") + .option("flattenCollectionTypes", True) + .load() + ) + assert list_state_1_df.selectExpr( + "key.id AS groupingKey", + "list_element.temperature AS listElement", + ).sort("groupingKey", "listElement").collect() == [ + Row(groupingKey="0", listElement=20), + Row(groupingKey="0", listElement=20), + Row(groupingKey="0", listElement=111), + Row(groupingKey="0", listElement=120), + Row(groupingKey="0", listElement=120), + Row(groupingKey="1", listElement=20), + Row(groupingKey="1", listElement=20), + Row(groupingKey="1", listElement=111), + 
Row(groupingKey="1", listElement=120), + Row(groupingKey="1", listElement=120), + ] + + list_state_2_df = ( + self.spark.read.format("statestore") + .option("path", checkpoint_path) + .option("stateVarName", "listState2") + .option("flattenCollectionTypes", False) + .load() + ) + assert list_state_2_df.selectExpr( + "key.id AS groupingKey", "list_value.temperature AS valueList" + ).sort("groupingKey").withColumn( + "valueSortedList", array_sort(col("valueList")) + ).select( + "groupingKey", "valueSortedList" + ).collect() == [ + Row(groupingKey="0", valueSortedList=[20, 20, 120, 120, 222]), + Row(groupingKey="1", valueSortedList=[20, 20, 120, 120, 222]), + ] + + for q in self.spark.streams.active: + q.stop() + + self._test_transform_with_state_in_pandas_basic( + ListStateProcessor(), + check_results, + True, + "processingTime", + checkpoint_path=checkpoint_path, + initial_state=None, + ) + + # This test covers value state variable and read change feed, + # snapshotStartBatchId related options + def test_transform_with_value_state_metadata(self): + checkpoint_path = tempfile.mktemp() + + def check_results(batch_df, batch_id): + if batch_id == 0: + assert set(batch_df.sort("id").collect()) == { + Row(id="0", countAsString="2"), + Row(id="1", countAsString="2"), + } + else: + assert set(batch_df.sort("id").collect()) == { + Row(id="0", countAsString="3"), + Row(id="1", countAsString="2"), + } + + # check for state metadata source + metadata_df = self.spark.read.format("state-metadata").load(checkpoint_path) + operator_properties_json_obj = json.loads( + metadata_df.select("operatorProperties").collect()[0][0] + ) + state_var_list = operator_properties_json_obj["stateVariables"] + + assert len(state_var_list) == 3 + for state_var in state_var_list: + if state_var["stateName"] in ["numViolations", "tempState"]: + state_var["stateVariableType"] == "ValueState" + else: + assert state_var["stateName"] == "$procTimers_keyToTimestamp" + assert state_var["stateVariableType"] 
== "TimerState" + + # check for state data source and readChangeFeed + value_state_df = ( + self.spark.read.format("statestore") + .option("path", checkpoint_path) + .option("stateVarName", "numViolations") + .option("readChangeFeed", True) + .option("changeStartBatchId", 0) + .load() + ).selectExpr( + "change_type", "key.id AS groupingKey", "value.value AS value", "partition_id" + ) + + assert value_state_df.select("change_type", "groupingKey", "value").sort( + "groupingKey" + ).collect() == [ + Row(change_type="update", groupingKey="0", value=1), + Row(change_type="update", groupingKey="1", value=2), + ] + + partition_id_list = [ + row["partition_id"] for row in value_state_df.select("partition_id").collect() + ] + + for partition_id in partition_id_list: + # check for state data source and snapshotStartBatchId options + state_snapshot_df = ( + self.spark.read.format("statestore") + .option("path", checkpoint_path) + .option("stateVarName", "numViolations") + .option("snapshotPartitionId", partition_id) + .option("snapshotStartBatchId", 0) + .load() + ) + + assert ( + value_state_df.select("partition_id", "groupingKey", "value") + .filter(value_state_df["partition_id"] == partition_id) + .sort("groupingKey") + .collect() + == state_snapshot_df.selectExpr( + "partition_id", "key.id AS groupingKey", "value.value AS value" + ) + .sort("groupingKey") + .collect() + ) + + for q in self.spark.streams.active: + q.stop() + + with self.sql_conf( + {"spark.sql.streaming.stateStore.rocksdb.changelogCheckpointing.enabled": "true"} + ): + self._test_transform_with_state_in_pandas_basic( + SimpleStatefulProcessor(), + check_results, + False, + "processingTime", + checkpoint_path=checkpoint_path, + ) + + def test_transform_with_state_restart_with_multiple_rows_init_state(self): + def check_results(batch_df, _): + assert set(batch_df.sort("id").collect()) == { + Row(id="0", countAsString="2"), + Row(id="1", countAsString="2"), + } + + def check_results_for_new_query(batch_df, 
batch_id): + if batch_id == 0: + assert set(batch_df.sort("id").collect()) == { + Row(id="0", value=str(123 + 46)), + Row(id="1", value=str(146 + 346)), + } + else: + assert set(batch_df.sort("id").collect()) == { + Row(id="0", value=str(123 + 46 + 67)), + Row(id="3", value=str(12)), + } + # verify values in initial state is appended into list state for all keys + df = ( + self.spark.read.format("statestore") + .option("path", new_checkpoint_path) + .option("stateVarName", "list_state") + .load() + ).selectExpr("key.id AS id", "list_element.value AS value") + + def dataframe_to_value_list(output_df): + return [ + row["value"] for row in output_df.sort("value").select("value").collect() + ] + + assert dataframe_to_value_list(df.filter(df.id == "0")) == [20, 20, 111, 120, 120] + assert dataframe_to_value_list(df.filter(df.id == "1")) == [20, 20, 111, 120, 120] + + # run a tws query and read state data source dataframe from its checkpoint + checkpoint_path = tempfile.mkdtemp() + self._test_transform_with_state_in_pandas_basic( + ListStateProcessor(), check_results, True, checkpoint_path=checkpoint_path + ) + list_state_df = ( + self.spark.read.format("statestore") + .option("path", checkpoint_path) + .option("stateVarName", "listState1") + .load() + ).selectExpr("key.id AS id", "list_element.temperature AS initVal") + init_df = list_state_df.groupBy("id") + + # run a new tws query and pass state data source dataframe as initial state + # multiple rows exist in the initial state with the same grouping key + new_checkpoint_path = tempfile.mkdtemp() + self._test_transform_with_state_init_state_in_pandas( + StatefulProcessorWithListStateInitialState(), + check_results_for_new_query, + checkpoint_path=new_checkpoint_path, + initial_state=init_df, + ) + + # run the same test suites again but with single shuffle partition + def test_transform_with_state_with_timers_single_partition(self): + with self.sql_conf({"spark.sql.shuffle.partitions": "1"}): + 
self.test_transform_with_state_init_state_with_timers() + self.test_transform_with_state_in_pandas_event_time() + self.test_transform_with_state_in_pandas_proc_timer() + self.test_transform_with_state_restart_with_multiple_rows_init_state() + class SimpleStatefulProcessorWithInitialState(StatefulProcessor): # this dict is the same as input initial state dataframe @@ -709,10 +1302,9 @@ class SimpleStatefulProcessorWithInitialState(StatefulProcessor): def init(self, handle: StatefulProcessorHandle) -> None: state_schema = StructType([StructField("value", IntegerType(), True)]) self.value_state = handle.getValueState("value_state", state_schema) + self.handle = handle - def handleInputRows( - self, key, rows, timer_values, expired_timer_info - ) -> Iterator[pd.DataFrame]: + def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: exists = self.value_state.exists() if exists: value_row = self.value_state.get() @@ -735,7 +1327,7 @@ def handleInputRows( else: yield pd.DataFrame({"id": key, "value": str(accumulated_value)}) - def handleInitialState(self, key, initialState) -> None: + def handleInitialState(self, key, initialState, timer_values) -> None: init_val = initialState.at[0, "initVal"] self.value_state.update((init_val,)) if len(key) == 1: @@ -745,6 +1337,30 @@ def close(self) -> None: pass +class StatefulProcessorWithInitialStateTimers(SimpleStatefulProcessorWithInitialState): + def handleExpiredTimer(self, key, timer_values, expired_timer_info) -> Iterator[pd.DataFrame]: + self.handle.deleteTimer(expired_timer_info.get_expiry_time_in_ms()) + str_key = f"{str(key[0])}-expired" + yield pd.DataFrame( + {"id": (str_key,), "value": str(expired_timer_info.get_expiry_time_in_ms())} + ) + + def handleInitialState(self, key, initialState, timer_values) -> None: + super().handleInitialState(key, initialState, timer_values) + self.handle.registerTimer(timer_values.get_current_processing_time_in_ms() - 1) + + +class 
StatefulProcessorWithListStateInitialState(SimpleStatefulProcessorWithInitialState): + def init(self, handle: StatefulProcessorHandle) -> None: + super().init(handle) + list_ele_schema = StructType([StructField("value", IntegerType(), True)]) + self.list_state = handle.getListState("list_state", list_ele_schema) + + def handleInitialState(self, key, initialState, timer_values) -> None: + for val in initialState["initVal"].tolist(): + self.list_state.append_value((val,)) + + # A stateful processor that output the max event time it has seen. Register timer for # current watermark. Clear max state if timer expires. class EventTimeStatefulProcessor(StatefulProcessor): @@ -753,33 +1369,30 @@ def init(self, handle: StatefulProcessorHandle) -> None: self.handle = handle self.max_state = handle.getValueState("max_state", state_schema) - def handleInputRows( - self, key, rows, timer_values, expired_timer_info - ) -> Iterator[pd.DataFrame]: - if expired_timer_info.is_valid(): - self.max_state.clear() - self.handle.deleteTimer(expired_timer_info.get_expiry_time_in_ms()) - str_key = f"{str(key[0])}-expired" - yield pd.DataFrame( - {"id": (str_key,), "timestamp": str(expired_timer_info.get_expiry_time_in_ms())} - ) + def handleExpiredTimer(self, key, timer_values, expired_timer_info) -> Iterator[pd.DataFrame]: + self.max_state.clear() + self.handle.deleteTimer(expired_timer_info.get_expiry_time_in_ms()) + str_key = f"{str(key[0])}-expired" + yield pd.DataFrame( + {"id": (str_key,), "timestamp": str(expired_timer_info.get_expiry_time_in_ms())} + ) - else: - timestamp_list = [] - for pdf in rows: - # int64 will represent timestamp in nanosecond, restore to second - timestamp_list.extend((pdf["eventTime"].astype("int64") // 10**9).tolist()) + def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: + timestamp_list = [] + for pdf in rows: + # int64 will represent timestamp in nanosecond, restore to second + 
timestamp_list.extend((pdf["eventTime"].astype("int64") // 10**9).tolist()) - if self.max_state.exists(): - cur_max = int(self.max_state.get()[0]) - else: - cur_max = 0 - max_event_time = str(max(cur_max, max(timestamp_list))) + if self.max_state.exists(): + cur_max = int(self.max_state.get()[0]) + else: + cur_max = 0 + max_event_time = str(max(cur_max, max(timestamp_list))) - self.max_state.update((max_event_time,)) - self.handle.registerTimer(timer_values.get_current_watermark_in_ms()) + self.max_state.update((max_event_time,)) + self.handle.registerTimer(timer_values.get_current_watermark_in_ms()) - yield pd.DataFrame({"id": key, "timestamp": max_event_time}) + yield pd.DataFrame({"id": key, "timestamp": max_event_time}) def close(self) -> None: pass @@ -793,54 +1406,49 @@ def init(self, handle: StatefulProcessorHandle) -> None: self.handle = handle self.count_state = handle.getValueState("count_state", state_schema) - def handleInputRows( - self, key, rows, timer_values, expired_timer_info - ) -> Iterator[pd.DataFrame]: - if expired_timer_info.is_valid(): - # reset count state each time the timer is expired - timer_list_1 = [e for e in self.handle.listTimers()] - timer_list_2 = [] - idx = 0 - for e in self.handle.listTimers(): - timer_list_2.append(e) - # check multiple iterator on the same grouping key works - assert timer_list_2[idx] == timer_list_1[idx] - idx += 1 - - if len(timer_list_1) > 0: - # before deleting the expiring timers, there are 2 timers - - # one timer we just registered, and one that is going to be deleted - assert len(timer_list_1) == 2 - self.count_state.clear() - self.handle.deleteTimer(expired_timer_info.get_expiry_time_in_ms()) - yield pd.DataFrame( - { - "id": key, - "countAsString": str("-1"), - "timeValues": str(expired_timer_info.get_expiry_time_in_ms()), - } - ) + def handleExpiredTimer(self, key, timer_values, expired_timer_info) -> Iterator[pd.DataFrame]: + # reset count state each time the timer is expired + timer_list_1 = [e 
for e in self.handle.listTimers()] + timer_list_2 = [] + idx = 0 + for e in self.handle.listTimers(): + timer_list_2.append(e) + # check multiple iterator on the same grouping key works + assert timer_list_2[idx] == timer_list_1[idx] + idx += 1 + + if len(timer_list_1) > 0: + assert len(timer_list_1) == 2 + self.count_state.clear() + self.handle.deleteTimer(expired_timer_info.get_expiry_time_in_ms()) + yield pd.DataFrame( + { + "id": key, + "countAsString": str("-1"), + "timeValues": str(expired_timer_info.get_expiry_time_in_ms()), + } + ) + def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: + if not self.count_state.exists(): + count = 0 else: - if not self.count_state.exists(): - count = 0 - else: - count = int(self.count_state.get()[0]) + count = int(self.count_state.get()[0]) - if key == ("0",): - self.handle.registerTimer(timer_values.get_current_processing_time_in_ms()) + if key == ("0",): + self.handle.registerTimer(timer_values.get_current_processing_time_in_ms() + 1) - rows_count = 0 - for pdf in rows: - pdf_count = len(pdf) - rows_count += pdf_count + rows_count = 0 + for pdf in rows: + pdf_count = len(pdf) + rows_count += pdf_count - count = count + rows_count + count = count + rows_count - self.count_state.update((str(count),)) - timestamp = str(timer_values.get_current_processing_time_in_ms()) + self.count_state.update((str(count),)) + timestamp = str(timer_values.get_current_processing_time_in_ms()) - yield pd.DataFrame({"id": key, "countAsString": str(count), "timeValues": timestamp}) + yield pd.DataFrame({"id": key, "countAsString": str(count), "timeValues": timestamp}) def close(self) -> None: pass @@ -851,14 +1459,13 @@ class SimpleStatefulProcessor(StatefulProcessor, unittest.TestCase): batch_id = 0 def init(self, handle: StatefulProcessorHandle) -> None: + # Test both string type and struct type schemas + self.num_violations_state = handle.getValueState("numViolations", "value int") state_schema = 
StructType([StructField("value", IntegerType(), True)]) - self.num_violations_state = handle.getValueState("numViolations", state_schema) self.temp_state = handle.getValueState("tempState", state_schema) handle.deleteIfExists("tempState") - def handleInputRows( - self, key, rows, timer_values, expired_timer_info - ) -> Iterator[pd.DataFrame]: + def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: with self.assertRaisesRegex(PySparkRuntimeError, "Error checking value state exists"): self.temp_state.exists() new_violations = 0 @@ -886,6 +1493,19 @@ def close(self) -> None: pass +class StatefulProcessorChainingOps(StatefulProcessor): + def init(self, handle: StatefulProcessorHandle) -> None: + pass + + def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: + for pdf in rows: + timestamp_list = pdf["eventTime"].tolist() + yield pd.DataFrame({"id": key, "outputTimestamp": timestamp_list[0]}) + + def close(self) -> None: + pass + + # A stateful processor that inherit all behavior of SimpleStatefulProcessor except that it use # ttl state with a large timeout. 
class SimpleTTLStatefulProcessor(SimpleStatefulProcessor, unittest.TestCase): @@ -907,9 +1527,7 @@ def init(self, handle: StatefulProcessorHandle) -> None: "ttl-map-state", user_key_schema, state_schema, 10000 ) - def handleInputRows( - self, key, rows, timer_values, expired_timer_info - ) -> Iterator[pd.DataFrame]: + def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: count = 0 ttl_count = 0 ttl_list_state_count = 0 @@ -959,9 +1577,7 @@ def init(self, handle: StatefulProcessorHandle) -> None: state_schema = StructType([StructField("value", IntegerType(), True)]) self.num_violations_state = handle.getValueState("numViolations", state_schema) - def handleInputRows( - self, key, rows, timer_values, expired_timer_info - ) -> Iterator[pd.DataFrame]: + def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: count = 0 exists = self.num_violations_state.exists() assert not exists @@ -985,9 +1601,7 @@ def init(self, handle: StatefulProcessorHandle) -> None: self.list_state1 = handle.getListState("listState1", state_schema) self.list_state2 = handle.getListState("listState2", state_schema) - def handleInputRows( - self, key, rows, timer_values, expired_timer_info - ) -> Iterator[pd.DataFrame]: + def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: count = 0 for pdf in rows: list_state_rows = [(120,), (20,)] @@ -1038,13 +1652,10 @@ def init(self, handle: StatefulProcessorHandle) -> None: class MapStateProcessor(StatefulProcessor): def init(self, handle: StatefulProcessorHandle): - key_schema = StructType([StructField("name", StringType(), True)]) - value_schema = StructType([StructField("count", IntegerType(), True)]) - self.map_state = handle.getMapState("mapState", key_schema, value_schema) + # Test string type schemas + self.map_state = handle.getMapState("mapState", "name string", "count int") - def handleInputRows( - self, key, rows, timer_values, expired_timer_info - ) -> Iterator[pd.DataFrame]: + 
def handleInputRows(self, key, rows, timer_values) -> Iterator[pd.DataFrame]: count = 0 key1 = ("key1",) key2 = ("key2",) @@ -1084,6 +1695,7 @@ def init(self, handle: StatefulProcessorHandle) -> None: key_schema = StructType([StructField("name", StringType(), True)]) value_schema = StructType([StructField("count", IntegerType(), True)]) self.map_state = handle.getMapState("mapState", key_schema, value_schema, 30000) + self.list_state = handle.getListState("listState", key_schema) class TransformWithStateInPandasTests(TransformWithStateInPandasTestsMixin, ReusedSQLTestCase): diff --git a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py index fd264c3488823..3dafd71c1a329 100644 --- a/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py +++ b/python/pyspark/sql/tests/plot/test_frame_plot_plotly.py @@ -301,6 +301,7 @@ def test_area_plot(self): self._check_fig_data(fig["data"][2], **expected_fig_data) def test_pie_plot(self): + # single column as 'y' fig = self.sdf3.plot(kind="pie", x="date", y="sales") expected_x = [ datetime(2018, 1, 31, 0, 0), @@ -308,13 +309,39 @@ def test_pie_plot(self): datetime(2018, 3, 31, 0, 0), datetime(2018, 4, 30, 0, 0), ] - expected_fig_data = { + expected_fig_data_sales = { "name": "", "labels": expected_x, "values": [3, 2, 3, 9], "type": "pie", } - self._check_fig_data(fig["data"][0], **expected_fig_data) + self._check_fig_data(fig["data"][0], **expected_fig_data_sales) + + # all numeric columns as 'y' + expected_fig_data_signups = { + "name": "", + "labels": expected_x, + "values": [5, 5, 6, 12], + "type": "pie", + } + expected_fig_data_visits = { + "name": "", + "labels": expected_x, + "values": [20, 42, 28, 62], + "type": "pie", + } + fig = self.sdf3.plot(kind="pie", x="date", subplots=True) + self._check_fig_data(fig["data"][0], **expected_fig_data_sales) + self._check_fig_data(fig["data"][1], **expected_fig_data_signups) + self._check_fig_data(fig["data"][2], 
**expected_fig_data_visits) + + # not specify subplots + with self.assertRaises(PySparkValueError) as pe: + self.sdf3.plot(kind="pie", x="date") + + self.check_error( + exception=pe.exception, errorClass="UNSUPPORTED_PIE_PLOT_PARAM", messageParameters={} + ) # y is not a numerical column with self.assertRaises(PySparkTypeError) as pe: @@ -322,8 +349,12 @@ def test_pie_plot(self): self.check_error( exception=pe.exception, - errorClass="PLOT_NOT_NUMERIC_COLUMN_ARGUMENT", - messageParameters={"arg_name": "y", "arg_type": "StringType"}, + errorClass="PLOT_INVALID_TYPE_COLUMN", + messageParameters={ + "col_name": "category", + "valid_types": "NumericType", + "col_type": "StringType", + }, ) def test_box_plot(self): diff --git a/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py b/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py index de8f30baebca5..9db66aa252ee6 100644 --- a/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +++ b/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py @@ -146,7 +146,7 @@ def func(df: DataFrame, batch_id: int): def my_test_function_2(): return 2 - def test_streaming_foreach_batch_fuction_calling(self): + def test_streaming_foreach_batch_function_calling(self): def my_test_function_3(): return 3 diff --git a/python/pyspark/sql/tests/test_connect_compatibility.py b/python/pyspark/sql/tests/test_connect_compatibility.py index 3d74e796cd7a0..4ac68292b4020 100644 --- a/python/pyspark/sql/tests/test_connect_compatibility.py +++ b/python/pyspark/sql/tests/test_connect_compatibility.py @@ -264,18 +264,11 @@ def test_spark_session_compatibility(self): expected_missing_connect_methods = { "addArtifact", "addArtifacts", - "addTag", "clearProgressHandlers", - "clearTags", "copyFromLocalToFs", - "getTags", - "interruptAll", - "interruptOperation", - "interruptTag", "newSession", "registerProgressHandler", "removeProgressHandler", - "removeTag", } expected_missing_classic_methods = set() 
self.check_compatibility( diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index cd6a57429cfa9..e85877cc87e09 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -1044,6 +1044,38 @@ def test_transpose(self): messageParameters={"dt1": '"STRING"', "dt2": '"BIGINT"'}, ) + def test_transpose_with_invalid_index_columns(self): + # SPARK-50602: invalid index columns + df = self.spark.createDataFrame([{"a": "x", "b": "y", "c": "z"}]) + + with self.assertRaises(AnalysisException) as pe: + df.transpose(col("a") + 1).collect() + self.check_error( + exception=pe.exception, + errorClass="TRANSPOSE_INVALID_INDEX_COLUMN", + messageParameters={"reason": "Index column must be an atomic attribute"}, + ) + + def test_metadata_column(self): + with self.sql_conf( + {"spark.sql.catalog.testcat": "org.apache.spark.sql.connector.catalog.InMemoryCatalog"} + ): + tbl = "testcat.t" + with self.table(tbl): + self.spark.sql( + f""" + CREATE TABLE {tbl} (index bigint, data string) + PARTITIONED BY (bucket(4, index), index) + """ + ) + self.spark.sql(f"""INSERT INTO {tbl} VALUES (1, 'a'), (2, 'b'), (3, 'c')""") + + df = self.spark.sql(f"""SELECT * FROM {tbl}""") + assertDataFrameEqual( + df.select(df.metadataColumn("index")), + [Row(0), Row(0), Row(0)], + ) + class DataFrameTests(DataFrameTestsMixin, ReusedSQLTestCase): def test_query_execution_unsupported_in_classic(self): diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index cf8f685ea4499..39db72b235bf9 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -30,8 +30,9 @@ from pyspark.sql.avro.functions import from_avro, to_avro from pyspark.sql.column import Column from pyspark.sql.functions.builtin import nullifzero, randstr, uniform, zeroifnull +from pyspark.sql.types import StructType, StructField, StringType from 
pyspark.testing.sqlutils import ReusedSQLTestCase, SQLTestUtils -from pyspark.testing.utils import have_numpy +from pyspark.testing.utils import have_numpy, assertDataFrameEqual class FunctionsTestsMixin: @@ -338,29 +339,29 @@ def test_try_parse_url(self): [("https://spark.apache.org/path?query=1", "QUERY", "query")], ["url", "part", "key"], ) - actual = df.select(F.try_parse_url(df.url, df.part, df.key)).collect() - self.assertEqual(actual, [Row("1")]) + actual = df.select(F.try_parse_url(df.url, df.part, df.key)) + assertDataFrameEqual(actual, [Row("1")]) df = self.spark.createDataFrame( [("inva lid://spark.apache.org/path?query=1", "QUERY", "query")], ["url", "part", "key"], ) - actual = df.select(F.try_parse_url(df.url, df.part, df.key)).collect() - self.assertEqual(actual, [Row(None)]) + actual = df.select(F.try_parse_url(df.url, df.part, df.key)) + assertDataFrameEqual(actual, [Row(None)]) def test_try_make_timestamp(self): data = [(2024, 5, 22, 10, 30, 0)] df = self.spark.createDataFrame(data, ["year", "month", "day", "hour", "minute", "second"]) actual = df.select( F.try_make_timestamp(df.year, df.month, df.day, df.hour, df.minute, df.second) - ).collect() - self.assertEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30))]) + ) + assertDataFrameEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30))]) data = [(2024, 13, 22, 10, 30, 0)] df = self.spark.createDataFrame(data, ["year", "month", "day", "hour", "minute", "second"]) actual = df.select( F.try_make_timestamp(df.year, df.month, df.day, df.hour, df.minute, df.second) - ).collect() - self.assertEqual(actual, [Row(None)]) + ) + assertDataFrameEqual(actual, [Row(None)]) def test_try_make_timestamp_ltz(self): # use local timezone here to avoid flakiness @@ -372,8 +373,8 @@ def test_try_make_timestamp_ltz(self): F.try_make_timestamp_ltz( df.year, df.month, df.day, df.hour, df.minute, df.second, df.timezone ) - ).collect() - self.assertEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30, 0))]) 
+ ) + assertDataFrameEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30, 0))]) # use local timezone here to avoid flakiness data = [(2024, 13, 22, 10, 30, 0, datetime.datetime.now().astimezone().tzinfo.__str__())] @@ -384,23 +385,23 @@ def test_try_make_timestamp_ltz(self): F.try_make_timestamp_ltz( df.year, df.month, df.day, df.hour, df.minute, df.second, df.timezone ) - ).collect() - self.assertEqual(actual, [Row(None)]) + ) + assertDataFrameEqual(actual, [Row(None)]) def test_try_make_timestamp_ntz(self): data = [(2024, 5, 22, 10, 30, 0)] df = self.spark.createDataFrame(data, ["year", "month", "day", "hour", "minute", "second"]) actual = df.select( F.try_make_timestamp_ntz(df.year, df.month, df.day, df.hour, df.minute, df.second) - ).collect() - self.assertEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30))]) + ) + assertDataFrameEqual(actual, [Row(datetime.datetime(2024, 5, 22, 10, 30))]) data = [(2024, 13, 22, 10, 30, 0)] df = self.spark.createDataFrame(data, ["year", "month", "day", "hour", "minute", "second"]) actual = df.select( F.try_make_timestamp_ntz(df.year, df.month, df.day, df.hour, df.minute, df.second) - ).collect() - self.assertEqual(actual, [Row(None)]) + ) + assertDataFrameEqual(actual, [Row(None)]) def test_string_functions(self): string_functions = [ @@ -442,51 +443,51 @@ def test_string_functions(self): ) for name in string_functions: - self.assertEqual( - df.select(getattr(F, name)("name")).first()[0], - df.select(getattr(F, name)(F.col("name"))).first()[0], + assertDataFrameEqual( + df.select(getattr(F, name)("name")), + df.select(getattr(F, name)(F.col("name"))), ) def test_collation(self): df = self.spark.createDataFrame([("a",), ("b",)], ["name"]) - actual = df.select(F.collation(F.collate("name", "UNICODE"))).distinct().collect() - self.assertEqual([Row("UNICODE")], actual) + actual = df.select(F.collation(F.collate("name", "UNICODE"))).distinct() + assertDataFrameEqual([Row("SYSTEM.BUILTIN.UNICODE")], actual) def 
test_try_make_interval(self): df = self.spark.createDataFrame([(2147483647,)], ["num"]) - actual = df.select(F.isnull(F.try_make_interval("num"))).collect() - self.assertEqual([Row(True)], actual) + actual = df.select(F.isnull(F.try_make_interval("num"))) + assertDataFrameEqual([Row(True)], actual) def test_octet_length_function(self): # SPARK-36751: add octet length api for python df = self.spark.createDataFrame([("cat",), ("\U0001F408",)], ["cat"]) - actual = df.select(F.octet_length("cat")).collect() - self.assertEqual([Row(3), Row(4)], actual) + actual = df.select(F.octet_length("cat")) + assertDataFrameEqual([Row(3), Row(4)], actual) def test_bit_length_function(self): # SPARK-36751: add bit length api for python df = self.spark.createDataFrame([("cat",), ("\U0001F408",)], ["cat"]) - actual = df.select(F.bit_length("cat")).collect() - self.assertEqual([Row(24), Row(32)], actual) + actual = df.select(F.bit_length("cat")) + assertDataFrameEqual([Row(24), Row(32)], actual) def test_array_contains_function(self): df = self.spark.createDataFrame([(["1", "2", "3"],), ([],)], ["data"]) - actual = df.select(F.array_contains(df.data, "1").alias("b")).collect() - self.assertEqual([Row(b=True), Row(b=False)], actual) + actual = df.select(F.array_contains(df.data, "1").alias("b")) + assertDataFrameEqual([Row(b=True), Row(b=False)], actual) def test_levenshtein_function(self): df = self.spark.createDataFrame([("kitten", "sitting")], ["l", "r"]) - actual_without_threshold = df.select(F.levenshtein(df.l, df.r).alias("b")).collect() - self.assertEqual([Row(b=3)], actual_without_threshold) - actual_with_threshold = df.select(F.levenshtein(df.l, df.r, 2).alias("b")).collect() - self.assertEqual([Row(b=-1)], actual_with_threshold) + actual_without_threshold = df.select(F.levenshtein(df.l, df.r).alias("b")) + assertDataFrameEqual([Row(b=3)], actual_without_threshold) + actual_with_threshold = df.select(F.levenshtein(df.l, df.r, 2).alias("b")) + assertDataFrameEqual([Row(b=-1)], 
actual_with_threshold) def test_between_function(self): df = self.spark.createDataFrame( [Row(a=1, b=2, c=3), Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)] ) - self.assertEqual( - [Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)], df.filter(df.a.between(df.b, df.c)).collect() + assertDataFrameEqual( + [Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)], df.filter(df.a.between(df.b, df.c)) ) def test_dayofweek(self): @@ -602,7 +603,7 @@ def test_first_last_ignorenulls(self): F.last(df2.id, False).alias("c"), F.last(df2.id, True).alias("d"), ) - self.assertEqual([Row(a=None, b=1, c=None, d=98)], df3.collect()) + assertDataFrameEqual([Row(a=None, b=1, c=None, d=98)], df3) def test_approxQuantile(self): df = self.spark.createDataFrame([Row(a=i, b=i + 10) for i in range(10)]) @@ -660,20 +661,20 @@ def test_sort_with_nulls_order(self): df = self.spark.createDataFrame( [("Tom", 80), (None, 60), ("Alice", 50)], ["name", "height"] ) - self.assertEqual( - df.select(df.name).orderBy(F.asc_nulls_first("name")).collect(), + assertDataFrameEqual( + df.select(df.name).orderBy(F.asc_nulls_first("name")), [Row(name=None), Row(name="Alice"), Row(name="Tom")], ) - self.assertEqual( - df.select(df.name).orderBy(F.asc_nulls_last("name")).collect(), + assertDataFrameEqual( + df.select(df.name).orderBy(F.asc_nulls_last("name")), [Row(name="Alice"), Row(name="Tom"), Row(name=None)], ) - self.assertEqual( - df.select(df.name).orderBy(F.desc_nulls_first("name")).collect(), + assertDataFrameEqual( + df.select(df.name).orderBy(F.desc_nulls_first("name")), [Row(name=None), Row(name="Tom"), Row(name="Alice")], ) - self.assertEqual( - df.select(df.name).orderBy(F.desc_nulls_last("name")).collect(), + assertDataFrameEqual( + df.select(df.name).orderBy(F.desc_nulls_last("name")), [Row(name="Tom"), Row(name="Alice"), Row(name=None)], ) @@ -710,20 +711,16 @@ def test_slice(self): ) expected = [Row(sliced=[2, 3]), Row(sliced=[5])] - self.assertEqual(df.select(F.slice(df.x, 2, 2).alias("sliced")).collect(), expected) - 
self.assertEqual( - df.select(F.slice(df.x, F.lit(2), F.lit(2)).alias("sliced")).collect(), expected - ) - self.assertEqual( - df.select(F.slice("x", "index", "len").alias("sliced")).collect(), expected - ) + assertDataFrameEqual(df.select(F.slice(df.x, 2, 2).alias("sliced")), expected) + assertDataFrameEqual(df.select(F.slice(df.x, F.lit(2), F.lit(2)).alias("sliced")), expected) + assertDataFrameEqual(df.select(F.slice("x", "index", "len").alias("sliced")), expected) - self.assertEqual( - df.select(F.slice(df.x, F.size(df.x) - 1, F.lit(1)).alias("sliced")).collect(), + assertDataFrameEqual( + df.select(F.slice(df.x, F.size(df.x) - 1, F.lit(1)).alias("sliced")), [Row(sliced=[2]), Row(sliced=[4])], ) - self.assertEqual( - df.select(F.slice(df.x, F.lit(1), F.size(df.x) - 1).alias("sliced")).collect(), + assertDataFrameEqual( + df.select(F.slice(df.x, F.lit(1), F.size(df.x) - 1).alias("sliced")), [Row(sliced=[1, 2]), Row(sliced=[4])], ) @@ -732,11 +729,9 @@ def test_array_repeat(self): df = df.withColumn("repeat_n", F.lit(3)) expected = [Row(val=[0, 0, 0])] - self.assertEqual(df.select(F.array_repeat("id", 3).alias("val")).collect(), expected) - self.assertEqual(df.select(F.array_repeat("id", F.lit(3)).alias("val")).collect(), expected) - self.assertEqual( - df.select(F.array_repeat("id", "repeat_n").alias("val")).collect(), expected - ) + assertDataFrameEqual(df.select(F.array_repeat("id", 3).alias("val")), expected) + assertDataFrameEqual(df.select(F.array_repeat("id", F.lit(3)).alias("val")), expected) + assertDataFrameEqual(df.select(F.array_repeat("id", "repeat_n").alias("val")), expected) def test_input_file_name_udf(self): df = self.spark.read.text("python/test_support/hello/hello.txt") @@ -748,11 +743,11 @@ def test_least(self): df = self.spark.createDataFrame([(1, 4, 3)], ["a", "b", "c"]) expected = [Row(least=1)] - self.assertEqual(df.select(F.least(df.a, df.b, df.c).alias("least")).collect(), expected) - self.assertEqual( - df.select(F.least(F.lit(3), 
F.lit(5), F.lit(1)).alias("least")).collect(), expected + assertDataFrameEqual(df.select(F.least(df.a, df.b, df.c).alias("least")), expected) + assertDataFrameEqual( + df.select(F.least(F.lit(3), F.lit(5), F.lit(1)).alias("least")), expected ) - self.assertEqual(df.select(F.least("a", "b", "c").alias("least")).collect(), expected) + assertDataFrameEqual(df.select(F.least("a", "b", "c").alias("least")), expected) with self.assertRaises(PySparkValueError) as pe: df.select(F.least(df.a).alias("least")).collect() @@ -794,11 +789,9 @@ def test_overlay(self): df = self.spark.createDataFrame([("SPARK_SQL", "CORE", 7, 0)], ("x", "y", "pos", "len")) exp = [Row(ol="SPARK_CORESQL")] - self.assertEqual(df.select(F.overlay(df.x, df.y, 7, 0).alias("ol")).collect(), exp) - self.assertEqual( - df.select(F.overlay(df.x, df.y, F.lit(7), F.lit(0)).alias("ol")).collect(), exp - ) - self.assertEqual(df.select(F.overlay("x", "y", "pos", "len").alias("ol")).collect(), exp) + assertDataFrameEqual(df.select(F.overlay(df.x, df.y, 7, 0).alias("ol")), exp) + assertDataFrameEqual(df.select(F.overlay(df.x, df.y, F.lit(7), F.lit(0)).alias("ol")), exp) + assertDataFrameEqual(df.select(F.overlay("x", "y", "pos", "len").alias("ol")), exp) with self.assertRaises(PySparkTypeError) as pe: df.select(F.overlay(df.x, df.y, 7.5, 0).alias("ol")).collect() @@ -1147,6 +1140,70 @@ def test_collect_functions(self): ["1", "2", "2", "2"], ) + def test_listagg_functions(self): + df = self.spark.createDataFrame( + [(1, "1"), (2, "2"), (None, None), (1, "2")], ["key", "value"] + ) + df_with_bytes = self.spark.createDataFrame( + [(b"\x01",), (b"\x02",), (None,), (b"\x03",), (b"\x02",)], ["bytes"] + ) + df_with_nulls = self.spark.createDataFrame( + [(None,), (None,), (None,), (None,), (None,)], + StructType([StructField("nulls", StringType(), True)]), + ) + # listagg and string_agg are aliases + for listagg_ref in [F.listagg, F.string_agg]: + self.assertEqual(df.select(listagg_ref(df.key).alias("r")).collect()[0].r, 
"121") + self.assertEqual(df.select(listagg_ref(df.value).alias("r")).collect()[0].r, "122") + self.assertEqual( + df.select(listagg_ref(df.value, ",").alias("r")).collect()[0].r, "1,2,2" + ) + self.assertEqual( + df_with_bytes.select(listagg_ref(df_with_bytes.bytes, b"\x42").alias("r")) + .collect()[0] + .r, + b"\x01\x42\x02\x42\x03\x42\x02", + ) + self.assertEqual( + df_with_nulls.select(listagg_ref(df_with_nulls.nulls).alias("r")).collect()[0].r, + None, + ) + + def test_listagg_distinct_functions(self): + df = self.spark.createDataFrame( + [(1, "1"), (2, "2"), (None, None), (1, "2")], ["key", "value"] + ) + df_with_bytes = self.spark.createDataFrame( + [(b"\x01",), (b"\x02",), (None,), (b"\x03",), (b"\x02",)], ["bytes"] + ) + df_with_nulls = self.spark.createDataFrame( + [(None,), (None,), (None,), (None,), (None,)], + StructType([StructField("nulls", StringType(), True)]), + ) + # listagg_distinct and string_agg_distinct are aliases + for listagg_distinct_ref in [F.listagg_distinct, F.string_agg_distinct]: + self.assertEqual( + df.select(listagg_distinct_ref(df.key).alias("r")).collect()[0].r, "12" + ) + self.assertEqual( + df.select(listagg_distinct_ref(df.value).alias("r")).collect()[0].r, "12" + ) + self.assertEqual( + df.select(listagg_distinct_ref(df.value, ",").alias("r")).collect()[0].r, "1,2" + ) + self.assertEqual( + df_with_bytes.select(listagg_distinct_ref(df_with_bytes.bytes, b"\x42").alias("r")) + .collect()[0] + .r, + b"\x01\x42\x02\x42\x03", + ) + self.assertEqual( + df_with_nulls.select(listagg_distinct_ref(df_with_nulls.nulls).alias("r")) + .collect()[0] + .r, + None, + ) + def test_datetime_functions(self): df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol") parse_result = df.select(F.to_date(F.col("dateCol"))).first() @@ -1158,8 +1215,8 @@ def test_assert_true(self): def check_assert_true(self, tpe): df = self.spark.range(3) - self.assertEqual( - df.select(F.assert_true(df.id < 3)).toDF("val").collect(), + assertDataFrameEqual( + 
df.select(F.assert_true(df.id < 3)).toDF("val"), [Row(val=None), Row(val=None), Row(val=None)], ) @@ -1296,17 +1353,17 @@ def test_np_scalar_input(self): df = self.spark.createDataFrame([([1, 2, 3],), ([],)], ["data"]) for dtype in [np.int8, np.int16, np.int32, np.int64]: - res = df.select(F.array_contains(df.data, dtype(1)).alias("b")).collect() - self.assertEqual([Row(b=True), Row(b=False)], res) - res = df.select(F.array_position(df.data, dtype(1)).alias("c")).collect() - self.assertEqual([Row(c=1), Row(c=0)], res) + res = df.select(F.array_contains(df.data, dtype(1)).alias("b")) + assertDataFrameEqual([Row(b=True), Row(b=False)], res) + res = df.select(F.array_position(df.data, dtype(1)).alias("c")) + assertDataFrameEqual([Row(c=1), Row(c=0)], res) df = self.spark.createDataFrame([([1.0, 2.0, 3.0],), ([],)], ["data"]) for dtype in [np.float32, np.float64]: - res = df.select(F.array_contains(df.data, dtype(1)).alias("b")).collect() - self.assertEqual([Row(b=True), Row(b=False)], res) - res = df.select(F.array_position(df.data, dtype(1)).alias("c")).collect() - self.assertEqual([Row(c=1), Row(c=0)], res) + res = df.select(F.array_contains(df.data, dtype(1)).alias("b")) + assertDataFrameEqual([Row(b=True), Row(b=False)], res) + res = df.select(F.array_position(df.data, dtype(1)).alias("c")) + assertDataFrameEqual([Row(c=1), Row(c=0)], res) @unittest.skipIf(not have_numpy, "NumPy not installed") def test_ndarray_input(self): @@ -1723,46 +1780,42 @@ class IntEnum(Enum): def test_nullifzero_zeroifnull(self): df = self.spark.createDataFrame([(0,), (1,)], ["a"]) - result = df.select(nullifzero(df.a).alias("r")).collect() - self.assertEqual([Row(r=None), Row(r=1)], result) + result = df.select(nullifzero(df.a).alias("r")) + assertDataFrameEqual([Row(r=None), Row(r=1)], result) df = self.spark.createDataFrame([(None,), (1,)], ["a"]) - result = df.select(zeroifnull(df.a).alias("r")).collect() - self.assertEqual([Row(r=0), Row(r=1)], result) + result = 
df.select(zeroifnull(df.a).alias("r")) + assertDataFrameEqual([Row(r=0), Row(r=1)], result) def test_randstr_uniform(self): df = self.spark.createDataFrame([(0,)], ["a"]) - result = df.select(randstr(F.lit(5), F.lit(0)).alias("x")).selectExpr("length(x)").collect() - self.assertEqual([Row(5)], result) + result = df.select(randstr(F.lit(5), F.lit(0)).alias("x")).selectExpr("length(x)") + assertDataFrameEqual([Row(5)], result) # The random seed is optional. - result = df.select(randstr(F.lit(5)).alias("x")).selectExpr("length(x)").collect() - self.assertEqual([Row(5)], result) + result = df.select(randstr(F.lit(5)).alias("x")).selectExpr("length(x)") + assertDataFrameEqual([Row(5)], result) df = self.spark.createDataFrame([(0,)], ["a"]) - result = ( - df.select(uniform(F.lit(10), F.lit(20), F.lit(0)).alias("x")) - .selectExpr("x > 5") - .collect() - ) - self.assertEqual([Row(True)], result) + result = df.select(uniform(F.lit(10), F.lit(20), F.lit(0)).alias("x")).selectExpr("x > 5") + assertDataFrameEqual([Row(True)], result) # The random seed is optional. 
- result = df.select(uniform(F.lit(10), F.lit(20)).alias("x")).selectExpr("x > 5").collect() - self.assertEqual([Row(True)], result) + result = df.select(uniform(F.lit(10), F.lit(20)).alias("x")).selectExpr("x > 5") + assertDataFrameEqual([Row(True)], result) def test_string_validation(self): df = self.spark.createDataFrame([("abc",)], ["a"]) # test is_valid_utf8 - result_is_valid_utf8 = df.select(F.is_valid_utf8(df.a).alias("r")).collect() - self.assertEqual([Row(r=True)], result_is_valid_utf8) + result_is_valid_utf8 = df.select(F.is_valid_utf8(df.a).alias("r")) + assertDataFrameEqual([Row(r=True)], result_is_valid_utf8) # test make_valid_utf8 - result_make_valid_utf8 = df.select(F.make_valid_utf8(df.a).alias("r")).collect() - self.assertEqual([Row(r="abc")], result_make_valid_utf8) + result_make_valid_utf8 = df.select(F.make_valid_utf8(df.a).alias("r")) + assertDataFrameEqual([Row(r="abc")], result_make_valid_utf8) # test validate_utf8 - result_validate_utf8 = df.select(F.validate_utf8(df.a).alias("r")).collect() - self.assertEqual([Row(r="abc")], result_validate_utf8) + result_validate_utf8 = df.select(F.validate_utf8(df.a).alias("r")) + assertDataFrameEqual([Row(r="abc")], result_validate_utf8) # test try_validate_utf8 - result_try_validate_utf8 = df.select(F.try_validate_utf8(df.a).alias("r")).collect() - self.assertEqual([Row(r="abc")], result_try_validate_utf8) + result_try_validate_utf8 = df.select(F.try_validate_utf8(df.a).alias("r")) + assertDataFrameEqual([Row(r="abc")], result_try_validate_utf8) class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin): diff --git a/python/pyspark/sql/tests/test_group.py b/python/pyspark/sql/tests/test_group.py index 8e3d2d8d00033..bbc089b00c133 100644 --- a/python/pyspark/sql/tests/test_group.py +++ b/python/pyspark/sql/tests/test_group.py @@ -36,11 +36,11 @@ def test_agg_func(self): data = [Row(key=1, value=10), Row(key=1, value=20), Row(key=1, value=30)] df = self.spark.createDataFrame(data) g = df.groupBy("key") 
- self.assertEqual(g.max("value").collect(), [Row(**{"key": 1, "max(value)": 30})]) - self.assertEqual(g.min("value").collect(), [Row(**{"key": 1, "min(value)": 10})]) - self.assertEqual(g.sum("value").collect(), [Row(**{"key": 1, "sum(value)": 60})]) - self.assertEqual(g.count().collect(), [Row(key=1, count=3)]) - self.assertEqual(g.mean("value").collect(), [Row(**{"key": 1, "avg(value)": 20.0})]) + assertDataFrameEqual(g.max("value"), [Row(**{"key": 1, "max(value)": 30})]) + assertDataFrameEqual(g.min("value"), [Row(**{"key": 1, "min(value)": 10})]) + assertDataFrameEqual(g.sum("value"), [Row(**{"key": 1, "sum(value)": 60})]) + assertDataFrameEqual(g.count(), [Row(key=1, count=3)]) + assertDataFrameEqual(g.mean("value"), [Row(**{"key": 1, "avg(value)": 20.0})]) data = [ Row(electronic="Smartphone", year=2018, sales=150000), @@ -59,7 +59,7 @@ def test_aggregator(self): df = self.df g = df.groupBy() self.assertEqual([99, 100], sorted(g.agg({"key": "max", "value": "count"}).collect()[0])) - self.assertEqual([Row(**{"AVG(key#0)": 49.5})], g.mean().collect()) + assertDataFrameEqual([Row(**{"AVG(key#0)": 49.5})], g.mean().collect()) from pyspark.sql import functions diff --git a/python/pyspark/sql/tests/test_job_cancellation.py b/python/pyspark/sql/tests/test_job_cancellation.py new file mode 100644 index 0000000000000..3f30f78808892 --- /dev/null +++ b/python/pyspark/sql/tests/test_job_cancellation.py @@ -0,0 +1,205 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import unittest +import threading +import time + +from pyspark import InheritableThread, inheritable_thread_target +from pyspark.testing.sqlutils import ReusedSQLTestCase + + +class JobCancellationTestsMixin: + def test_tags(self): + self.spark.clearTags() + self.spark.addTag("a") + self.assertEqual(self.spark.getTags(), {"a"}) + self.spark.addTag("b") + self.spark.removeTag("a") + self.assertEqual(self.spark.getTags(), {"b"}) + self.spark.addTag("c") + self.spark.clearTags() + self.assertEqual(self.spark.getTags(), set()) + self.spark.clearTags() + + def test_tags_multithread(self): + output1 = None + output2 = None + + def tag1(): + nonlocal output1 + + self.spark.addTag("tag1") + output1 = self.spark.getTags() + + def tag2(): + nonlocal output2 + + self.spark.addTag("tag2") + output2 = self.spark.getTags() + + t1 = threading.Thread(target=tag1) + t1.start() + t1.join() + t2 = threading.Thread(target=tag2) + t2.start() + t2.join() + + self.assertIsNotNone(output1) + self.assertEquals(output1, {"tag1"}) + self.assertIsNotNone(output2) + self.assertEquals(output2, {"tag2"}) + + def check_job_cancellation( + self, setter, canceller, thread_ids, thread_ids_to_cancel, thread_ids_to_run + ): + job_id_a = "job_ids_to_cancel" + job_id_b = "job_ids_to_run" + threads = [] + + # A list which records whether job is cancelled. + # The index of the array is the thread index which job run in. + is_job_cancelled = [False for _ in thread_ids] + + def run_job(job_id, index): + """ + Executes a job with the group ``job_group``. 
Each job waits for 3 seconds + and then exits. + """ + try: + setter(job_id) + + def func(itr): + for pdf in itr: + time.sleep(pdf._1.iloc[0]) + yield pdf + + self.spark.createDataFrame([[20]]).repartition(1).mapInPandas( + func, schema="_1 LONG" + ).collect() + is_job_cancelled[index] = False + except Exception: + # Assume that exception means job cancellation. + is_job_cancelled[index] = True + + # Test if job succeeded when not cancelled. + run_job(job_id_a, 0) + self.assertFalse(is_job_cancelled[0]) + self.spark.clearTags() + + # Run jobs + for i in thread_ids_to_cancel: + t = threading.Thread(target=run_job, args=(job_id_a, i)) + t.start() + threads.append(t) + + for i in thread_ids_to_run: + t = threading.Thread(target=run_job, args=(job_id_b, i)) + t.start() + threads.append(t) + + # Wait to make sure all jobs are executed. + time.sleep(10) + # And then, cancel one job group. + canceller(job_id_a) + + # Wait until all threads launching jobs are finished. + for t in threads: + t.join() + + for i in thread_ids_to_cancel: + self.assertTrue( + is_job_cancelled[i], "Thread {i}: Job in group A was not cancelled.".format(i=i) + ) + + for i in thread_ids_to_run: + self.assertFalse( + is_job_cancelled[i], "Thread {i}: Job in group B did not succeeded.".format(i=i) + ) + + def test_inheritable_tags(self): + self.check_inheritable_tags( + create_thread=lambda target, session: InheritableThread(target, session=session) + ) + self.check_inheritable_tags( + create_thread=lambda target, session: threading.Thread( + target=inheritable_thread_target(session)(target) + ) + ) + + def check_inheritable_tags(self, create_thread): + spark = self.spark + spark.addTag("a") + first = set() + second = set() + + def get_inner_local_prop(): + spark.addTag("c") + second.update(spark.getTags()) + + def get_outer_local_prop(): + spark.addTag("b") + first.update(spark.getTags()) + t2 = create_thread(target=get_inner_local_prop, session=spark) + t2.start() + t2.join() + + t1 = 
create_thread(target=get_outer_local_prop, session=spark) + t1.start() + t1.join() + + self.assertEqual(spark.getTags(), {"a"}) + self.assertEqual(first, {"a", "b"}) + self.assertEqual(second, {"a", "b", "c"}) + + def test_interrupt_tag(self): + thread_ids = range(4) + self.check_job_cancellation( + lambda job_group: self.spark.addTag(job_group), + lambda job_group: self.spark.interruptTag(job_group), + thread_ids, + [i for i in thread_ids if i % 2 == 0], + [i for i in thread_ids if i % 2 != 0], + ) + self.spark.clearTags() + + def test_interrupt_all(self): + thread_ids = range(4) + self.check_job_cancellation( + lambda job_group: None, + lambda job_group: self.spark.interruptAll(), + thread_ids, + thread_ids, + [], + ) + self.spark.clearTags() + + +class JobCancellationTests(JobCancellationTestsMixin, ReusedSQLTestCase): + pass + + +if __name__ == "__main__": + from pyspark.sql.tests.test_job_cancellation import * # noqa: F401 + + try: + import xmlrunner + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/tests/test_python_datasource.py b/python/pyspark/sql/tests/test_python_datasource.py index 140c7680b181b..a636b852a1e50 100644 --- a/python/pyspark/sql/tests/test_python_datasource.py +++ b/python/pyspark/sql/tests/test_python_datasource.py @@ -25,6 +25,7 @@ DataSourceReader, InputPartition, DataSourceWriter, + DataSourceArrowWriter, WriterCommitMessage, CaseInsensitiveDict, ) @@ -277,7 +278,7 @@ def write(self, iterator): from pyspark import TaskContext context = TaskContext.get() - output_path = os.path.join(self.path, f"{context.partitionId}.json") + output_path = os.path.join(self.path, f"{context.partitionId()}.json") count = 0 with open(output_path, "w") as file: for row in iterator: @@ -436,6 +437,37 @@ def partitions(self): ): self.spark.read.format("arrowbatch").schema("key int, dummy 
string").load().show() + def test_arrow_batch_sink(self): + class TestDataSource(DataSource): + @classmethod + def name(cls): + return "arrow_sink" + + def writer(self, schema, overwrite): + return TestArrowWriter(self.options["path"]) + + class TestArrowWriter(DataSourceArrowWriter): + def __init__(self, path): + self.path = path + + def write(self, iterator): + from pyspark import TaskContext + + context = TaskContext.get() + output_path = os.path.join(self.path, f"{context.partitionId()}.json") + with open(output_path, "w") as file: + for batch in iterator: + df = batch.to_pandas() + df.to_json(file, orient="records", lines=True) + return WriterCommitMessage() + + self.spark.dataSource.register(TestDataSource) + df = self.spark.range(3) + with tempfile.TemporaryDirectory(prefix="test_arrow_batch_sink") as d: + df.write.format("arrow_sink").mode("append").save(d) + df2 = self.spark.read.format("json").load(d) + assertDataFrameEqual(df2, df) + def test_data_source_type_mismatch(self): class TestDataSource(DataSource): @classmethod diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py index 2fca6b57decf9..683c925eefc23 100644 --- a/python/pyspark/sql/tests/test_readwriter.py +++ b/python/pyspark/sql/tests/test_readwriter.py @@ -23,6 +23,7 @@ from pyspark.sql.functions import col, lit from pyspark.sql.readwriter import DataFrameWriterV2 from pyspark.sql.types import StructType, StructField, StringType +from pyspark.testing import assertDataFrameEqual from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -34,15 +35,15 @@ def test_save_and_load(self): try: df.write.json(tmpPath) actual = self.spark.read.json(tmpPath) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + assertDataFrameEqual(df, actual) schema = StructType([StructField("value", StringType(), True)]) actual = self.spark.read.json(tmpPath, schema) - self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect())) + 
assertDataFrameEqual(df.select("value"), actual) df.write.json(tmpPath, "overwrite") actual = self.spark.read.json(tmpPath) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + assertDataFrameEqual(df, actual) df.write.save( format="json", @@ -53,11 +54,11 @@ def test_save_and_load(self): actual = self.spark.read.load( format="json", path=tmpPath, noUse="this options will not be used in load." ) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + assertDataFrameEqual(df, actual) with self.sql_conf({"spark.sql.sources.default": "org.apache.spark.sql.json"}): actual = self.spark.read.load(path=tmpPath) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + assertDataFrameEqual(df, actual) csvpath = os.path.join(tempfile.mkdtemp(), "data") df.write.option("quote", None).format("csv").save(csvpath) @@ -71,15 +72,15 @@ def test_save_and_load_builder(self): try: df.write.json(tmpPath) actual = self.spark.read.json(tmpPath) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + assertDataFrameEqual(df, actual) schema = StructType([StructField("value", StringType(), True)]) actual = self.spark.read.json(tmpPath, schema) - self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect())) + assertDataFrameEqual(df.select("value"), actual) df.write.mode("overwrite").json(tmpPath) actual = self.spark.read.json(tmpPath) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + assertDataFrameEqual(df, actual) df.write.mode("overwrite").options( noUse="this options will not be used in save." @@ -89,11 +90,11 @@ def test_save_and_load_builder(self): actual = self.spark.read.format("json").load( path=tmpPath, noUse="this options will not be used in load." 
) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + assertDataFrameEqual(df, actual) with self.sql_conf({"spark.sql.sources.default": "org.apache.spark.sql.json"}): actual = self.spark.read.load(path=tmpPath) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + assertDataFrameEqual(df, actual) finally: shutil.rmtree(tmpPath) diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py index de94b0d1882ff..c21247e3159c0 100644 --- a/python/pyspark/sql/tests/test_session.py +++ b/python/pyspark/sql/tests/test_session.py @@ -227,12 +227,6 @@ def test_unsupported_api(self): (lambda: session.client, "client"), (session.addArtifacts, "addArtifact(s)"), (lambda: session.copyFromLocalToFs("", ""), "copyFromLocalToFs"), - (lambda: session.interruptTag(""), "interruptTag"), - (lambda: session.interruptOperation(""), "interruptOperation"), - (lambda: session.addTag(""), "addTag"), - (lambda: session.removeTag(""), "removeTag"), - (session.getTags, "getTags"), - (session.clearTags, "clearTags"), ] for func, name in unsupported: diff --git a/python/pyspark/sql/tests/test_subquery.py b/python/pyspark/sql/tests/test_subquery.py index f58ff6364aed7..7c63ddb69458e 100644 --- a/python/pyspark/sql/tests/test_subquery.py +++ b/python/pyspark/sql/tests/test_subquery.py @@ -47,18 +47,21 @@ def df2(self): ["c", "d"], ) - def test_unanalyzable_expression(self): - sub = self.spark.range(1).where(sf.col("id") == sf.col("id").outer()) + def test_noop_outer(self): + assertDataFrameEqual( + self.spark.range(1).select(sf.col("id").outer()), + self.spark.range(1).select(sf.col("id")), + ) with self.assertRaises(AnalysisException) as pe: - sub.schema + self.spark.range(1).select(sf.col("outer_col").outer()).collect() self.check_error( exception=pe.exception, - errorClass="UNANALYZABLE_EXPRESSION", - messageParameters={"expr": '"outer(id)"'}, + errorClass="UNRESOLVED_COLUMN.WITH_SUGGESTION", + messageParameters={"objectName": 
"`outer_col`", "proposal": "`id`"}, query_context_type=QueryContextType.DataFrame, - fragment="outer", + fragment="col", ) def test_simple_uncorrelated_scalar_subquery(self): @@ -189,7 +192,7 @@ def test_scalar_subquery_against_local_relations(self): "c1", ( self.spark.table("t2") - .where(sf.col("c2").outer() == sf.col("c2")) + .where(sf.col("t1.c2").outer() == sf.col("t2.c2")) .select(sf.max("c1")) .scalar() ), @@ -205,45 +208,72 @@ def test_correlated_scalar_subquery(self): self.df2.createOrReplaceTempView("r") with self.subTest("in where"): - assertDataFrameEqual( - self.spark.table("l").where( - sf.col("b") - < ( - self.spark.table("r") - .where(sf.col("a").outer() == sf.col("c")) - .select(sf.max("d")) - .scalar() + for cond in [ + sf.col("a").outer() == sf.col("c"), + (sf.col("a") == sf.col("c")).outer(), + sf.expr("a = c").outer(), + ]: + with self.subTest(cond=cond): + assertDataFrameEqual( + self.spark.table("l").where( + sf.col("b") + < self.spark.table("r").where(cond).select(sf.max("d")).scalar() + ), + self.spark.sql( + """select * from l where b < (select max(d) from r where a = c)""" + ), ) - ), - self.spark.sql( - """select * from l where b < (select max(d) from r where a = c)""" - ), - ) with self.subTest("in select"): + df1 = self.spark.table("l").alias("t1") + df2 = self.spark.table("l").alias("t2") + + for cond in [ + sf.col("t1.a") == sf.col("t2.a").outer(), + (sf.col("t1.a") == sf.col("t2.a")).outer(), + sf.expr("t1.a = t2.a").outer(), + ]: + with self.subTest(cond=cond): + assertDataFrameEqual( + df1.select( + "a", + df2.where(cond).select(sf.sum("b")).scalar().alias("sum_b"), + ), + self.spark.sql( + """ + select + a, (select sum(b) from l t2 where t2.a = t1.a) sum_b + from l t1 + """ + ), + ) + + with self.subTest("without .outer()"): assertDataFrameEqual( self.spark.table("l").select( "a", ( - self.spark.table("l") - .where(sf.col("a") == sf.col("a").outer()) - .select(sf.sum("b")) + self.spark.table("r") + .where(sf.col("b") == 
sf.col("a").outer()) + .select(sf.sum("d")) .scalar() - .alias("sum_b") + .alias("sum_d") ), ), self.spark.sql( - """select a, (select sum(b) from l l2 where l2.a = l1.a) sum_b from l l1""" + """select a, (select sum(d) from r where b = l.a) sum_d from l""" ), ) with self.subTest("in select (null safe)"): + df1 = self.spark.table("l").alias("t1") + df2 = self.spark.table("l").alias("t2") + assertDataFrameEqual( - self.spark.table("l").select( + df1.select( "a", ( - self.spark.table("l") - .where(sf.col("a").eqNullSafe(sf.col("a").outer())) + df2.where(sf.col("t2.a").eqNullSafe(sf.col("t1.a").outer())) .select(sf.sum("b")) .scalar() .alias("sum_b") @@ -278,15 +308,13 @@ def test_correlated_scalar_subquery(self): ) with self.subTest("non-aggregated"): + df1 = self.spark.table("l").alias("t1") + df2 = self.spark.table("l").alias("t2") + with self.assertRaises(SparkRuntimeException) as pe: - self.spark.table("l").select( + df1.select( "a", - ( - self.spark.table("l") - .where(sf.col("a") == sf.col("a").outer()) - .select("b") - .scalar() - ), + df2.where(sf.col("t1.a") == sf.col("t2.a").outer()).select("b").scalar(), ).collect() self.check_error( @@ -296,19 +324,21 @@ def test_correlated_scalar_subquery(self): ) with self.subTest("non-equal"): + df1 = self.spark.table("l").alias("t1") + df2 = self.spark.table("l").alias("t2") + assertDataFrameEqual( - self.spark.table("l").select( + df1.select( "a", ( - self.spark.table("l") - .where(sf.col("a") < sf.col("a").outer()) + df2.where(sf.col("t2.a") < sf.col("t1.a").outer()) .select(sf.sum("b")) .scalar() .alias("sum_b") ), ), self.spark.sql( - """select a, (select sum(b) from l l2 where l2.a < l1.a) sum_b from l l1""" + """select a, (select sum(b) from l t2 where t2.a < t1.a) sum_b from l t1""" ), ) @@ -343,26 +373,30 @@ def test_exists_subquery(self): self.df2.createOrReplaceTempView("r") with self.subTest("EXISTS"): - assertDataFrameEqual( - self.spark.table("l").where( - self.spark.table("r").where(sf.col("a").outer() 
== sf.col("c")).exists() - ), - self.spark.sql( - """select * from l where exists (select * from r where l.a = r.c)""" - ), - ) + for cond in [ + sf.col("a").outer() == sf.col("c"), + (sf.col("a") == sf.col("c")).outer(), + sf.expr("a = c").outer(), + ]: + with self.subTest(cond=cond): + assertDataFrameEqual( + self.spark.table("l").where(self.spark.table("r").where(cond).exists()), + self.spark.sql( + """select * from l where exists (select * from r where l.a = r.c)""" + ), + ) - assertDataFrameEqual( - self.spark.table("l").where( - self.spark.table("r").where(sf.col("a").outer() == sf.col("c")).exists() - & (sf.col("a") <= sf.lit(2)) - ), - self.spark.sql( - """ + assertDataFrameEqual( + self.spark.table("l").where( + self.spark.table("r").where(cond).exists() + & (sf.col("a") <= sf.lit(2)) + ), + self.spark.sql( + """ select * from l where exists (select * from r where l.a = r.c) and l.a <= 2 """ - ), - ) + ), + ) with self.subTest("NOT EXISTS"): assertDataFrameEqual( @@ -425,70 +459,537 @@ def test_exists_subquery(self): ), ) - def test_scalar_subquery_with_outer_reference_errors(self): + def test_scalar_subquery_with_missing_outer_reference(self): with self.tempView("l", "r"): self.df1.createOrReplaceTempView("l") self.df2.createOrReplaceTempView("r") - with self.subTest("missing `outer()`"): - with self.assertRaises(AnalysisException) as pe: - self.spark.table("l").select( - "a", - ( - self.spark.table("r") - .where(sf.col("c") == sf.col("a")) - .select(sf.sum("d")) - .scalar() - ), - ).collect() + with self.assertRaises(AnalysisException) as pe: + self.spark.table("l").select( + "a", + ( + self.spark.table("r") + .where(sf.col("c") == sf.col("a")) + .select(sf.sum("d")) + .scalar() + ), + ).collect() + + self.check_error( + exception=pe.exception, + errorClass="UNRESOLVED_COLUMN.WITH_SUGGESTION", + messageParameters={"objectName": "`a`", "proposal": "`c`, `d`"}, + query_context_type=QueryContextType.DataFrame, + fragment="col", + ) - self.check_error( - 
exception=pe.exception, - errorClass="UNRESOLVED_COLUMN.WITH_SUGGESTION", - messageParameters={"objectName": "`a`", "proposal": "`c`, `d`"}, - query_context_type=QueryContextType.DataFrame, - fragment="col", - ) + def table1(self): + t1 = self.spark.sql("VALUES (0, 1), (1, 2) AS t1(c1, c2)") + t1.createOrReplaceTempView("t1") + return self.spark.table("t1") - with self.subTest("extra `outer()`"): - with self.assertRaises(AnalysisException) as pe: - self.spark.table("l").select( - "a", - ( - self.spark.table("r") - .where(sf.col("c").outer() == sf.col("a").outer()) - .select(sf.sum("d")) - .scalar() - ), - ).collect() + def table2(self): + t2 = self.spark.sql("VALUES (0, 2), (0, 3) AS t2(c1, c2)") + t2.createOrReplaceTempView("t2") + return self.spark.table("t2") - self.check_error( - exception=pe.exception, - errorClass="UNRESOLVED_COLUMN.WITH_SUGGESTION", - messageParameters={"objectName": "`c`", "proposal": "`a`, `b`"}, - query_context_type=QueryContextType.DataFrame, - fragment="outer", - ) + def table3(self): + t3 = self.spark.sql( + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) AS t3(c1, c2)" + ) + t3.createOrReplaceTempView("t3") + return self.spark.table("t3") - with self.subTest("missing `outer()` for another outer"): - with self.assertRaises(AnalysisException) as pe: - self.spark.table("l").select( - "a", - ( - self.spark.table("r") - .where(sf.col("b") == sf.col("a").outer()) - .select(sf.sum("d")) - .scalar() - ), - ).collect() + def test_lateral_join_with_single_column_select(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() - self.check_error( - exception=pe.exception, - errorClass="UNRESOLVED_COLUMN.WITH_SUGGESTION", - messageParameters={"objectName": "`b`", "proposal": "`c`, `d`"}, - query_context_type=QueryContextType.DataFrame, - fragment="col", + assertDataFrameEqual( + t1.lateralJoin(self.spark.range(1).select(sf.col("c1").outer())), + self.spark.sql("""SELECT * FROM t1, LATERAL (SELECT 
c1)"""), + ) + assertDataFrameEqual( + t1.lateralJoin(t2.select(sf.col("t1.c1").outer())), + self.spark.sql("""SELECT * FROM t1, LATERAL (SELECT t1.c1 FROM t2)"""), + ) + assertDataFrameEqual( + t1.lateralJoin(t2.select(sf.col("t1.c1").outer() + sf.col("t2.c1"))), + self.spark.sql("""SELECT * FROM t1, LATERAL (SELECT t1.c1 + t2.c1 FROM t2)"""), + ) + + def test_lateral_join_with_star_expansion(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.lateralJoin(self.spark.range(1).select().select(sf.col("*"))), + self.spark.sql("""SELECT * FROM t1, LATERAL (SELECT *)"""), + ) + assertDataFrameEqual( + t1.lateralJoin(t2.select(sf.col("*"))), + self.spark.sql("""SELECT * FROM t1, LATERAL (SELECT * FROM t2)"""), + ) + assertDataFrameEqual( + t1.lateralJoin(t2.select(sf.col("t1.*").outer(), sf.col("t2.*"))), + self.spark.sql("""SELECT * FROM t1, LATERAL (SELECT t1.*, t2.* FROM t2)"""), + ) + assertDataFrameEqual( + t1.lateralJoin(t2.alias("t1").select(sf.col("t1.*"))), + self.spark.sql("""SELECT * FROM t1, LATERAL (SELECT t1.* FROM t2 AS t1)"""), + ) + + def test_lateral_join_with_different_join_types(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.range(1).select( + (sf.col("c1").outer() + sf.col("c2").outer()).alias("c3") + ), + sf.col("c2") == sf.col("c3"), + ), + self.spark.sql( + """SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3""" + ), + ) + assertDataFrameEqual( + t1.lateralJoin( + self.spark.range(1).select( + (sf.col("c1").outer() + sf.col("c2").outer()).alias("c3") + ), + sf.col("c2") == sf.col("c3"), + "left", + ), + self.spark.sql( + """SELECT * FROM t1 LEFT JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3""" + ), + ) + assertDataFrameEqual( + t1.lateralJoin( + self.spark.range(1).select( + (sf.col("c1").outer() + sf.col("c2").outer()).alias("c3") + ), + how="cross", + ), + self.spark.sql("""SELECT * FROM t1 CROSS JOIN 
LATERAL (SELECT c1 + c2 AS c3)"""), + ) + + with self.assertRaises(AnalysisException) as pe: + t1.lateralJoin( + self.spark.range(1).select( + (sf.col("c1").outer() + sf.col("c2").outer()).alias("c3") + ), + how="right", + ).collect() + + self.check_error( + pe.exception, + errorClass="UNSUPPORTED_JOIN_TYPE", + messageParameters={ + "typ": "right", + "supported": "'inner', 'leftouter', 'left', 'left_outer', 'cross'", + }, + ) + + def test_lateral_join_with_subquery_alias(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.range(1) + .select(sf.col("c1").outer(), sf.col("c2").outer()) + .toDF("a", "b") + .alias("s") + ).select("a", "b"), + self.spark.sql("""SELECT a, b FROM t1, LATERAL (SELECT c1, c2) s(a, b)"""), + ) + + def test_lateral_join_with_correlated_predicates(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.lateralJoin( + t2.where(sf.col("t1.c1").outer() == sf.col("t2.c1")).select(sf.col("c2")) + ), + self.spark.sql( + """SELECT * FROM t1, LATERAL (SELECT c2 FROM t2 WHERE t1.c1 = t2.c1)""" + ), + ) + assertDataFrameEqual( + t1.lateralJoin( + t2.where(sf.col("t1.c1").outer() < sf.col("t2.c1")).select(sf.col("c2")) + ), + self.spark.sql( + """SELECT * FROM t1, LATERAL (SELECT c2 FROM t2 WHERE t1.c1 < t2.c1)""" + ), + ) + + def test_lateral_join_with_aggregation_and_correlated_predicates(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.lateralJoin( + t2.where(sf.col("t1.c2").outer() < sf.col("t2.c2")).select( + sf.max(sf.col("c2")).alias("m") + ) + ), + self.spark.sql( + """ + SELECT * FROM t1, LATERAL (SELECT max(c2) AS m FROM t2 WHERE t1.c2 < t2.c2) + """ + ), + ) + + def test_lateral_join_reference_preceding_from_clause_items(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.join(t2).lateralJoin( + 
self.spark.range(1).select(sf.col("t1.c2").outer() + sf.col("t2.c2").outer()) + ), + self.spark.sql("""SELECT * FROM t1 JOIN t2 JOIN LATERAL (SELECT t1.c2 + t2.c2)"""), + ) + + def test_multiple_lateral_joins(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.range(1).select( + (sf.col("c1").outer() + sf.col("c2").outer()).alias("a") + ) + ) + .lateralJoin( + self.spark.range(1).select( + (sf.col("c1").outer() - sf.col("c2").outer()).alias("b") + ) ) + .lateralJoin( + self.spark.range(1).select( + (sf.col("a").outer() * sf.col("b").outer()).alias("c") + ) + ), + self.spark.sql( + """ + SELECT * FROM t1, + LATERAL (SELECT c1 + c2 AS a), + LATERAL (SELECT c1 - c2 AS b), + LATERAL (SELECT a * b AS c) + """ + ), + ) + + def test_lateral_join_in_between_regular_joins(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.lateralJoin( + t2.where(sf.col("t1.c1").outer() == sf.col("t2.c1")) + .select(sf.col("c2")) + .alias("s"), + how="left", + ).join(t1.alias("t3"), sf.col("s.c2") == sf.col("t3.c2"), how="left"), + self.spark.sql( + """ + SELECT * FROM t1 + LEFT OUTER JOIN LATERAL (SELECT c2 FROM t2 WHERE t1.c1 = t2.c1) s + LEFT OUTER JOIN t1 t3 ON s.c2 = t3.c2 + """ + ), + ) + + def test_nested_lateral_joins(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.lateralJoin(t2.lateralJoin(self.spark.range(1).select(sf.col("c1").outer()))), + self.spark.sql( + """SELECT * FROM t1, LATERAL (SELECT * FROM t2, LATERAL (SELECT c1))""" + ), + ) + assertDataFrameEqual( + t1.lateralJoin( + self.spark.range(1) + .select((sf.col("c1").outer() + sf.lit(1)).alias("c1")) + .lateralJoin(self.spark.range(1).select(sf.col("c1").outer())) + ), + self.spark.sql( + """ + SELECT * FROM t1, + LATERAL (SELECT * FROM (SELECT c1 + 1 AS c1), LATERAL (SELECT c1)) + """ + ), + ) + + def 
test_scalar_subquery_inside_lateral_join(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.range(1).select( + sf.col("c2").outer(), t2.select(sf.min(sf.col("c2"))).scalar() + ) + ), + self.spark.sql( + """SELECT * FROM t1, LATERAL (SELECT c2, (SELECT MIN(c2) FROM t2))""" + ), + ) + assertDataFrameEqual( + t1.lateralJoin( + self.spark.range(1) + .select(sf.col("c1").outer().alias("a")) + .select( + t2.where(sf.col("c1") == sf.col("a").outer()) + .select(sf.sum(sf.col("c2"))) + .scalar() + ) + ), + self.spark.sql( + """ + SELECT * FROM t1, LATERAL ( + SELECT (SELECT SUM(c2) FROM t2 WHERE c1 = a) FROM (SELECT c1 AS a) + ) + """ + ), + ) + + def test_lateral_join_inside_subquery(self): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.where( + sf.col("c1") + == ( + t2.lateralJoin(self.spark.range(1).select(sf.col("c1").outer().alias("a"))) + .select(sf.min(sf.col("a"))) + .scalar() + ) + ), + self.spark.sql( + """ + SELECT * FROM t1 WHERE c1 = (SELECT MIN(a) FROM t2, LATERAL (SELECT c1 AS a)) + """ + ), + ) + assertDataFrameEqual( + t1.where( + sf.col("c1") + == ( + t2.lateralJoin(self.spark.range(1).select(sf.col("c1").outer().alias("a"))) + .where(sf.col("c1") == sf.col("t1.c1").outer()) + .select(sf.min(sf.col("a"))) + .scalar() + ) + ), + self.spark.sql( + """ + SELECT * FROM t1 + WHERE c1 = (SELECT MIN(a) FROM t2, LATERAL (SELECT c1 AS a) WHERE c1 = t1.c1) + """ + ), + ) + + def test_lateral_join_with_table_valued_functions(self): + with self.tempView("t1", "t3"): + t1 = self.table1() + t3 = self.table3() + + assertDataFrameEqual( + t1.lateralJoin(self.spark.tvf.range(3)), + self.spark.sql("""SELECT * FROM t1, LATERAL RANGE(3)"""), + ) + assertDataFrameEqual( + t1.lateralJoin( + self.spark.tvf.explode(sf.array(sf.col("c1").outer(), sf.col("c2").outer())) + ).toDF("c1", "c2", "c3"), + self.spark.sql("""SELECT * FROM t1, 
LATERAL EXPLODE(ARRAY(c1, c2)) t2(c3)"""), + ) + assertDataFrameEqual( + t3.lateralJoin(self.spark.tvf.explode_outer(sf.col("c2").outer())).toDF( + "c1", "c2", "v" + ), + self.spark.sql("""SELECT * FROM t3, LATERAL EXPLODE_OUTER(c2) t2(v)"""), + ) + assertDataFrameEqual( + self.spark.tvf.explode(sf.array(sf.lit(1), sf.lit(2))) + .toDF("v") + .lateralJoin(self.spark.range(1).select((sf.col("v").outer() + 1).alias("v"))), + self.spark.sql( + """SELECT * FROM EXPLODE(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1 AS v)""" + ), + ) + + def test_lateral_join_with_table_valued_functions_and_join_conditions(self): + with self.tempView("t1", "t3"): + t1 = self.table1() + t3 = self.table3() + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.tvf.explode(sf.array(sf.col("c1").outer(), sf.col("c2").outer())), + sf.col("c1") == sf.col("col"), + ).toDF("c1", "c2", "c3"), + self.spark.sql( + """SELECT * FROM t1 JOIN LATERAL EXPLODE(ARRAY(c1, c2)) t(c3) ON t1.c1 = c3""" + ), + ) + assertDataFrameEqual( + t3.lateralJoin( + self.spark.tvf.explode(sf.col("c2").outer()), + sf.col("c1") == sf.col("col"), + ).toDF("c1", "c2", "c3"), + self.spark.sql("""SELECT * FROM t3 JOIN LATERAL EXPLODE(c2) t(c3) ON t3.c1 = c3"""), + ) + assertDataFrameEqual( + t3.lateralJoin( + self.spark.tvf.explode(sf.col("c2").outer()), + sf.col("c1") == sf.col("col"), + "left", + ).toDF("c1", "c2", "c3"), + self.spark.sql( + """SELECT * FROM t3 LEFT JOIN LATERAL EXPLODE(c2) t(c3) ON t3.c1 = c3""" + ), + ) + + def test_subquery_with_generator_and_tvf(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual( + self.spark.range(1).select(sf.explode(t1.select(sf.collect_list("c2")).scalar())), + self.spark.sql("""SELECT EXPLODE((SELECT COLLECT_LIST(c2) FROM t1))"""), + ) + assertDataFrameEqual( + self.spark.tvf.explode(t1.select(sf.collect_list("c2")).scalar()), + self.spark.sql("""SELECT * FROM EXPLODE((SELECT COLLECT_LIST(c2) FROM t1))"""), + ) + + def test_subquery_in_join_condition(self): 
+ with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + assertDataFrameEqual( + t1.join(t2, sf.col("t1.c1") == t1.select(sf.max("c1")).scalar()), + self.spark.sql("""SELECT * FROM t1 JOIN t2 ON t1.c1 = (SELECT MAX(c1) FROM t1)"""), + ) + + def test_subquery_in_unpivot(self): + self.check_subquery_in_unpivot(QueryContextType.DataFrame, "exists") + + def check_subquery_in_unpivot(self, query_context_type, fragment): + with self.tempView("t1", "t2"): + t1 = self.table1() + t2 = self.table2() + + with self.assertRaises(AnalysisException) as pe: + t1.unpivot("c1", t2.exists(), "c1", "c2").collect() + + self.check_error( + exception=pe.exception, + errorClass=( + "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY" + ), + messageParameters={"treeNode": "Expand.*"}, + query_context_type=query_context_type, + fragment=fragment, + matchPVals=True, + ) + + def test_subquery_in_transpose(self): + with self.tempView("t1"): + t1 = self.table1() + + with self.assertRaises(AnalysisException) as pe: + t1.transpose(t1.select(sf.max("c1")).scalar()).collect() + + self.check_error( + exception=pe.exception, + errorClass="TRANSPOSE_INVALID_INDEX_COLUMN", + messageParameters={"reason": "Index column must be an atomic attribute"}, + ) + + def test_subquery_in_with_columns(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual( + t1.withColumn( + "scalar", + self.spark.range(1) + .select(sf.col("c1").outer() + sf.col("c2").outer()) + .scalar(), + ), + t1.select("*", (sf.col("c1") + sf.col("c2")).alias("scalar")), + ) + assertDataFrameEqual( + t1.withColumn( + "scalar", + self.spark.range(1) + .withColumn("c1", sf.col("c1").outer()) + .select(sf.col("c1") + sf.col("c2").outer()) + .scalar(), + ), + t1.select("*", (sf.col("c1") + sf.col("c2")).alias("scalar")), + ) + assertDataFrameEqual( + t1.withColumn( + "scalar", + self.spark.range(1) + .select(sf.col("c1").outer().alias("c1")) + .withColumn("c2", 
sf.col("c2").outer()) + .select(sf.col("c1") + sf.col("c2")) + .scalar(), + ), + t1.select("*", (sf.col("c1") + sf.col("c2")).alias("scalar")), + ) + + def test_subquery_in_with_columns_renamed(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual( + t1.withColumn( + "scalar", + self.spark.range(1) + .select(sf.col("c1").outer().alias("c1"), sf.col("c2").outer().alias("c2")) + .withColumnsRenamed({"c1": "x", "c2": "y"}) + .select(sf.col("x") + sf.col("y")) + .scalar(), + ), + t1.select("*", (sf.col("c1").alias("x") + sf.col("c2").alias("y")).alias("scalar")), + ) + + def test_subquery_in_drop(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual(t1.drop(self.spark.range(1).select(sf.lit("c1")).scalar()), t1) + + def test_subquery_in_repartition(self): + with self.tempView("t1"): + t1 = self.table1() + + assertDataFrameEqual(t1.repartition(self.spark.range(1).select(sf.lit(1)).scalar()), t1) class SubqueryTests(SubqueryTestsMixin, ReusedSQLTestCase): diff --git a/python/pyspark/sql/tests/test_tvf.py b/python/pyspark/sql/tests/test_tvf.py index 5c709437fc4db..c7274c0810cfb 100644 --- a/python/pyspark/sql/tests/test_tvf.py +++ b/python/pyspark/sql/tests/test_tvf.py @@ -52,6 +52,39 @@ def test_explode(self): expected = self.spark.sql("""SELECT * FROM explode(null :: map)""") assertDataFrameEqual(actual=actual, expected=expected) + def test_explode_with_lateral_join(self): + with self.tempView("t1", "t2"): + t1 = self.spark.sql("VALUES (0, 1), (1, 2) AS t1(c1, c2)") + t1.createOrReplaceTempView("t1") + t3 = self.spark.sql( + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " + "AS t3(c1, c2)" + ) + t3.createOrReplaceTempView("t3") + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.tvf.explode(sf.array(sf.col("c1").outer(), sf.col("c2").outer())) + .toDF("c3") + .alias("t2") + ), + self.spark.sql("""SELECT * FROM t1, LATERAL EXPLODE(ARRAY(c1, c2)) t2(c3)"""), + ) + assertDataFrameEqual( + 
t3.lateralJoin(self.spark.tvf.explode(sf.col("c2").outer()).toDF("v").alias("t2")), + self.spark.sql("""SELECT * FROM t3, LATERAL EXPLODE(c2) t2(v)"""), + ) + assertDataFrameEqual( + self.spark.tvf.explode(sf.array(sf.lit(1), sf.lit(2))) + .toDF("v") + .lateralJoin( + self.spark.range(1).select((sf.col("v").outer() + sf.lit(1)).alias("v2")) + ), + self.spark.sql( + """SELECT * FROM EXPLODE(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1 AS v2)""" + ), + ) + def test_explode_outer(self): actual = self.spark.tvf.explode_outer(sf.array(sf.lit(1), sf.lit(2))) expected = self.spark.sql("""SELECT * FROM explode_outer(array(1, 2))""") @@ -81,6 +114,45 @@ def test_explode_outer(self): expected = self.spark.sql("""SELECT * FROM explode_outer(null :: map)""") assertDataFrameEqual(actual=actual, expected=expected) + def test_explode_outer_with_lateral_join(self): + with self.tempView("t1", "t2"): + t1 = self.spark.sql("VALUES (0, 1), (1, 2) AS t1(c1, c2)") + t1.createOrReplaceTempView("t1") + t3 = self.spark.sql( + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " + "AS t3(c1, c2)" + ) + t3.createOrReplaceTempView("t3") + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.tvf.explode_outer( + sf.array(sf.col("c1").outer(), sf.col("c2").outer()) + ) + .toDF("c3") + .alias("t2") + ), + self.spark.sql("""SELECT * FROM t1, LATERAL EXPLODE_OUTER(ARRAY(c1, c2)) t2(c3)"""), + ) + assertDataFrameEqual( + t3.lateralJoin( + self.spark.tvf.explode_outer(sf.col("c2").outer()).toDF("v").alias("t2") + ), + self.spark.sql("""SELECT * FROM t3, LATERAL EXPLODE_OUTER(c2) t2(v)"""), + ) + assertDataFrameEqual( + self.spark.tvf.explode_outer(sf.array(sf.lit(1), sf.lit(2))) + .toDF("v") + .lateralJoin( + self.spark.range(1).select((sf.col("v").outer() + sf.lit(1)).alias("v2")) + ), + self.spark.sql( + """ + SELECT * FROM EXPLODE_OUTER(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1 AS v2) + """ + ), + ) + def test_inline(self): actual = self.spark.tvf.inline( 
sf.array(sf.struct(sf.lit(1), sf.lit("a")), sf.struct(sf.lit(2), sf.lit("b"))) @@ -107,6 +179,35 @@ def test_inline(self): ) assertDataFrameEqual(actual=actual, expected=expected) + def test_inline_with_lateral_join(self): + with self.tempView("array_struct"): + array_struct = self.spark.sql( + """ + VALUES + (1, ARRAY(STRUCT(1, 'a'), STRUCT(2, 'b'))), + (2, ARRAY()), + (3, ARRAY(STRUCT(3, 'c'))) AS array_struct(id, arr) + """ + ) + array_struct.createOrReplaceTempView("array_struct") + + assertDataFrameEqual( + array_struct.lateralJoin(self.spark.tvf.inline(sf.col("arr").outer())), + self.spark.sql("""SELECT * FROM array_struct JOIN LATERAL INLINE(arr)"""), + ) + assertDataFrameEqual( + array_struct.lateralJoin( + self.spark.tvf.inline(sf.col("arr").outer()).toDF("k", "v").alias("t"), + sf.col("id") == sf.col("k"), + "left", + ), + self.spark.sql( + """ + SELECT * FROM array_struct LEFT JOIN LATERAL INLINE(arr) t(k, v) ON id = k + """ + ), + ) + def test_inline_outer(self): actual = self.spark.tvf.inline_outer( sf.array(sf.struct(sf.lit(1), sf.lit("a")), sf.struct(sf.lit(2), sf.lit("b"))) @@ -137,6 +238,35 @@ def test_inline_outer(self): ) assertDataFrameEqual(actual=actual, expected=expected) + def test_inline_outer_with_lateral_join(self): + with self.tempView("array_struct"): + array_struct = self.spark.sql( + """ + VALUES + (1, ARRAY(STRUCT(1, 'a'), STRUCT(2, 'b'))), + (2, ARRAY()), + (3, ARRAY(STRUCT(3, 'c'))) AS array_struct(id, arr) + """ + ) + array_struct.createOrReplaceTempView("array_struct") + + assertDataFrameEqual( + array_struct.lateralJoin(self.spark.tvf.inline_outer(sf.col("arr").outer())), + self.spark.sql("""SELECT * FROM array_struct JOIN LATERAL INLINE_OUTER(arr)"""), + ) + assertDataFrameEqual( + array_struct.lateralJoin( + self.spark.tvf.inline_outer(sf.col("arr").outer()).toDF("k", "v").alias("t"), + sf.col("id") == sf.col("k"), + "left", + ), + self.spark.sql( + """ + SELECT * FROM array_struct LEFT JOIN LATERAL INLINE_OUTER(arr) t(k, v) 
ON id = k + """ + ), + ) + def test_json_tuple(self): actual = self.spark.tvf.json_tuple(sf.lit('{"a":1, "b":2}'), sf.lit("a"), sf.lit("b")) expected = self.spark.sql("""SELECT json_tuple('{"a":1, "b":2}', 'a', 'b')""") @@ -151,6 +281,64 @@ def test_json_tuple(self): messageParameters={"item": "field"}, ) + def test_json_tuple_with_lateral_join(self): + with self.tempView("json_table"): + json_table = self.spark.sql( + """ + VALUES + ('1', '{"f1": "1", "f2": "2", "f3": 3, "f5": 5.23}'), + ('2', '{"f1": "1", "f3": "3", "f2": 2, "f4": 4.01}'), + ('3', '{"f1": 3, "f4": "4", "f3": "3", "f2": 2, "f5": 5.01}'), + ('4', cast(null as string)), + ('5', '{"f1": null, "f5": ""}'), + ('6', '[invalid JSON string]') AS json_table(key, jstring) + """ + ) + json_table.createOrReplaceTempView("json_table") + + assertDataFrameEqual( + json_table.alias("t1") + .lateralJoin( + self.spark.tvf.json_tuple( + sf.col("jstring").outer(), + sf.lit("f1"), + sf.lit("f2"), + sf.lit("f3"), + sf.lit("f4"), + sf.lit("f5"), + ).alias("t2") + ) + .select("t1.key", "t2.*"), + self.spark.sql( + """ + SELECT t1.key, t2.* FROM json_table t1, + LATERAL json_tuple(t1.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') t2 + """ + ), + ) + assertDataFrameEqual( + json_table.alias("t1") + .lateralJoin( + self.spark.tvf.json_tuple( + sf.col("jstring").outer(), + sf.lit("f1"), + sf.lit("f2"), + sf.lit("f3"), + sf.lit("f4"), + sf.lit("f5"), + ).alias("t2") + ) + .where(sf.col("t2.c0").isNotNull()) + .select("t1.key", "t2.*"), + self.spark.sql( + """ + SELECT t1.key, t2.* FROM json_table t1, + LATERAL json_tuple(t1.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') t2 + WHERE t2.c0 IS NOT NULL + """ + ), + ) + def test_posexplode(self): actual = self.spark.tvf.posexplode(sf.array(sf.lit(1), sf.lit(2))) expected = self.spark.sql("""SELECT * FROM posexplode(array(1, 2))""") @@ -180,6 +368,39 @@ def test_posexplode(self): expected = self.spark.sql("""SELECT * FROM posexplode(null :: map)""") assertDataFrameEqual(actual=actual, 
expected=expected) + def test_posexplode_with_lateral_join(self): + with self.tempView("t1", "t2"): + t1 = self.spark.sql("VALUES (0, 1), (1, 2) AS t1(c1, c2)") + t1.createOrReplaceTempView("t1") + t3 = self.spark.sql( + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " + "AS t3(c1, c2)" + ) + t3.createOrReplaceTempView("t3") + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.tvf.posexplode(sf.array(sf.col("c1").outer(), sf.col("c2").outer())) + ), + self.spark.sql("""SELECT * FROM t1, LATERAL POSEXPLODE(ARRAY(c1, c2))"""), + ) + assertDataFrameEqual( + t3.lateralJoin(self.spark.tvf.posexplode(sf.col("c2").outer())), + self.spark.sql("""SELECT * FROM t3, LATERAL POSEXPLODE(c2)"""), + ) + assertDataFrameEqual( + self.spark.tvf.posexplode(sf.array(sf.lit(1), sf.lit(2))) + .toDF("p", "v") + .lateralJoin( + self.spark.range(1).select((sf.col("v").outer() + sf.lit(1)).alias("v2")) + ), + self.spark.sql( + """ + SELECT * FROM POSEXPLODE(ARRAY(1, 2)) t(p, v), LATERAL (SELECT v + 1 AS v2) + """ + ), + ) + def test_posexplode_outer(self): actual = self.spark.tvf.posexplode_outer(sf.array(sf.lit(1), sf.lit(2))) expected = self.spark.sql("""SELECT * FROM posexplode_outer(array(1, 2))""") @@ -209,11 +430,95 @@ def test_posexplode_outer(self): expected = self.spark.sql("""SELECT * FROM posexplode_outer(null :: map)""") assertDataFrameEqual(actual=actual, expected=expected) + def test_posexplode_outer_with_lateral_join(self): + with self.tempView("t1", "t2"): + t1 = self.spark.sql("VALUES (0, 1), (1, 2) AS t1(c1, c2)") + t1.createOrReplaceTempView("t1") + t3 = self.spark.sql( + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " + "AS t3(c1, c2)" + ) + t3.createOrReplaceTempView("t3") + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.tvf.posexplode_outer( + sf.array(sf.col("c1").outer(), sf.col("c2").outer()) + ) + ), + self.spark.sql("""SELECT * FROM t1, LATERAL POSEXPLODE_OUTER(ARRAY(c1, c2))"""), + ) + 
assertDataFrameEqual( + t3.lateralJoin(self.spark.tvf.posexplode_outer(sf.col("c2").outer())), + self.spark.sql("""SELECT * FROM t3, LATERAL POSEXPLODE_OUTER(c2)"""), + ) + assertDataFrameEqual( + self.spark.tvf.posexplode_outer(sf.array(sf.lit(1), sf.lit(2))) + .toDF("p", "v") + .lateralJoin( + self.spark.range(1).select((sf.col("v").outer() + sf.lit(1)).alias("v2")) + ), + self.spark.sql( + """ + SELECT * FROM POSEXPLODE_OUTER(ARRAY(1, 2)) t(p, v), + LATERAL (SELECT v + 1 AS v2) + """ + ), + ) + def test_stack(self): actual = self.spark.tvf.stack(sf.lit(2), sf.lit(1), sf.lit(2), sf.lit(3)) expected = self.spark.sql("""SELECT * FROM stack(2, 1, 2, 3)""") assertDataFrameEqual(actual=actual, expected=expected) + def test_stack_with_lateral_join(self): + with self.tempView("t1", "t3"): + t1 = self.spark.sql("VALUES (0, 1), (1, 2) AS t1(c1, c2)") + t1.createOrReplaceTempView("t1") + t3 = self.spark.sql( + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) " + "AS t3(c1, c2)" + ) + t3.createOrReplaceTempView("t3") + + assertDataFrameEqual( + t1.lateralJoin( + self.spark.tvf.stack( + sf.lit(2), + sf.lit("Key"), + sf.col("c1").outer(), + sf.lit("Value"), + sf.col("c2").outer(), + ).alias("t") + ).select("t.*"), + self.spark.sql( + """SELECT t.* FROM t1, LATERAL stack(2, 'Key', c1, 'Value', c2) t""" + ), + ) + assertDataFrameEqual( + t1.lateralJoin( + self.spark.tvf.stack(sf.lit(1), sf.col("c1").outer(), sf.col("c2").outer()) + .toDF("x", "y") + .alias("t") + ).select("t.*"), + self.spark.sql("""SELECT t.* FROM t1 JOIN LATERAL stack(1, c1, c2) t(x, y)"""), + ) + assertDataFrameEqual( + t1.join(t3, sf.col("t1.c1") == sf.col("t3.c1")) + .lateralJoin( + self.spark.tvf.stack( + sf.lit(1), sf.col("t1.c2").outer(), sf.col("t3.c2").outer() + ).alias("t") + ) + .select("t.*"), + self.spark.sql( + """ + SELECT t.* FROM t1 JOIN t3 ON t1.c1 = t3.c1 + JOIN LATERAL stack(1, t1.c2, t3.c2) t + """ + ), + ) + def test_collations(self): actual = 
self.spark.tvf.collations() expected = self.spark.sql("""SELECT * FROM collations()""") @@ -256,6 +561,31 @@ def test_variant_explode(self): expected = self.spark.sql("""SELECT * FROM variant_explode(parse_json('1'))""") assertDataFrameEqual(actual=actual, expected=expected) + def test_variant_explode_with_lateral_join(self): + with self.tempView("variant_table"): + variant_table = self.spark.sql( + """ + SELECT id, parse_json(v) AS v FROM VALUES + (0, '["hello", "world"]'), (1, '{"a": true, "b": 3.14}'), + (2, '[]'), (3, '{}'), + (4, NULL), (5, '1') + AS t(id, v) + """ + ) + variant_table.createOrReplaceTempView("variant_table") + + assertDataFrameEqual( + variant_table.alias("t1") + .lateralJoin(self.spark.tvf.variant_explode(sf.col("v").outer()).alias("t")) + .select("t1.id", "t.*"), + self.spark.sql( + """ + SELECT t1.id, t.* FROM variant_table AS t1, + LATERAL variant_explode(v) AS t + """ + ), + ) + def test_variant_explode_outer(self): actual = self.spark.tvf.variant_explode_outer(sf.parse_json(sf.lit('["hello", "world"]'))) expected = self.spark.sql( @@ -290,6 +620,31 @@ def test_variant_explode_outer(self): expected = self.spark.sql("""SELECT * FROM variant_explode_outer(parse_json('1'))""") assertDataFrameEqual(actual=actual, expected=expected) + def test_variant_explode_outer_with_lateral_join(self): + with self.tempView("variant_table"): + variant_table = self.spark.sql( + """ + SELECT id, parse_json(v) AS v FROM VALUES + (0, '["hello", "world"]'), (1, '{"a": true, "b": 3.14}'), + (2, '[]'), (3, '{}'), + (4, NULL), (5, '1') + AS t(id, v) + """ + ) + variant_table.createOrReplaceTempView("variant_table") + + assertDataFrameEqual( + variant_table.alias("t1") + .lateralJoin(self.spark.tvf.variant_explode_outer(sf.col("v").outer()).alias("t")) + .select("t1.id", "t.*"), + self.spark.sql( + """ + SELECT t1.id, t.* FROM variant_table AS t1, + LATERAL variant_explode_outer(v) AS t + """ + ), + ) + class TVFTests(TVFTestsMixin, ReusedSQLTestCase): pass diff 
--git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index dcc383b7add5a..ab05502ad229d 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -2240,6 +2240,60 @@ def test_variant_type(self): PySparkValueError, lambda: str(VariantVal(bytes([32, 10, 1, 0, 0, 0]), metadata)) ) + # check parse_json + for key, json, obj in expected_values: + self.assertEqual(VariantVal.parseJson(json).toJson(), json) + self.assertEqual(VariantVal.parseJson(json).toPython(), obj) + + # compare the parse_json in Spark vs python. `json_str` contains all of `expected_values`. + parse_json_spark_output = variants[0] + parse_json_python_output = VariantVal.parseJson(json_str) + self.assertEqual(parse_json_spark_output.value, parse_json_python_output.value) + self.assertEqual(parse_json_spark_output.metadata, parse_json_python_output.metadata) + + # Test createDataFrame + create_df_variants = self.spark.createDataFrame( + [ + ( + VariantVal.parseJson("2"), + [VariantVal.parseJson("3")], + {"v": VariantVal.parseJson("4")}, + {"v": VariantVal.parseJson("5")}, + ), + (None, [None], {"v": None}, {"v": None}), + (None, None, None, None), + ], + "v variant, a array, s struct, m map", + ).collect() + self.assertEqual(create_df_variants[0][0].toJson(), "2") + self.assertEqual(create_df_variants[0][1][0].toJson(), "3") + self.assertEqual(create_df_variants[0][2][0].toJson(), "4") + self.assertEqual(create_df_variants[0][3]["v"].toJson(), "5") + self.assertEqual(create_df_variants[1][0], None) + self.assertEqual(create_df_variants[1][1][0], None) + self.assertEqual(create_df_variants[1][2][0], None) + self.assertEqual(create_df_variants[1][3]["v"], None) + self.assertEqual(create_df_variants[2][0], None) + self.assertEqual(create_df_variants[2][1], None) + self.assertEqual(create_df_variants[2][2], None) + self.assertEqual(create_df_variants[2][3], None) + + def test_to_ddl(self): + schema = StructType().add("a", 
NullType()).add("b", BooleanType()).add("c", BinaryType()) + self.assertEqual(schema.toDDL(), "a VOID,b BOOLEAN,c BINARY") + + schema = StructType().add("a", IntegerType()).add("b", StringType()) + self.assertEqual(schema.toDDL(), "a INT,b STRING") + + schema = StructType().add("a", FloatType()).add("b", LongType(), False) + self.assertEqual(schema.toDDL(), "a FLOAT,b BIGINT NOT NULL") + + schema = StructType().add("a", ArrayType(DoubleType()), False).add("b", DateType()) + self.assertEqual(schema.toDDL(), "a ARRAY NOT NULL,b DATE") + + schema = StructType().add("a", TimestampType()).add("b", TimestampNTZType()) + self.assertEqual(schema.toDDL(), "a TIMESTAMP,b TIMESTAMP_NTZ") + def test_from_ddl(self): self.assertEqual(DataType.fromDDL("long"), LongType()) self.assertEqual( diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 78aa2546128a1..67d243cd29244 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -220,7 +220,7 @@ def test_udf_in_filter_on_top_of_outer_join(self): right = self.spark.createDataFrame([Row(a=1)]) df = left.join(right, on="a", how="left_outer") df = df.withColumn("b", udf(lambda x: "x")(df.a)) - self.assertEqual(df.filter('b = "x"').collect(), [Row(a=1, b="x")]) + assertDataFrameEqual(df.filter('b = "x"'), [Row(a=1, b="x")]) def test_udf_in_filter_on_top_of_join(self): # regression test for SPARK-18589 @@ -228,7 +228,7 @@ def test_udf_in_filter_on_top_of_join(self): right = self.spark.createDataFrame([Row(b=1)]) f = udf(lambda a, b: a == b, BooleanType()) df = left.crossJoin(right).filter(f("a", "b")) - self.assertEqual(df.collect(), [Row(a=1, b=1)]) + assertDataFrameEqual(df, [Row(a=1, b=1)]) def test_udf_in_join_condition(self): # regression test for SPARK-25314 @@ -243,7 +243,7 @@ def test_udf_in_join_condition(self): df.collect() with self.sql_conf({"spark.sql.crossJoin.enabled": True}): df = left.join(right, f("a", "b")) - self.assertEqual(df.collect(), 
[Row(a=1, b=1)]) + assertDataFrameEqual(df, [Row(a=1, b=1)]) def test_udf_in_left_outer_join_condition(self): # regression test for SPARK-26147 @@ -256,7 +256,7 @@ def test_udf_in_left_outer_join_condition(self): # The Python UDF only refer to attributes from one side, so it's evaluable. df = left.join(right, f("a") == col("b").cast("string"), how="left_outer") with self.sql_conf({"spark.sql.crossJoin.enabled": True}): - self.assertEqual(df.collect(), [Row(a=1, b=1)]) + assertDataFrameEqual(df, [Row(a=1, b=1)]) def test_udf_and_common_filter_in_join_condition(self): # regression test for SPARK-25314 @@ -266,7 +266,7 @@ def test_udf_and_common_filter_in_join_condition(self): f = udf(lambda a, b: a == b, BooleanType()) df = left.join(right, [f("a", "b"), left.a1 == right.b1]) # do not need spark.sql.crossJoin.enabled=true for udf is not the only join condition. - self.assertEqual(df.collect(), [Row(a=1, a1=1, a2=1, b=1, b1=1, b2=1)]) + assertDataFrameEqual(df, [Row(a=1, a1=1, a2=1, b=1, b1=1, b2=1)]) def test_udf_not_supported_in_join_condition(self): # regression test for SPARK-25314 @@ -294,7 +294,7 @@ def test_udf_as_join_condition(self): f = udf(lambda a: a, IntegerType()) df = left.join(right, [f("a") == f("b"), left.a1 == right.b1]) - self.assertEqual(df.collect(), [Row(a=1, a1=1, a2=1, b=1, b1=1, b2=1)]) + assertDataFrameEqual(df, [Row(a=1, a1=1, a2=1, b=1, b1=1, b2=1)]) def test_udf_without_arguments(self): self.spark.catalog.registerFunction("foo", lambda: "bar") @@ -331,7 +331,7 @@ def test_udf_with_filter_function(self): my_filter = udf(lambda a: a < 2, BooleanType()) sel = df.select(col("key"), col("value")).filter((my_filter(col("key"))) & (df.value < "2")) - self.assertEqual(sel.collect(), [Row(key=1, value="1")]) + assertDataFrameEqual(sel, [Row(key=1, value="1")]) def test_udf_with_variant_input(self): df = self.spark.range(0, 10).selectExpr("parse_json(cast(id as string)) v") @@ -461,7 +461,7 @@ def test_udf_with_aggregate_function(self): my_filter = 
udf(lambda a: a == 1, BooleanType()) sel = df.select(col("key")).distinct().filter(my_filter(col("key"))) - self.assertEqual(sel.collect(), [Row(key=1)]) + assertDataFrameEqual(sel, [Row(key=1)]) my_copy = udf(lambda x: x, IntegerType()) my_add = udf(lambda a, b: int(a + b), IntegerType()) @@ -471,7 +471,7 @@ def test_udf_with_aggregate_function(self): .agg(sum(my_strlen(col("value"))).alias("s")) .select(my_add(col("k"), col("s")).alias("t")) ) - self.assertEqual(sel.collect(), [Row(t=4), Row(t=3)]) + assertDataFrameEqual(sel, [Row(t=4), Row(t=3)]) def test_udf_in_generate(self): from pyspark.sql.functions import explode @@ -505,7 +505,7 @@ def test_udf_with_order_by_and_limit(self): my_copy = udf(lambda x: x, IntegerType()) df = self.spark.range(10).orderBy("id") res = df.select(df.id, my_copy(df.id).alias("copy")).limit(1) - self.assertEqual(res.collect(), [Row(id=0, copy=0)]) + assertDataFrameEqual(res, [Row(id=0, copy=0)]) def test_udf_registration_returns_udf(self): df = self.spark.range(10) @@ -838,12 +838,12 @@ def test_datasource_with_udf(self): for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn("c", c1) expected = df.withColumn("c", lit(2)) - self.assertEqual(expected.collect(), result.collect()) + assertDataFrameEqual(expected, result) for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn("c", c2) expected = df.withColumn("c", col("i") + 1) - self.assertEqual(expected.collect(), result.collect()) + assertDataFrameEqual(expected, result) for df in [filesource_df, datasource_df, datasource_v2_df]: for f in [f1, f2]: @@ -902,7 +902,7 @@ def test_udf_in_subquery(self): result = self.spark.sql( "select i from values(0L) as data(i) where i in (select id from v)" ) - self.assertEqual(result.collect(), [Row(i=0)]) + assertDataFrameEqual(result, [Row(i=0)]) def test_udf_globals_not_overwritten(self): @udf("string") @@ -1229,6 +1229,33 @@ def setUpClass(cls): super(BaseUDFTestsMixin, cls).setUpClass() 
cls.spark.conf.set("spark.sql.execution.pythonUDF.arrow.enabled", "false") + # We cannot check whether the batch size is effective or not. We just run the query with + # various batch size and see whether the query runs successfully, and the output is + # consistent across different batch sizes. + def test_udf_with_various_batch_size(self): + self.spark.catalog.registerFunction("twoArgs", lambda x, y: len(x) + y, IntegerType()) + for batch_size in [1, 33, 1000, 2000]: + with self.sql_conf({"spark.sql.execution.python.udf.maxRecordsPerBatch": batch_size}): + df = self.spark.range(1000).selectExpr("twoArgs('test', id) AS ret").orderBy("ret") + rets = [x["ret"] for x in df.collect()] + self.assertEqual(rets, list(range(4, 1004))) + + # We cannot check whether the buffer size is effective or not. We just run the query with + # various buffer size and see whether the query runs successfully, and the output is + # consistent across different batch sizes. + def test_udf_with_various_buffer_size(self): + self.spark.catalog.registerFunction("twoArgs", lambda x, y: len(x) + y, IntegerType()) + for batch_size in [1, 33, 10000]: + with self.sql_conf({"spark.sql.execution.python.udf.buffer.size": batch_size}): + df = ( + self.spark.range(1000) + .repartition(1) + .selectExpr("twoArgs('test', id) AS ret") + .orderBy("ret") + ) + rets = [x["ret"] for x in df.collect()] + self.assertEqual(rets, list(range(4, 1004))) + class UDFInitializationTests(unittest.TestCase): def tearDown(self): diff --git a/python/pyspark/sql/tests/test_udf_profiler.py b/python/pyspark/sql/tests/test_udf_profiler.py index 7e752b2edca1f..7c741bce51f77 100644 --- a/python/pyspark/sql/tests/test_udf_profiler.py +++ b/python/pyspark/sql/tests/test_udf_profiler.py @@ -31,21 +31,15 @@ from pyspark.sql.functions import col, pandas_udf, udf from pyspark.sql.window import Window from pyspark.profiler import UDFBasicProfiler -from pyspark.testing.sqlutils import ( - ReusedSQLTestCase, +from pyspark.testing.sqlutils 
import ReusedSQLTestCase +from pyspark.testing.utils import ( have_pandas, have_pyarrow, + have_flameprof, pandas_requirement_message, pyarrow_requirement_message, ) -try: - import flameprof # noqa: F401 - - has_flameprof = True -except ImportError: - has_flameprof = False - def _do_computation(spark, *, action=lambda df: df.collect(), use_arrow=False): @udf("long", useArrow=use_arrow) @@ -208,7 +202,7 @@ def test_perf_profiler_udf(self): ) self.assertTrue(f"udf_{id}_perf.pstats" in os.listdir(d)) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -230,7 +224,7 @@ def test_perf_profiler_udf_with_arrow(self): io.getvalue(), f"10.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) def test_perf_profiler_udf_multiple_actions(self): @@ -252,7 +246,7 @@ def action(df): io.getvalue(), f"20.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) def test_perf_profiler_udf_registered(self): @@ -276,7 +270,7 @@ def add1(x): io.getvalue(), f"10.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -309,7 +303,7 @@ def add2(x): io.getvalue(), f"2.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -345,7 +339,7 @@ def add2(iter: Iterator[pd.Series]) -> Iterator[pd.Series]: io.getvalue(), f"2.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -395,7 +389,7 @@ def mean_udf(v: pd.Series) -> float: io.getvalue(), f"5.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if 
has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -427,7 +421,7 @@ def min_udf(v: pd.Series) -> float: io.getvalue(), f"2.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -458,7 +452,7 @@ def normalize(pdf): io.getvalue(), f"2.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -496,7 +490,7 @@ def asof_join(left, right): io.getvalue(), f"2.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -530,7 +524,7 @@ def normalize(table): io.getvalue(), f"2.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) @unittest.skipIf( @@ -562,7 +556,7 @@ def summarize(left, right): io.getvalue(), f"2.*{os.path.basename(inspect.getfile(_do_computation))}" ) - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) def test_perf_profiler_render(self): @@ -572,7 +566,7 @@ def test_perf_profiler_render(self): id = list(self.profile_results.keys())[0] - if has_flameprof: + if have_flameprof: self.assertIn("svg", self.spark.profile.render(id)) self.assertIn("svg", self.spark.profile.render(id, type="perf")) self.assertIn("svg", self.spark.profile.render(id, renderer="flameprof")) diff --git a/python/pyspark/sql/tests/test_udtf.py b/python/pyspark/sql/tests/test_udtf.py index f3f993fc6a787..eca3ab0013b92 100644 --- a/python/pyspark/sql/tests/test_udtf.py +++ b/python/pyspark/sql/tests/test_udtf.py @@ -27,10 +27,12 @@ PySparkTypeError, AnalysisException, PySparkPicklingError, + IllegalArgumentException, ) from pyspark.util import PythonEvalType from 
pyspark.sql.functions import ( array, + col, create_map, array, lit, @@ -74,8 +76,7 @@ def eval(self): yield "hello", "world" func = udtf(TestUDTF, returnType="c1: string, c2: string") - rows = func().collect() - self.assertEqual(rows, [Row(c1="hello", c2="world")]) + assertDataFrameEqual(func(), [Row(c1="hello", c2="world")]) def test_udtf_yield_single_row_col(self): class TestUDTF: @@ -83,8 +84,7 @@ def eval(self, a: int): yield a, func = udtf(TestUDTF, returnType="a: int") - rows = func(lit(1)).collect() - self.assertEqual(rows, [Row(a=1)]) + assertDataFrameEqual(func(lit(1)), [Row(a=1)]) def test_udtf_yield_multi_cols(self): class TestUDTF: @@ -92,8 +92,7 @@ def eval(self, a: int): yield a, a + 1 func = udtf(TestUDTF, returnType="a: int, b: int") - rows = func(lit(1)).collect() - self.assertEqual(rows, [Row(a=1, b=2)]) + assertDataFrameEqual(func(lit(1)), [Row(a=1, b=2)]) def test_udtf_yield_multi_rows(self): class TestUDTF: @@ -102,8 +101,7 @@ def eval(self, a: int): yield a + 1, func = udtf(TestUDTF, returnType="a: int") - rows = func(lit(1)).collect() - self.assertEqual(rows, [Row(a=1), Row(a=2)]) + assertDataFrameEqual(func(lit(1)), [Row(a=1), Row(a=2)]) def test_udtf_yield_multi_row_col(self): class TestUDTF: @@ -113,8 +111,8 @@ def eval(self, a: int, b: int): yield a, b, b - a func = udtf(TestUDTF, returnType="a: int, b: int, c: int") - rows = func(lit(1), lit(2)).collect() - self.assertEqual(rows, [Row(a=1, b=2, c=3), Row(a=1, b=2, c=-1), Row(a=1, b=2, c=1)]) + res = func(lit(1), lit(2)) + assertDataFrameEqual(res, [Row(a=1, b=2, c=3), Row(a=1, b=2, c=-1), Row(a=1, b=2, c=1)]) def test_udtf_decorator(self): @udtf(returnType="a: int, b: int") @@ -122,8 +120,7 @@ class TestUDTF: def eval(self, a: int): yield a, a + 1 - rows = TestUDTF(lit(1)).collect() - self.assertEqual(rows, [Row(a=1, b=2)]) + assertDataFrameEqual(TestUDTF(lit(1)), [Row(a=1, b=2)]) def test_udtf_registration(self): class TestUDTF: @@ -135,9 +132,7 @@ def eval(self, a: int, b: int): func 
= udtf(TestUDTF, returnType="a: int, b: int, c: int") self.spark.udtf.register("testUDTF", func) df = self.spark.sql("SELECT * FROM testUDTF(1, 2)") - self.assertEqual( - df.collect(), [Row(a=1, b=2, c=3), Row(a=1, b=2, c=-1), Row(a=1, b=2, c=1)] - ) + assertDataFrameEqual(df, [Row(a=1, b=2, c=3), Row(a=1, b=2, c=-1), Row(a=1, b=2, c=1)]) def test_udtf_with_lateral_join(self): class TestUDTF: @@ -150,10 +145,33 @@ def eval(self, a: int, b: int) -> Iterator: df = self.spark.sql( "SELECT f.* FROM values (0, 1), (1, 2) t(a, b), LATERAL testUDTF(a, b) f" ) + schema = StructType( + [ + StructField("a", IntegerType(), True), + StructField("b", IntegerType(), True), + StructField("c", IntegerType(), True), + ] + ) expected = self.spark.createDataFrame( - [(0, 1, 1), (0, 1, -1), (1, 2, 3), (1, 2, -1)], schema=["a", "b", "c"] + [(0, 1, 1), (0, 1, -1), (1, 2, 3), (1, 2, -1)], schema=schema + ) + assertDataFrameEqual(df, expected) + + def test_udtf_with_lateral_join_dataframe(self): + @udtf(returnType="a: int, b: int, c: int") + class TestUDTF: + def eval(self, a: int, b: int) -> Iterator: + yield a, b, a + b + yield a, b, a - b + + self.spark.udtf.register("testUDTF", TestUDTF) + + assertDataFrameEqual( + self.spark.sql("values (0, 1), (1, 2) t(a, b)").lateralJoin( + TestUDTF(col("a").outer(), col("b").outer()) + ), + self.spark.sql("SELECT * FROM values (0, 1), (1, 2) t(a, b), LATERAL testUDTF(a, b)"), ) - self.assertEqual(df.collect(), expected.collect()) def test_udtf_eval_with_return_stmt(self): class TestUDTF: @@ -161,8 +179,8 @@ def eval(self, a: int, b: int): return [(a, a + 1), (b, b + 1)] func = udtf(TestUDTF, returnType="a: int, b: int") - rows = func(lit(1), lit(2)).collect() - self.assertEqual(rows, [Row(a=1, b=2), Row(a=2, b=3)]) + res = func(lit(1), lit(2)) + assertDataFrameEqual(res, [Row(a=1, b=2), Row(a=2, b=3)]) def test_udtf_eval_returning_non_tuple(self): @udtf(returnType="a: int") @@ -217,14 +235,14 @@ class TestUDTF: def eval(self, a: int): ... 
- self.assertEqual(TestUDTF(lit(1)).collect(), []) + assertDataFrameEqual(TestUDTF(lit(1)), []) @udtf(returnType="a: int") class TestUDTF: def eval(self, a: int): return - self.assertEqual(TestUDTF(lit(1)).collect(), []) + assertDataFrameEqual(TestUDTF(lit(1)), []) def test_udtf_with_conditional_return(self): class TestUDTF: @@ -234,11 +252,25 @@ def eval(self, a: int): func = udtf(TestUDTF, returnType="a: int") self.spark.udtf.register("test_udtf", func) - self.assertEqual( - self.spark.sql("SELECT * FROM range(0, 8) JOIN LATERAL test_udtf(id)").collect(), + assertDataFrameEqual( + self.spark.sql("SELECT * FROM range(0, 8) JOIN LATERAL test_udtf(id)"), [Row(id=6, a=6), Row(id=7, a=7)], ) + def test_udtf_with_conditional_return_dataframe(self): + @udtf(returnType="a: int") + class TestUDTF: + def eval(self, a: int): + if a > 5: + yield a, + + self.spark.udtf.register("test_udtf", TestUDTF) + + assertDataFrameEqual( + self.spark.range(8).lateralJoin(TestUDTF(col("id").outer())), + self.spark.sql("SELECT * FROM range(0, 8) JOIN LATERAL test_udtf(id)"), + ) + def test_udtf_with_empty_yield(self): @udtf(returnType="a: int") class TestUDTF: @@ -254,9 +286,9 @@ def eval(self, a: int): yield a, yield None, - self.assertEqual(TestUDTF(lit(1)).collect(), [Row(a=1), Row(a=None)]) + assertDataFrameEqual(TestUDTF(lit(1)), [Row(a=1), Row(a=None)]) df = self.spark.createDataFrame([(0, 1), (1, 2)], schema=["a", "b"]) - self.assertEqual(TestUDTF(lit(1)).join(df, "a", "inner").collect(), [Row(a=1, b=2)]) + assertDataFrameEqual(TestUDTF(lit(1)).join(df, "a", "inner"), [Row(a=1, b=2)]) assertDataFrameEqual( TestUDTF(lit(1)).join(df, "a", "left"), [Row(a=None, b=None), Row(a=1, b=2)] ) @@ -267,10 +299,10 @@ class TestUDTF: def eval(self, a: int): yield a, - self.assertEqual(TestUDTF(lit(None)).collect(), [Row(a=None)]) + assertDataFrameEqual(TestUDTF(lit(None)), [Row(a=None)]) self.spark.udtf.register("testUDTF", TestUDTF) df = self.spark.sql("SELECT * FROM testUDTF(null)") - 
self.assertEqual(df.collect(), [Row(a=None)]) + assertDataFrameEqual(df, [Row(a=None)]) # These are expected error message substrings to be used in test cases below. tooManyPositionalArguments = "too many positional arguments" @@ -366,8 +398,8 @@ def __init__(self): def eval(self, a: int): yield a, a + 1, self.key - rows = TestUDTF(lit(1)).collect() - self.assertEqual(rows, [Row(a=1, b=2, c="test")]) + res = TestUDTF(lit(1)) + assertDataFrameEqual(res, [Row(a=1, b=2, c="test")]) def test_udtf_terminate(self): @udtf(returnType="key: string, value: float") @@ -385,8 +417,8 @@ def terminate(self): yield "count", float(self._count) yield "avg", self._sum / self._count - self.assertEqual( - TestUDTF(lit(1)).collect(), + assertDataFrameEqual( + TestUDTF(lit(1)), [Row(key="input", value=1), Row(key="count", value=1.0), Row(key="avg", value=1.0)], ) @@ -395,8 +427,8 @@ def terminate(self): "SELECT id, key, value FROM range(0, 10, 1, 2), " "LATERAL test_udtf(id) WHERE key != 'input'" ) - self.assertEqual( - df.collect(), + assertDataFrameEqual( + df, [ Row(id=4, key="count", value=5.0), Row(id=4, key="avg", value=2.0), @@ -608,10 +640,8 @@ def eval(self, person): yield f"{person.name}: {person.age}", self.spark.udtf.register("test_udtf", TestUDTF) - self.assertEqual( - self.spark.sql( - "select * from test_udtf(named_struct('name', 'Alice', 'age', 1))" - ).collect(), + assertDataFrameEqual( + self.spark.sql("select * from test_udtf(named_struct('name', 'Alice', 'age', 1))"), [Row(x="Alice: 1")], ) @@ -634,8 +664,8 @@ def eval(self, m): yield str(m), self.spark.udtf.register("test_udtf", TestUDTF) - self.assertEqual( - self.spark.sql("select * from test_udtf(map('key', 'value'))").collect(), + assertDataFrameEqual( + self.spark.sql("select * from test_udtf(map('key', 'value'))"), [Row(x="{'key': 'value'}")], ) @@ -645,7 +675,7 @@ class TestUDTF: def eval(self, x: int): yield {"a": x, "b": x + 1}, - self.assertEqual(TestUDTF(lit(1)).collect(), [Row(x=Row(a=1, b=2))]) + 
assertDataFrameEqual(TestUDTF(lit(1)), [Row(x=Row(a=1, b=2))]) def test_udtf_with_array_output_types(self): @udtf(returnType="x: array") @@ -653,7 +683,7 @@ class TestUDTF: def eval(self, x: int): yield [x, x + 1, x + 2], - self.assertEqual(TestUDTF(lit(1)).collect(), [Row(x=[1, 2, 3])]) + assertDataFrameEqual(TestUDTF(lit(1)), [Row(x=[1, 2, 3])]) def test_udtf_with_map_output_types(self): @udtf(returnType="x: map") @@ -661,7 +691,7 @@ class TestUDTF: def eval(self, x: int): yield {x: str(x)}, - self.assertEqual(TestUDTF(lit(1)).collect(), [Row(x={1: "1"})]) + assertDataFrameEqual(TestUDTF(lit(1)), [Row(x={1: "1"})]) def test_udtf_with_empty_output_types(self): @udtf(returnType=StructType()) @@ -1019,18 +1049,133 @@ def test_udtf(a: int): ) def test_udtf_with_table_argument_query(self): + func = self.udtf_for_table_argument() + self.spark.udtf.register("test_udtf", func) + assertDataFrameEqual( + self.spark.sql("SELECT * FROM test_udtf(TABLE (SELECT id FROM range(0, 8)))"), + [Row(a=6), Row(a=7)], + ) + + def test_df_asTable(self): + func = self.udtf_for_table_argument() + self.spark.udtf.register("test_udtf", func) + df = self.spark.range(8) + assertDataFrameEqual( + func(df.asTable()), + self.spark.sql("SELECT * FROM test_udtf(TABLE (SELECT id FROM range(0, 8)))"), + ) + + def udtf_for_table_argument(self): class TestUDTF: def eval(self, row: Row): if row["id"] > 5: yield row["id"], func = udtf(TestUDTF, returnType="a: int") - self.spark.udtf.register("test_udtf", func) - self.assertEqual( - self.spark.sql("SELECT * FROM test_udtf(TABLE (SELECT id FROM range(0, 8)))").collect(), - [Row(a=6), Row(a=7)], + return func + + def test_df_asTable_chaining_methods(self): + class TestUDTF: + def eval(self, row: Row): + yield row["key"], row["value"] + + func = udtf(TestUDTF, returnType="key: int, value: string") + df = self.spark.createDataFrame( + [(1, "a", 3), (1, "b", 3), (2, "c", 4), (2, "d", 4)], ["key", "value", "number"] + ) + assertDataFrameEqual( + 
func(df.asTable().partitionBy("key").orderBy(df.value)), + [ + Row(key=1, value="a"), + Row(key=1, value="b"), + Row(key=2, value="c"), + Row(key=2, value="d"), + ], + checkRowOrder=True, + ) + assertDataFrameEqual( + func(df.asTable().partitionBy(["key", "number"]).orderBy(df.value)), + [ + Row(key=1, value="a"), + Row(key=1, value="b"), + Row(key=2, value="c"), + Row(key=2, value="d"), + ], + checkRowOrder=True, + ) + assertDataFrameEqual( + func(df.asTable().partitionBy("key").orderBy(df.value.desc())), + [ + Row(key=1, value="b"), + Row(key=1, value="a"), + Row(key=2, value="d"), + Row(key=2, value="c"), + ], + checkRowOrder=True, + ) + assertDataFrameEqual( + func(df.asTable().partitionBy("key").orderBy(["number", "value"])), + [ + Row(key=1, value="a"), + Row(key=1, value="b"), + Row(key=2, value="c"), + Row(key=2, value="d"), + ], + checkRowOrder=True, + ) + assertDataFrameEqual( + func(df.asTable().withSinglePartition()), + [ + Row(key=1, value="a"), + Row(key=1, value="b"), + Row(key=2, value="c"), + Row(key=2, value="d"), + ], + ) + + assertDataFrameEqual( + func(df.asTable().withSinglePartition().orderBy("value")), + [ + Row(key=1, value="a"), + Row(key=1, value="b"), + Row(key=2, value="c"), + Row(key=2, value="d"), + ], ) + with self.assertRaisesRegex( + IllegalArgumentException, + r"Cannot call withSinglePartition\(\) after partitionBy\(\)" + r" or withSinglePartition\(\) has been called", + ): + df.asTable().partitionBy(df.key).withSinglePartition() + + with self.assertRaisesRegex( + IllegalArgumentException, + r"Cannot call partitionBy\(\) after partitionBy\(\)" + r" or withSinglePartition\(\) has been called", + ): + df.asTable().withSinglePartition().partitionBy(df.key) + + with self.assertRaisesRegex( + IllegalArgumentException, + r"Please call partitionBy\(\) or withSinglePartition\(\) before orderBy\(\)", + ): + df.asTable().orderBy(df.key) + + with self.assertRaisesRegex( + IllegalArgumentException, + r"Please call partitionBy\(\) or 
withSinglePartition\(\) before orderBy\(\)", + ): + df.asTable().partitionBy().orderBy(df.key) + + with self.assertRaisesRegex( + IllegalArgumentException, + r"Cannot call partitionBy\(\) after partitionBy\(\)" + r" or withSinglePartition\(\) has been called", + ): + df.asTable().partitionBy(df.key).partitionBy() + def test_udtf_with_int_and_table_argument_query(self): class TestUDTF: def eval(self, i: int, row: Row): @@ -1039,26 +1184,19 @@ def eval(self, i: int, row: Row): func = udtf(TestUDTF, returnType="a: int") self.spark.udtf.register("test_udtf", func) - self.assertEqual( - self.spark.sql( - "SELECT * FROM test_udtf(5, TABLE (SELECT id FROM range(0, 8)))" - ).collect(), + assertDataFrameEqual( + self.spark.sql("SELECT * FROM test_udtf(5, TABLE (SELECT id FROM range(0, 8)))"), [Row(a=6), Row(a=7)], ) def test_udtf_with_table_argument_identifier(self): - class TestUDTF: - def eval(self, row: Row): - if row["id"] > 5: - yield row["id"], - - func = udtf(TestUDTF, returnType="a: int") + func = self.udtf_for_table_argument() self.spark.udtf.register("test_udtf", func) with self.tempView("v"): self.spark.sql("CREATE OR REPLACE TEMPORARY VIEW v as SELECT id FROM range(0, 8)") - self.assertEqual( - self.spark.sql("SELECT * FROM test_udtf(TABLE (v))").collect(), + assertDataFrameEqual( + self.spark.sql("SELECT * FROM test_udtf(TABLE (v))"), [Row(a=6), Row(a=7)], ) @@ -1073,44 +1211,29 @@ def eval(self, i: int, row: Row): with self.tempView("v"): self.spark.sql("CREATE OR REPLACE TEMPORARY VIEW v as SELECT id FROM range(0, 8)") - self.assertEqual( - self.spark.sql("SELECT * FROM test_udtf(5, TABLE (v))").collect(), + assertDataFrameEqual( + self.spark.sql("SELECT * FROM test_udtf(5, TABLE (v))"), [Row(a=6), Row(a=7)], ) def test_udtf_with_table_argument_unknown_identifier(self): - class TestUDTF: - def eval(self, row: Row): - if row["id"] > 5: - yield row["id"], - - func = udtf(TestUDTF, returnType="a: int") + func = self.udtf_for_table_argument() 
self.spark.udtf.register("test_udtf", func) with self.assertRaisesRegex(AnalysisException, "TABLE_OR_VIEW_NOT_FOUND"): self.spark.sql("SELECT * FROM test_udtf(TABLE (v))").collect() def test_udtf_with_table_argument_malformed_query(self): - class TestUDTF: - def eval(self, row: Row): - if row["id"] > 5: - yield row["id"], - - func = udtf(TestUDTF, returnType="a: int") + func = self.udtf_for_table_argument() self.spark.udtf.register("test_udtf", func) with self.assertRaisesRegex(AnalysisException, "TABLE_OR_VIEW_NOT_FOUND"): self.spark.sql("SELECT * FROM test_udtf(TABLE (SELECT * FROM v))").collect() def test_udtf_with_table_argument_cte_inside(self): - class TestUDTF: - def eval(self, row: Row): - if row["id"] > 5: - yield row["id"], - - func = udtf(TestUDTF, returnType="a: int") + func = self.udtf_for_table_argument() self.spark.udtf.register("test_udtf", func) - self.assertEqual( + assertDataFrameEqual( self.spark.sql( """ SELECT * FROM test_udtf(TABLE ( @@ -1120,19 +1243,14 @@ def eval(self, row: Row): SELECT * FROM t )) """ - ).collect(), + ), [Row(a=6), Row(a=7)], ) def test_udtf_with_table_argument_cte_outside(self): - class TestUDTF: - def eval(self, row: Row): - if row["id"] > 5: - yield row["id"], - - func = udtf(TestUDTF, returnType="a: int") + func = self.udtf_for_table_argument() self.spark.udtf.register("test_udtf", func) - self.assertEqual( + assertDataFrameEqual( self.spark.sql( """ WITH t AS ( @@ -1140,11 +1258,11 @@ def eval(self, row: Row): ) SELECT * FROM test_udtf(TABLE (SELECT id FROM t)) """ - ).collect(), + ), [Row(a=6), Row(a=7)], ) - self.assertEqual( + assertDataFrameEqual( self.spark.sql( """ WITH t AS ( @@ -1152,28 +1270,23 @@ def eval(self, row: Row): ) SELECT * FROM test_udtf(TABLE (t)) """ - ).collect(), + ), [Row(a=6), Row(a=7)], ) # TODO(SPARK-44233): Fix the subquery resolution. 
@unittest.skip("Fails to resolve the subquery.") def test_udtf_with_table_argument_lateral_join(self): - class TestUDTF: - def eval(self, row: Row): - if row["id"] > 5: - yield row["id"], - - func = udtf(TestUDTF, returnType="a: int") + func = self.udtf_for_table_argument() self.spark.udtf.register("test_udtf", func) - self.assertEqual( + assertDataFrameEqual( self.spark.sql( """ SELECT * FROM range(0, 8) AS t, LATERAL test_udtf(TABLE (t)) """ - ).collect(), + ), [Row(a=6), Row(a=7)], ) @@ -1198,8 +1311,8 @@ def eval(self, a: Row, b: Row): self.spark.sql(query).collect() with self.sql_conf({"spark.sql.tvf.allowMultipleTableArguments.enabled": True}): - self.assertEqual( - self.spark.sql(query).collect(), + assertDataFrameEqual( + self.spark.sql(query), [ Row(a=0, b=0), Row(a=1, b=0), @@ -1375,7 +1488,7 @@ def eval(self, a, b): assertSchemaEqual(df.schema, expected_schema) assertDataFrameEqual(df, expected_results) - def test_udtf_with_analyze_arbitary_number_arguments(self): + def test_udtf_with_analyze_arbitrary_number_arguments(self): class TestUDTF: @staticmethod def analyze(*args: AnalyzeArgument) -> AnalyzeResult: @@ -2197,6 +2310,17 @@ def terminate(self): ], ) + def test_udtf_with_table_argument_and_partition_by_no_terminate(self): + func = self.udtf_for_table_argument() # a udtf with no terminate method defined + self.spark.udtf.register("test_udtf", func) + + assertDataFrameEqual( + self.spark.sql( + "SELECT * FROM test_udtf(TABLE (SELECT id FROM range(0, 8)) PARTITION BY id)" + ), + [Row(a=6), Row(a=7)], + ) + def test_udtf_with_table_argument_and_partition_by_and_order_by(self): class TestUDTF: def __init__(self): @@ -2539,8 +2663,10 @@ def eval(self, v): yield i, v.toJson() self.spark.udtf.register("test_udtf", TestUDTF) - rows = self.spark.sql('select i, s from test_udtf(parse_json(\'{"a":"b"}\'))').collect() - self.assertEqual(rows, [Row(i=n, s='{"a":"b"}') for n in range(10)]) + assertDataFrameEqual( + self.spark.sql('select i, s from 
test_udtf(parse_json(\'{"a":"b"}\'))'), + [Row(i=n, s='{"a":"b"}') for n in range(10)], + ) def test_udtf_with_nested_variant_input(self): # struct @@ -2551,10 +2677,10 @@ def eval(self, v): yield i, v["v"].toJson() self.spark.udtf.register("test_udtf_struct", TestUDTFStruct) - rows = self.spark.sql( + res = self.spark.sql( "select i, s from test_udtf_struct(named_struct('v', parse_json('{\"a\":\"c\"}')))" - ).collect() - self.assertEqual(rows, [Row(i=n, s='{"a":"c"}') for n in range(10)]) + ) + assertDataFrameEqual(res, [Row(i=n, s='{"a":"c"}') for n in range(10)]) # array @udtf(returnType="i int, s: string") @@ -2564,10 +2690,8 @@ def eval(self, v): yield i, v[0].toJson() self.spark.udtf.register("test_udtf_array", TestUDTFArray) - rows = self.spark.sql( - 'select i, s from test_udtf_array(array(parse_json(\'{"a":"d"}\')))' - ).collect() - self.assertEqual(rows, [Row(i=n, s='{"a":"d"}') for n in range(10)]) + res = self.spark.sql('select i, s from test_udtf_array(array(parse_json(\'{"a":"d"}\')))') + assertDataFrameEqual(res, [Row(i=n, s='{"a":"d"}') for n in range(10)]) # map @udtf(returnType="i int, s: string") @@ -2577,10 +2701,10 @@ def eval(self, v): yield i, v["v"].toJson() self.spark.udtf.register("test_udtf_map", TestUDTFMap) - rows = self.spark.sql( + res = self.spark.sql( "select i, s from test_udtf_map(map('v', parse_json('{\"a\":\"e\"}')))" - ).collect() - self.assertEqual(rows, [Row(i=n, s='{"a":"e"}') for n in range(10)]) + ) + assertDataFrameEqual(res, [Row(i=n, s='{"a":"e"}') for n in range(10)]) def test_udtf_with_variant_output(self): @udtf(returnType="i int, v: variant") @@ -2591,8 +2715,8 @@ def eval(self, n): yield i, VariantVal(bytes([2, 1, 0, 0, 2, 5, 97 + i]), bytes([1, 1, 0, 1, 97])) self.spark.udtf.register("test_udtf", TestUDTF) - rows = self.spark.sql("select i, to_json(v) from test_udtf(8)").collect() - self.assertEqual(rows, [Row(i=n, s=f'{{"a":"{chr(97 + n)}"}}') for n in range(8)]) + res = self.spark.sql("select i, to_json(v) from 
test_udtf(8)") + assertDataFrameEqual(res, [Row(i=n, s=f'{{"a":"{chr(97 + n)}"}}') for n in range(8)]) def test_udtf_with_nested_variant_output(self): # struct @@ -2606,8 +2730,8 @@ def eval(self, n): } self.spark.udtf.register("test_udtf_struct", TestUDTFStruct) - rows = self.spark.sql("select i, to_json(v.v1) from test_udtf_struct(8)").collect() - self.assertEqual(rows, [Row(i=n, s=f'{{"a":"{chr(97 + n)}"}}') for n in range(8)]) + res = self.spark.sql("select i, to_json(v.v1) from test_udtf_struct(8)") + assertDataFrameEqual(res, [Row(i=n, s=f'{{"a":"{chr(97 + n)}"}}') for n in range(8)]) # array @udtf(returnType="i int, v: array") @@ -2620,8 +2744,8 @@ def eval(self, n): ] self.spark.udtf.register("test_udtf_array", TestUDTFArray) - rows = self.spark.sql("select i, to_json(v[0]) from test_udtf_array(8)").collect() - self.assertEqual(rows, [Row(i=n, s=f'{{"a":"{chr(98 + n)}"}}') for n in range(8)]) + res = self.spark.sql("select i, to_json(v[0]) from test_udtf_array(8)") + assertDataFrameEqual(res, [Row(i=n, s=f'{{"a":"{chr(98 + n)}"}}') for n in range(8)]) # map @udtf(returnType="i int, v: map") @@ -2634,8 +2758,8 @@ def eval(self, n): } self.spark.udtf.register("test_udtf_struct", TestUDTFStruct) - rows = self.spark.sql("select i, to_json(v['v1']) from test_udtf_struct(8)").collect() - self.assertEqual(rows, [Row(i=n, s=f'{{"a":"{chr(99 + n)}"}}') for n in range(8)]) + res = self.spark.sql("select i, to_json(v['v1']) from test_udtf_struct(8)") + assertDataFrameEqual(res, [Row(i=n, s=f'{{"a":"{chr(99 + n)}"}}') for n in range(8)]) class UDTFTests(BaseUDTFTestsMixin, ReusedSQLTestCase): diff --git a/python/pyspark/sql/tvf_argument.py b/python/pyspark/sql/tvf_argument.py new file mode 100644 index 0000000000000..cb373d35d9ec2 --- /dev/null +++ b/python/pyspark/sql/tvf_argument.py @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +class TableValuedFunctionArgument: + """ + Base class for arguments passed to Table Valued Functions. + + This class is intentionally left empty and serves as a marker to achieve + parity with the Scala `TableValuedFunctionArgument` trait. + """ + + pass diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 03227c8c8760f..7c3b97f951d67 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -49,7 +49,6 @@ from pyspark.util import is_remote_only, JVM_INT_MAX from pyspark.serializers import CloudPickleSerializer from pyspark.sql.utils import ( - has_numpy, get_active_spark_context, escape_meta_characters, StringConcat, @@ -65,9 +64,6 @@ PySparkKeyError, ) -if has_numpy: - import numpy as np - if TYPE_CHECKING: import numpy as np from py4j.java_gateway import GatewayClient, JavaGateway, JavaClass @@ -1555,6 +1551,18 @@ def treeString(self, maxDepth: int = JVM_INT_MAX) -> str: field._build_formatted_string(prefix, stringConcat, depth) return stringConcat.toString() + def toDDL(self) -> str: + from pyspark.sql.utils import is_remote + + if is_remote(): + from pyspark.sql.connect.session import SparkSession + + session = SparkSession.getActiveSession() + assert session is not None + return session._to_ddl(self) + else: + return 
get_active_spark_context()._to_ddl(self) + class VariantType(AtomicType): """ @@ -1572,6 +1580,8 @@ def fromInternal(self, obj: Dict) -> Optional["VariantVal"]: return VariantVal(obj["value"], obj["metadata"]) def toInternal(self, variant: Any) -> Any: + if variant is None: + return None assert isinstance(variant, VariantVal) return {"value": variant.value, "metadata": variant.metadata} @@ -1758,6 +1768,15 @@ def toJson(self, zone_id: str = "UTC") -> str: """ return VariantUtils.to_json(self.value, self.metadata, zone_id) + @classmethod + def parseJson(cls, json_str: str) -> "VariantVal": + """ + Parses the given JSON string and constructs the corresponding VariantVal. + :return: a VariantVal instance representing the parsed JSON string + """ + (value, metadata) = VariantUtils.parse_json(json_str) + return VariantVal(value, metadata) + _atomic_types: List[Type[DataType]] = [ StringType, @@ -1888,43 +1907,9 @@ def _parse_datatype_string(s: str) -> DataType: if is_remote(): from pyspark.sql.connect.session import SparkSession - return cast( - DataType, - SparkSession.active()._client._analyze(method="ddl_parse", ddl_string=s).parsed, - ) - + return SparkSession.active()._parse_ddl(s) else: - from py4j.java_gateway import JVMView - - sc = get_active_spark_context() - - def from_ddl_schema(type_str: str) -> DataType: - return _parse_datatype_json_string( - cast(JVMView, sc._jvm) - .org.apache.spark.sql.types.StructType.fromDDL(type_str) - .json() - ) - - def from_ddl_datatype(type_str: str) -> DataType: - return _parse_datatype_json_string( - cast(JVMView, sc._jvm) - .org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str) - .json() - ) - - try: - # DDL format, "fieldname datatype, fieldname datatype". - return from_ddl_schema(s) - except Exception as e: - try: - # For backwards compatibility, "integer", "struct" and etc. 
- return from_ddl_datatype(s) - except BaseException: - try: - # For backwards compatibility, "fieldname: datatype, fieldname: datatype" case. - return from_ddl_datatype("struct<%s>" % s.strip()) - except BaseException: - raise e + return get_active_spark_context()._parse_ddl(s) def _parse_datatype_json_string(json_string: str) -> DataType: @@ -3250,7 +3235,13 @@ def convert(self, obj: datetime.timedelta, gateway_client: "GatewayClient") -> " class NumpyScalarConverter: def can_convert(self, obj: Any) -> bool: - return has_numpy and isinstance(obj, np.generic) + from pyspark.testing.utils import have_numpy + + if have_numpy: + import numpy as np + + return isinstance(obj, np.generic) + return False def convert(self, obj: "np.generic", gateway_client: "GatewayClient") -> Any: return obj.item() @@ -3261,6 +3252,8 @@ def _from_numpy_type_to_java_type( self, nt: "np.dtype", gateway: "JavaGateway" ) -> Optional["JavaClass"]: """Convert NumPy type to Py4J Java type.""" + import numpy as np + if nt in [np.dtype("int8"), np.dtype("int16")]: # Mapping int8 to gateway.jvm.byte causes # TypeError: 'bytes' object does not support item assignment @@ -3281,7 +3274,13 @@ def _from_numpy_type_to_java_type( return None def can_convert(self, obj: Any) -> bool: - return has_numpy and isinstance(obj, np.ndarray) and obj.ndim == 1 + from pyspark.testing.utils import have_numpy + + if have_numpy: + import numpy as np + + return isinstance(obj, np.ndarray) and obj.ndim == 1 + return False def convert(self, obj: "np.ndarray", gateway_client: "GatewayClient") -> "JavaGateway": from pyspark import SparkContext diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index 9cf93938528f8..cf093bd936437 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -391,7 +391,7 @@ def _create_judf(self, func: Callable[..., Any]) -> "JavaObject": wrapped_func = _wrap_function(sc, func, self.returnType) jdt = spark._jsparkSession.parseDataType(self.returnType.json()) assert 
sc._jvm is not None - judf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonFunction( + judf = getattr(sc._jvm, "org.apache.spark.sql.execution.python.UserDefinedPythonFunction")( self._name, wrapped_func, jdt, self.evalType, self.deterministic ) return judf diff --git a/python/pyspark/sql/udtf.py b/python/pyspark/sql/udtf.py index 5ce3e2dfd2a9e..3ea32349d81d2 100644 --- a/python/pyspark/sql/udtf.py +++ b/python/pyspark/sql/udtf.py @@ -32,7 +32,7 @@ if TYPE_CHECKING: from py4j.java_gateway import JavaObject - from pyspark.sql._typing import ColumnOrName + from pyspark.sql._typing import TVFArgumentOrName from pyspark.sql.dataframe import DataFrame from pyspark.sql.session import SparkSession @@ -148,7 +148,7 @@ class AnalyzeResult: The schema that the Python UDTF will return. withSinglePartition: bool If true, the UDTF is specifying for Catalyst to repartition all rows of the input TABLE - argument to one collection for consumption by exactly one instance of the correpsonding + argument to one collection for consumption by exactly one instance of the corresponding UDTF class. 
partitionBy: sequence of :class:`PartitioningColumn` If non-empty, this is a sequence of expressions that the UDTF is specifying for Catalyst to @@ -362,32 +362,51 @@ def _create_judtf(self, func: Type) -> "JavaObject": assert sc._jvm is not None if self.returnType is None: - judtf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonTableFunction( - self._name, wrapped_func, self.evalType, self.deterministic - ) + judtf = getattr( + sc._jvm, "org.apache.spark.sql.execution.python.UserDefinedPythonTableFunction" + )(self._name, wrapped_func, self.evalType, self.deterministic) else: jdt = spark._jsparkSession.parseDataType(self.returnType.json()) - judtf = sc._jvm.org.apache.spark.sql.execution.python.UserDefinedPythonTableFunction( - self._name, wrapped_func, jdt, self.evalType, self.deterministic - ) + judtf = getattr( + sc._jvm, "org.apache.spark.sql.execution.python.UserDefinedPythonTableFunction" + )(self._name, wrapped_func, jdt, self.evalType, self.deterministic) return judtf - def __call__(self, *args: "ColumnOrName", **kwargs: "ColumnOrName") -> "DataFrame": + def __call__(self, *args: "TVFArgumentOrName", **kwargs: "TVFArgumentOrName") -> "DataFrame": from pyspark.sql.classic.column import _to_java_column, _to_seq from pyspark.sql import DataFrame, SparkSession + from pyspark.sql.table_arg import TableArg spark = SparkSession._getActiveSessionOrCreate() sc = spark.sparkContext assert sc._jvm is not None - jcols = [_to_java_column(arg) for arg in args] + [ - sc._jvm.PythonSQLUtils.namedArgumentExpression(key, _to_java_column(value)) - for key, value in kwargs.items() - ] + # Process positional arguments + jargs = [] + for arg in args: + if isinstance(arg, TableArg): + # If the argument is a TableArg, get the Java TableArg object + jargs.append(arg._j_table_arg) + else: + # Otherwise, convert it to a Java column + jargs.append(_to_java_column(arg)) # type: ignore[arg-type] + + # Process keyword arguments + jkwargs = [] + for key, value in 
kwargs.items(): + if isinstance(value, TableArg): + # If the value is a TableArg, get the Java TableArg object + j_arg = value._j_table_arg + else: + # Otherwise, convert it to a Java column + j_arg = _to_java_column(value) # type: ignore[arg-type] + # Create a named argument expression + j_named_arg = sc._jvm.PythonSQLUtils.namedArgumentExpression(key, j_arg) + jkwargs.append(j_named_arg) judtf = self._judtf - jPythonUDTF = judtf.apply(spark._jsparkSession, _to_seq(sc, jcols)) + jPythonUDTF = judtf.apply(spark._jsparkSession, _to_seq(sc, jargs + jkwargs)) return DataFrame(jPythonUDTF, spark) def asDeterministic(self) -> "UserDefinedTableFunction": diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index 3cacc5b9d021a..855496ff3b7ca 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -62,14 +62,6 @@ from pyspark.sql.dataframe import DataFrame from pyspark.pandas._typing import IndexOpsLike, SeriesOrIndex -has_numpy: bool = False -try: - import numpy as np # noqa: F401 - - has_numpy = True -except ImportError: - pass - FuncT = TypeVar("FuncT", bound=Callable[..., Any]) @@ -118,18 +110,34 @@ def require_test_compiled() -> None: def require_minimum_plotly_version() -> None: """Raise ImportError if plotly is not installed""" + from pyspark.loose_version import LooseVersion + minimum_plotly_version = "4.8" try: - import plotly # noqa: F401 + import plotly + + have_plotly = True except ImportError as error: + have_plotly = False + raised_error = error + if not have_plotly: raise PySparkImportError( errorClass="PACKAGE_NOT_INSTALLED", messageParameters={ - "package_name": "plotly", + "package_name": "Plotly", "minimum_version": str(minimum_plotly_version), }, - ) from error + ) from raised_error + if LooseVersion(plotly.__version__) < LooseVersion(minimum_plotly_version): + raise PySparkImportError( + errorClass="UNSUPPORTED_PACKAGE_VERSION", + messageParameters={ + "package_name": "Plotly", + "minimum_version": 
str(minimum_plotly_version), + "current_version": str(plotly.__version__), + }, + ) class ForeachBatchFunction: diff --git a/python/pyspark/sql/variant_utils.py b/python/pyspark/sql/variant_utils.py index 40cc69c1f0961..3025523064e1d 100644 --- a/python/pyspark/sql/variant_utils.py +++ b/python/pyspark/sql/variant_utils.py @@ -21,7 +21,7 @@ import json import struct from array import array -from typing import Any, Callable, Dict, List, Tuple +from typing import Any, Callable, Dict, List, NamedTuple, Tuple from pyspark.errors import PySparkValueError from zoneinfo import ZoneInfo @@ -108,8 +108,25 @@ class VariantUtils: # string size) + (size bytes of string content). LONG_STR = 16 + VERSION = 1 + # The lower 4 bits of the first metadata byte contain the version. + VERSION_MASK = 0x0F + + U8_MAX = 0xFF + U16_MAX = 0xFFFF + U24_MAX = 0xFFFFFF + U24_SIZE = 3 U32_SIZE = 4 + I8_MAX = 0x7F + I8_MIN = -0x80 + I16_MAX = 0x7FFF + I16_MIN = -0x8000 + I32_MAX = 0x7FFFFFFF + I32_MIN = -0x80000000 + I64_MAX = 0x7FFFFFFFFFFFFFFF + I64_MIN = -0x8000000000000000 + EPOCH = datetime.datetime( year=1970, month=1, day=1, hour=0, minute=0, second=0, tzinfo=datetime.timezone.utc ) @@ -140,6 +157,15 @@ def to_python(cls, value: bytes, metadata: bytes) -> str: """ return cls._to_python(value, metadata, 0) + @classmethod + def parse_json(cls, json_str: str) -> Tuple[bytes, bytes]: + """ + Parses the JSON string and creates the Variant binary (value, metadata) + :return: tuple of 2 binary values (value, metadata) + """ + builder = VariantBuilder() + return builder.build(json_str) + @classmethod def _read_long(cls, data: bytes, pos: int, num_bytes: int, signed: bool) -> int: cls._check_index(pos, len(data)) @@ -468,7 +494,10 @@ def _handle_object( value, offset_start + offset_size * i, offset_size, signed=False ) value_pos = data_start + offset - key_value_pos_list.append((cls._get_metadata_key(metadata, id), value_pos)) + if metadata is not None: + 
key_value_pos_list.append((cls._get_metadata_key(metadata, id), value_pos)) + else: + key_value_pos_list.append(("", value_pos)) return func(key_value_pos_list) @classmethod @@ -496,3 +525,297 @@ def _handle_array(cls, value: bytes, pos: int, func: Callable[[List[int]], Any]) element_pos = data_start + offset value_pos_list.append(element_pos) return func(value_pos_list) + + +class FieldEntry(NamedTuple): + """ + Info about an object field + """ + + key: str + id: int + offset: int + + +class VariantBuilder: + """ + A utility class for building VariantVal. + """ + + DEFAULT_SIZE_LIMIT = 16 * 1024 * 1024 + + def __init__(self, size_limit: int = DEFAULT_SIZE_LIMIT): + self.value = bytearray() + self.dictionary = dict[str, int]() + self.dictionary_keys = list[bytes]() + self.size_limit = size_limit + + def build(self, json_str: str) -> Tuple[bytes, bytes]: + parsed = json.loads(json_str, parse_float=self._handle_float) + self._process_parsed_json(parsed) + + num_keys = len(self.dictionary_keys) + dictionary_string_size = sum(len(key) for key in self.dictionary_keys) + + # Determine the number of bytes required per offset entry. + # The largest offset is the one-past-the-end value, which is total string size. It's very + # unlikely that the number of keys could be larger, but incorporate that into the + # calculation in case of pathological data. 
+ max_size = max(dictionary_string_size, num_keys) + if max_size > self.size_limit: + raise PySparkValueError(errorClass="VARIANT_SIZE_LIMIT_EXCEEDED", messageParameters={}) + offset_size = self._get_integer_size(max_size) + + offset_start = 1 + offset_size + string_start = offset_start + (num_keys + 1) * offset_size + metadata_size = string_start + dictionary_string_size + if metadata_size > self.size_limit: + raise PySparkValueError(errorClass="VARIANT_SIZE_LIMIT_EXCEEDED", messageParameters={}) + + metadata = bytearray() + header_byte = VariantUtils.VERSION | ((offset_size - 1) << 6) + metadata.extend(header_byte.to_bytes(1, byteorder="little")) + metadata.extend(num_keys.to_bytes(offset_size, byteorder="little")) + # write offsets + current_offset = 0 + for key in self.dictionary_keys: + metadata.extend(current_offset.to_bytes(offset_size, byteorder="little")) + current_offset += len(key) + metadata.extend(current_offset.to_bytes(offset_size, byteorder="little")) + # write key data + for key in self.dictionary_keys: + metadata.extend(key) + return (bytes(self.value), bytes(metadata)) + + def _process_parsed_json(self, parsed: Any) -> None: + if type(parsed) is dict: + fields = list[FieldEntry]() + start = len(self.value) + for key, value in parsed.items(): + id = self._add_key(key) + fields.append(FieldEntry(key, id, len(self.value) - start)) + self._process_parsed_json(value) + self._finish_writing_object(start, fields) + elif type(parsed) is list: + offsets = [] + start = len(self.value) + for elem in parsed: + offsets.append(len(self.value) - start) + self._process_parsed_json(elem) + self._finish_writing_array(start, offsets) + elif type(parsed) is str: + self._append_string(parsed) + elif type(parsed) is int: + if not self._append_int(parsed): + self._process_parsed_json(self._handle_float(str(parsed))) + elif type(parsed) is float: + self._append_float(parsed) + elif type(parsed) is decimal.Decimal: + self._append_decimal(parsed) + elif type(parsed) is 
bool: + self._append_boolean(parsed) + elif parsed is None: + self._append_null() + else: + raise PySparkValueError(errorClass="MALFORMED_VARIANT", messageParameters={}) + + # Choose the smallest unsigned integer type that can store `value`. It must be within + # [0, U24_MAX]. + def _get_integer_size(self, value: int) -> int: + if value <= VariantUtils.U8_MAX: + return 1 + if value <= VariantUtils.U16_MAX: + return 2 + return VariantUtils.U24_SIZE + + def _check_capacity(self, additional: int) -> None: + required = len(self.value) + additional + if required > self.size_limit: + raise PySparkValueError(errorClass="VARIANT_SIZE_LIMIT_EXCEEDED", messageParameters={}) + + def _primitive_header(self, type: int) -> bytes: + return bytes([(type << 2) | VariantUtils.PRIMITIVE]) + + def _short_string_header(self, size: int) -> bytes: + return bytes([size << 2 | VariantUtils.SHORT_STR]) + + def _array_header(self, large_size: bool, offset_size: int) -> bytes: + return bytes( + [ + ( + (large_size << (VariantUtils.BASIC_TYPE_BITS + 2)) + | ((offset_size - 1) << VariantUtils.BASIC_TYPE_BITS) + | VariantUtils.ARRAY + ) + ] + ) + + def _object_header(self, large_size: bool, id_size: int, offset_size: int) -> bytes: + return bytes( + [ + ( + (large_size << (VariantUtils.BASIC_TYPE_BITS + 4)) + | ((id_size - 1) << (VariantUtils.BASIC_TYPE_BITS + 2)) + | ((offset_size - 1) << VariantUtils.BASIC_TYPE_BITS) + | VariantUtils.OBJECT + ) + ] + ) + + # Add a key to the variant dictionary. If the key already exists, the dictionary is + # not modified. In either case, return the id of the key. + def _add_key(self, key: str) -> int: + if key in self.dictionary: + return self.dictionary[key] + id = len(self.dictionary_keys) + self.dictionary[key] = id + self.dictionary_keys.append(key.encode("utf-8")) + return id + + def _handle_float(self, num_str: str) -> Any: + # a float can be a decimal if it only contains digits, '-', or '.'. + if all([ch.isdecimal() or ch == "-" or ch == "." 
for ch in num_str]): + dec = decimal.Decimal(num_str) + precision = len(dec.as_tuple().digits) + scale = -int(dec.as_tuple().exponent) + + if ( + scale <= VariantUtils.MAX_DECIMAL16_PRECISION + and precision <= VariantUtils.MAX_DECIMAL16_PRECISION + ): + return dec + return float(num_str) + + def _append_boolean(self, b: bool) -> None: + self._check_capacity(1) + self.value.extend(self._primitive_header(VariantUtils.TRUE if b else VariantUtils.FALSE)) + + def _append_null(self) -> None: + self._check_capacity(1) + self.value.extend(self._primitive_header(VariantUtils.NULL)) + + def _append_string(self, s: str) -> None: + text = s.encode("utf-8") + long_str = len(text) > VariantUtils.MAX_SHORT_STR_SIZE + additional = (1 + VariantUtils.U32_SIZE) if long_str else 1 + self._check_capacity(additional + len(text)) + if long_str: + self.value.extend(self._primitive_header(VariantUtils.LONG_STR)) + self.value.extend(len(text).to_bytes(VariantUtils.U32_SIZE, byteorder="little")) + else: + self.value.extend(self._short_string_header(len(text))) + self.value.extend(text) + + def _append_int(self, i: int) -> bool: + self._check_capacity(1 + 8) + if i >= VariantUtils.I8_MIN and i <= VariantUtils.I8_MAX: + self.value.extend(self._primitive_header(VariantUtils.INT1)) + self.value.extend(i.to_bytes(1, byteorder="little", signed=True)) + elif i >= VariantUtils.I16_MIN and i <= VariantUtils.I16_MAX: + self.value.extend(self._primitive_header(VariantUtils.INT2)) + self.value.extend(i.to_bytes(2, byteorder="little", signed=True)) + elif i >= VariantUtils.I32_MIN and i <= VariantUtils.I32_MAX: + self.value.extend(self._primitive_header(VariantUtils.INT4)) + self.value.extend(i.to_bytes(4, byteorder="little", signed=True)) + elif i >= VariantUtils.I64_MIN and i <= VariantUtils.I64_MAX: + self.value.extend(self._primitive_header(VariantUtils.INT8)) + self.value.extend(i.to_bytes(8, byteorder="little", signed=True)) + else: + return False + return True + + # Append a decimal value to the 
variant builder. The caller should guarantee that its precision + # and scale fit into `MAX_DECIMAL16_PRECISION`. + def _append_decimal(self, d: decimal.Decimal) -> None: + self._check_capacity(2 + 16) + precision = len(d.as_tuple().digits) + scale = -int(d.as_tuple().exponent) + unscaled = int("".join(map(str, d.as_tuple().digits))) + unscaled = -unscaled if d < 0 else unscaled + if ( + scale <= VariantUtils.MAX_DECIMAL4_PRECISION + and precision <= VariantUtils.MAX_DECIMAL4_PRECISION + ): + self.value.extend(self._primitive_header(VariantUtils.DECIMAL4)) + self.value.extend(scale.to_bytes(1, byteorder="little")) + self.value.extend(unscaled.to_bytes(4, byteorder="little", signed=True)) + elif ( + scale <= VariantUtils.MAX_DECIMAL8_PRECISION + and precision <= VariantUtils.MAX_DECIMAL8_PRECISION + ): + self.value.extend(self._primitive_header(VariantUtils.DECIMAL8)) + self.value.extend(scale.to_bytes(1, byteorder="little")) + self.value.extend(unscaled.to_bytes(8, byteorder="little", signed=True)) + else: + assert ( + scale <= VariantUtils.MAX_DECIMAL16_PRECISION + and precision <= VariantUtils.MAX_DECIMAL16_PRECISION + ) + self.value.extend(self._primitive_header(VariantUtils.DECIMAL16)) + self.value.extend(scale.to_bytes(1, byteorder="little")) + self.value.extend(unscaled.to_bytes(16, byteorder="little", signed=True)) + + def _append_float(self, f: float) -> None: + self._check_capacity(1 + 8) + self.value.extend(self._primitive_header(VariantUtils.DOUBLE)) + self.value.extend(struct.pack(" None: + data_size = len(self.value) - start + num_offsets = len(offsets) + large_size = num_offsets > VariantUtils.U8_MAX + size_bytes = VariantUtils.U32_SIZE if large_size else 1 + offset_size = self._get_integer_size(data_size) + # The space for header byte, object size, and offset list. 
+ header_size = 1 + size_bytes + (num_offsets + 1) * offset_size + self._check_capacity(header_size) + self.value.extend(bytearray(header_size)) + # Shift the just-written element data to make room for the header section. + self.value[start + header_size :] = bytes(self.value[start : start + data_size]) + # Write the header byte, num offsets + offset_start = start + 1 + size_bytes + self.value[start : start + 1] = self._array_header(large_size, offset_size) + self.value[start + 1 : offset_start] = num_offsets.to_bytes(size_bytes, byteorder="little") + # write offset list + offset_list = bytearray() + for offset in offsets: + offset_list.extend(offset.to_bytes(offset_size, byteorder="little")) + offset_list.extend(data_size.to_bytes(offset_size, byteorder="little")) + self.value[offset_start : offset_start + len(offset_list)] = offset_list + + # Finish writing a variant object after all of its fields have already been written. + def _finish_writing_object(self, start: int, fields: List[FieldEntry]) -> None: + num_fields = len(fields) + # object fields are from a python dictionary, so keys are already distinct + fields.sort(key=lambda f: f.key) + max_id = 0 + for field in fields: + max_id = max(max_id, field.id) + + data_size = len(self.value) - start + large_size = num_fields > VariantUtils.U8_MAX + size_bytes = VariantUtils.U32_SIZE if large_size else 1 + id_size = self._get_integer_size(max_id) + offset_size = self._get_integer_size(data_size) + # The space for header byte, object size, id list, and offset list. + header_size = 1 + size_bytes + num_fields * id_size + (num_fields + 1) * offset_size + self._check_capacity(header_size) + self.value.extend(bytearray(header_size)) + # Shift the just-written field data to make room for the object header section. 
+ self.value[start + header_size :] = self.value[start : start + data_size] + # Write the header byte, num fields, id list, offset list + self.value[start : start + 1] = self._object_header(large_size, id_size, offset_size) + self.value[start + 1 : start + 1 + size_bytes] = num_fields.to_bytes( + size_bytes, byteorder="little" + ) + id_start = start + 1 + size_bytes + offset_start = id_start + num_fields * id_size + id_list = bytearray() + offset_list = bytearray() + for field in fields: + id_list.extend(field.id.to_bytes(id_size, byteorder="little")) + offset_list.extend(field.offset.to_bytes(offset_size, byteorder="little")) + offset_list.extend(data_size.to_bytes(offset_size, byteorder="little")) + self.value[id_start : id_start + len(id_list)] = id_list + self.value[offset_start : offset_start + len(offset_list)] = offset_list diff --git a/python/pyspark/sql/worker/create_data_source.py b/python/pyspark/sql/worker/create_data_source.py index ef70876fc32c5..f74c1555e6e9e 100644 --- a/python/pyspark/sql/worker/create_data_source.py +++ b/python/pyspark/sql/worker/create_data_source.py @@ -20,7 +20,7 @@ from typing import IO from pyspark.accumulators import _accumulatorRegistry -from pyspark.errors import PySparkAssertionError, PySparkRuntimeError, PySparkTypeError +from pyspark.errors import PySparkAssertionError, PySparkTypeError from pyspark.serializers import ( read_bool, read_int, @@ -127,13 +127,7 @@ def main(infile: IO, outfile: IO) -> None: options[key] = value # Instantiate a data source. - try: - data_source = data_source_cls(options=options) # type: ignore - except Exception as e: - raise PySparkRuntimeError( - errorClass="DATA_SOURCE_CREATE_ERROR", - messageParameters={"error": str(e)}, - ) + data_source = data_source_cls(options=options) # type: ignore # Get the schema of the data source. # If user_specified_schema is not None, use user_specified_schema. 
@@ -141,17 +135,11 @@ def main(infile: IO, outfile: IO) -> None: # Throw exception if the data source does not implement schema(). is_ddl_string = False if user_specified_schema is None: - try: - schema = data_source.schema() - if isinstance(schema, str): - # Here we cannot use _parse_datatype_string to parse the DDL string schema. - # as it requires an active Spark session. - is_ddl_string = True - except NotImplementedError: - raise PySparkRuntimeError( - errorClass="NOT_IMPLEMENTED", - messageParameters={"feature": "DataSource.schema"}, - ) + schema = data_source.schema() + if isinstance(schema, str): + # Here we cannot use _parse_datatype_string to parse the DDL string schema. + # as it requires an active Spark session. + is_ddl_string = True else: schema = user_specified_schema # type: ignore diff --git a/python/pyspark/sql/worker/write_into_data_source.py b/python/pyspark/sql/worker/write_into_data_source.py index a114a3facc467..91a1f4d3b1b34 100644 --- a/python/pyspark/sql/worker/write_into_data_source.py +++ b/python/pyspark/sql/worker/write_into_data_source.py @@ -32,6 +32,7 @@ from pyspark.sql.datasource import ( DataSource, DataSourceWriter, + DataSourceArrowWriter, WriterCommitMessage, CaseInsensitiveDict, ) @@ -194,7 +195,10 @@ def batch_to_rows() -> Iterator[Row]: ] yield _create_row(fields=fields, values=values) - res = writer.write(batch_to_rows()) + if isinstance(writer, DataSourceArrowWriter): + res = writer.write(iterator) + else: + res = writer.write(batch_to_rows()) # Check the commit message has the right type. 
if not isinstance(res, WriterCommitMessage): diff --git a/python/pyspark/streaming/tests/test_dstream.py b/python/pyspark/streaming/tests/test_dstream.py index 046247763c0b3..4c9633db311a6 100644 --- a/python/pyspark/streaming/tests/test_dstream.py +++ b/python/pyspark/streaming/tests/test_dstream.py @@ -403,7 +403,7 @@ def failed_func(rdd1, rdd2): self.fail("a failed func should throw an error") - def test_failed_func_with_reseting_failure(self): + def test_failed_func_with_resetting_failure(self): input = [self.sc.parallelize([d], 1) for d in range(4)] input_stream = self.ssc.queueStream(input) diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py index 1dd15666382f6..76f5b48ff9bb0 100644 --- a/python/pyspark/testing/utils.py +++ b/python/pyspark/testing/utils.py @@ -52,13 +52,9 @@ def have_package(name: str) -> bool: - try: - import importlib + import importlib - importlib.import_module(name) - return True - except Exception: - return False + return importlib.util.find_spec(name) is not None have_numpy = have_package("numpy") @@ -91,6 +87,14 @@ def have_package(name: str) -> bool: have_graphviz = have_package("graphviz") graphviz_requirement_message = None if have_graphviz else "No module named 'graphviz'" +have_flameprof = have_package("flameprof") +flameprof_requirement_message = None if have_flameprof else "No module named 'flameprof'" + +have_jinja2 = have_package("jinja2") +jinja2_requirement_message = None if have_jinja2 else "No module named 'jinja2'" + +have_openpyxl = have_package("openpyxl") +openpyxl_requirement_message = None if have_openpyxl else "No module named 'openpyxl'" pandas_requirement_message = None try: @@ -336,6 +340,7 @@ def check_error( messageParameters: Optional[Dict[str, str]] = None, query_context_type: Optional[QueryContextType] = None, fragment: Optional[str] = None, + matchPVals: bool = False, ): query_context = exception.getQueryContext() assert bool(query_context) == (query_context_type is not None), ( 
@@ -359,9 +364,30 @@ def check_error( # Test message parameters expected = messageParameters actual = exception.getMessageParameters() - self.assertEqual( - expected, actual, f"Expected message parameters was '{expected}', got '{actual}'" - ) + if matchPVals: + self.assertEqual( + len(expected), + len(actual), + "Expected message parameters count does not match actual message parameters count" + f": {len(expected)}, {len(actual)}.", + ) + for key, value in expected.items(): + self.assertIn( + key, + actual, + f"Expected message parameter key '{key}' was not found " + "in actual message parameters.", + ) + self.assertRegex( + actual[key], + value, + f"Expected message parameter value '{value}' does not match actual message " + f"parameter value '{actual[key]}'.", + ), + else: + self.assertEqual( + expected, actual, f"Expected message parameters was '{expected}', got '{actual}'" + ) # Test query context if query_context: diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 3b38b8b72c615..3e9a68ccfe2e5 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -27,6 +27,7 @@ import traceback import typing import socket +import warnings from types import TracebackType from typing import Any, Callable, IO, Iterator, List, Optional, TextIO, Tuple, Union @@ -366,7 +367,8 @@ def inheritable_thread_target(f: Optional[Union[Callable, "SparkSession"]] = Non >>> Thread(target=inheritable_thread_target(target_func)).start() # doctest: +SKIP - If you're using Spark Connect, you should explicitly provide Spark session as follows: + If you're using Spark Connect or if you want to inherit the tags properly, + you should explicitly provide Spark session as follows: >>> @inheritable_thread_target(session) # doctest: +SKIP ... 
def target_func(): @@ -406,13 +408,41 @@ def inner(*args: Any, **kwargs: Any) -> Any: return outer - # Non Spark Connect + # Non Spark Connect with SparkSession or Callable + from pyspark.sql import SparkSession from pyspark import SparkContext from py4j.clientserver import ClientServer if isinstance(SparkContext._gateway, ClientServer): # Here's when the pinned-thread mode (PYSPARK_PIN_THREAD) is on. + if isinstance(f, SparkSession): + session = f + assert session is not None + tags = set(session.getTags()) + # Local properties are copied when wrapping the function. + assert SparkContext._active_spark_context is not None + properties = SparkContext._active_spark_context._jsc.sc().getLocalProperties().clone() + + def outer(ff: Callable) -> Callable: + @functools.wraps(ff) + def wrapped(*args: Any, **kwargs: Any) -> Any: + # Apply properties and tags in the child thread. + assert SparkContext._active_spark_context is not None + SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) + for tag in tags: + session.addTag(tag) # type: ignore[union-attr] + return ff(*args, **kwargs) + + return wrapped + + return outer + + warnings.warn( + "Spark session is not provided. Tags will not be inherited.", + UserWarning, + ) + # NOTICE the internal difference vs `InheritableThread`. `InheritableThread` # copies local properties when the thread starts but `inheritable_thread_target` # copies when the function is wrapped. @@ -506,11 +536,15 @@ def copy_local_properties(*a: Any, **k: Any) -> Any: from pyspark import SparkContext from py4j.clientserver import ClientServer + self._session = session # type: ignore[assignment] if isinstance(SparkContext._gateway, ClientServer): # Here's when the pinned-thread mode (PYSPARK_PIN_THREAD) is on. def copy_local_properties(*a: Any, **k: Any) -> Any: # self._props is set before starting the thread to match the behavior with JVM. 
assert hasattr(self, "_props") + if hasattr(self, "_tags"): + for tag in self._tags: # type: ignore[has-type] + self._session.addTag(tag) assert SparkContext._active_spark_context is not None SparkContext._active_spark_context._jsc.sc().setLocalProperties(self._props) return target(*a, **k) @@ -546,6 +580,9 @@ def start(self) -> None: self._props = ( SparkContext._active_spark_context._jsc.sc().getLocalProperties().clone() ) + if self._session is not None: + self._tags = self._session.getTags() + return super(InheritableThread, self).start() diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 04f95e9f52648..e799498cdd80b 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -34,6 +34,7 @@ _deserialize_accumulator, ) from pyspark.sql.streaming.stateful_processor_api_client import StatefulProcessorApiClient +from pyspark.sql.streaming.stateful_processor_util import TransformWithStateInPandasFuncMode from pyspark.taskcontext import BarrierTaskContext, TaskContext from pyspark.resource import ResourceInformation from pyspark.util import PythonEvalType, local_connect_and_auth @@ -153,7 +154,7 @@ def verify_result_length(result, length): ) -def wrap_arrow_batch_udf(f, args_offsets, kwargs_offsets, return_type): +def wrap_arrow_batch_udf(f, args_offsets, kwargs_offsets, return_type, runner_conf): import pandas as pd func, args_kwargs_offsets = wrap_kwargs_support(f, args_offsets, kwargs_offsets) @@ -171,9 +172,21 @@ def wrap_arrow_batch_udf(f, args_offsets, kwargs_offsets, return_type): elif type(return_type) == BinaryType: result_func = lambda r: bytes(r) if r is not None else r # noqa: E731 - @fail_on_stopiteration - def evaluate(*args: pd.Series) -> pd.Series: - return pd.Series([result_func(func(*row)) for row in zip(*args)]) + if "spark.sql.execution.pythonUDF.arrow.concurrency.level" in runner_conf: + from concurrent.futures import ThreadPoolExecutor + + c = 
int(runner_conf["spark.sql.execution.pythonUDF.arrow.concurrency.level"]) + + @fail_on_stopiteration + def evaluate(*args: pd.Series) -> pd.Series: + with ThreadPoolExecutor(max_workers=c) as pool: + return pd.Series(list(pool.map(lambda row: result_func(func(*row)), zip(*args)))) + + else: + + @fail_on_stopiteration + def evaluate(*args: pd.Series) -> pd.Series: + return pd.Series([result_func(func(*row)) for row in zip(*args)]) def verify_result_length(result, length): if len(result) != length: @@ -493,36 +506,36 @@ def wrapped(key_series, value_series): def wrap_grouped_transform_with_state_pandas_udf(f, return_type, runner_conf): - def wrapped(stateful_processor_api_client, key, value_series_gen): + def wrapped(stateful_processor_api_client, mode, key, value_series_gen): import pandas as pd values = (pd.concat(x, axis=1) for x in value_series_gen) - result_iter = f(stateful_processor_api_client, key, values) + result_iter = f(stateful_processor_api_client, mode, key, values) # TODO(SPARK-49100): add verification that elements in result_iter are # indeed of type pd.DataFrame and confirm to assigned cols return result_iter - return lambda p, k, v: [(wrapped(p, k, v), to_arrow_type(return_type))] + return lambda p, m, k, v: [(wrapped(p, m, k, v), to_arrow_type(return_type))] def wrap_grouped_transform_with_state_pandas_init_state_udf(f, return_type, runner_conf): - def wrapped(stateful_processor_api_client, key, value_series_gen): + def wrapped(stateful_processor_api_client, mode, key, value_series_gen): import pandas as pd state_values_gen, init_states_gen = itertools.tee(value_series_gen, 2) state_values = (df for x, _ in state_values_gen if not (df := pd.concat(x, axis=1)).empty) init_states = (df for _, x in init_states_gen if not (df := pd.concat(x, axis=1)).empty) - result_iter = f(stateful_processor_api_client, key, state_values, init_states) + result_iter = f(stateful_processor_api_client, mode, key, state_values, init_states) # TODO(SPARK-49100): add 
verification that elements in result_iter are # indeed of type pd.DataFrame and confirm to assigned cols return result_iter - return lambda p, k, v: [(wrapped(p, k, v), to_arrow_type(return_type))] + return lambda p, m, k, v: [(wrapped(p, m, k, v), to_arrow_type(return_type))] def wrap_grouped_map_pandas_udf_with_state(f, return_type): @@ -854,7 +867,7 @@ def read_single_udf(pickleSer, infile, eval_type, runner_conf, udf_index, profil if eval_type == PythonEvalType.SQL_SCALAR_PANDAS_UDF: return wrap_scalar_pandas_udf(func, args_offsets, kwargs_offsets, return_type) elif eval_type == PythonEvalType.SQL_ARROW_BATCHED_UDF: - return wrap_arrow_batch_udf(func, args_offsets, kwargs_offsets, return_type) + return wrap_arrow_batch_udf(func, args_offsets, kwargs_offsets, return_type, runner_conf) elif eval_type == PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF: return args_offsets, wrap_pandas_batch_iter_udf(func, return_type) elif eval_type == PythonEvalType.SQL_MAP_PANDAS_ITER_UDF: @@ -1036,7 +1049,7 @@ def eval(self, *args, **kwargs) -> Iterator: list(args) + list(kwargs.values()) ) if changed_partitions: - if self._udtf.terminate is not None: + if hasattr(self._udtf, "terminate"): result = self._udtf.terminate() if result is not None: for row in result: @@ -1062,7 +1075,7 @@ def eval(self, *args, **kwargs) -> Iterator: self._eval_raised_skip_rest_of_input_table = True def terminate(self) -> Iterator: - if self._udtf.terminate is not None: + if hasattr(self._udtf, "terminate"): return self._udtf.terminate() return iter(()) @@ -1556,7 +1569,8 @@ def read_udfs(pickleSer, infile, eval_type): arrow_cast, ) else: - ser = BatchedSerializer(CPickleSerializer(), 100) + batch_size = int(os.environ.get("PYTHON_UDF_BATCH_SIZE", "100")) + ser = BatchedSerializer(CPickleSerializer(), batch_size) is_profiling = read_bool(infile) if is_profiling: @@ -1697,18 +1711,22 @@ def mapper(a): ser.key_offsets = parsed_offsets[0][0] stateful_processor_api_client = 
StatefulProcessorApiClient(state_server_port, key_schema) - # Create function like this: - # mapper a: f([a[0]], [a[0], a[1]]) def mapper(a): - key = a[0] + mode = a[0] - def values_gen(): - for x in a[1]: - retVal = [x[1][o] for o in parsed_offsets[0][1]] - yield retVal + if mode == TransformWithStateInPandasFuncMode.PROCESS_DATA: + key = a[1] - # This must be generator comprehension - do not materialize. - return f(stateful_processor_api_client, key, values_gen()) + def values_gen(): + for x in a[2]: + retVal = [x[1][o] for o in parsed_offsets[0][1]] + yield retVal + + # This must be generator comprehension - do not materialize. + return f(stateful_processor_api_client, mode, key, values_gen()) + else: + # mode == PROCESS_TIMER or mode == COMPLETE + return f(stateful_processor_api_client, mode, None, iter([])) elif eval_type == PythonEvalType.SQL_TRANSFORM_WITH_STATE_PANDAS_INIT_STATE_UDF: # We assume there is only one UDF here because grouped map doesn't @@ -1731,16 +1749,22 @@ def values_gen(): stateful_processor_api_client = StatefulProcessorApiClient(state_server_port, key_schema) def mapper(a): - key = a[0] + mode = a[0] - def values_gen(): - for x in a[1]: - retVal = [x[1][o] for o in parsed_offsets[0][1]] - initVal = [x[2][o] for o in parsed_offsets[1][1]] - yield retVal, initVal + if mode == TransformWithStateInPandasFuncMode.PROCESS_DATA: + key = a[1] - # This must be generator comprehension - do not materialize. - return f(stateful_processor_api_client, key, values_gen()) + def values_gen(): + for x in a[2]: + retVal = [x[1][o] for o in parsed_offsets[0][1]] + initVal = [x[2][o] for o in parsed_offsets[1][1]] + yield retVal, initVal + + # This must be generator comprehension - do not materialize. 
+ return f(stateful_processor_api_client, mode, key, values_gen()) + else: + # mode == PROCESS_TIMER or mode == COMPLETE + return f(stateful_processor_api_client, mode, None, iter([])) elif eval_type == PythonEvalType.SQL_GROUPED_MAP_ARROW_UDF: import pyarrow as pa @@ -1958,17 +1982,6 @@ def process(): try: serializer.dump_stream(out_iter, outfile) finally: - # Sending a signal to TransformWithState UDF to perform proper cleanup steps. - if ( - eval_type == PythonEvalType.SQL_TRANSFORM_WITH_STATE_PANDAS_UDF - or eval_type == PythonEvalType.SQL_TRANSFORM_WITH_STATE_PANDAS_INIT_STATE_UDF - ): - # Sending key as None to indicate that process() has finished. - end_iter = func(split_index, iter([(None, None)])) - # Need to materialize the iterator to trigger the cleanup steps, nothing needs - # to be done here. - for _ in end_iter: - pass if hasattr(out_iter, "close"): out_iter.close() diff --git a/python/pyspark/worker_util.py b/python/pyspark/worker_util.py index 81c05ce94eb65..5c758d3f83fe6 100644 --- a/python/pyspark/worker_util.py +++ b/python/pyspark/worker_util.py @@ -107,8 +107,8 @@ def setup_memory_limits(memory_limit_mb: int) -> None: except (resource.error, OSError, ValueError) as e: # not all systems support resource limits, so warn instead of failing - curent = currentframe() - lineno = getframeinfo(curent).lineno + 1 if curent is not None else 0 + current = currentframe() + lineno = getframeinfo(current).lineno + 1 if current is not None else 0 if "__file__" in globals(): print( warnings.formatwarning( diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 211c6c93b9674..814a3e1c595b8 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -37,7 +37,7 @@ io.fabric8 - volcano-model-v1beta1 + volcano-model ${kubernetes-client.version} @@ -105,11 +105,6 @@ test - - io.fabric8 - kubernetes-httpclient-okhttp - ${kubernetes-client.version} - io.fabric8 
kubernetes-client diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index 2c28dc380046c..557bf01cbdbae 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -24,10 +24,7 @@ import com.google.common.io.Files import io.fabric8.kubernetes.client.{ConfigBuilder, KubernetesClient, KubernetesClientBuilder} import io.fabric8.kubernetes.client.Config.KUBERNETES_REQUEST_RETRY_BACKOFFLIMIT_SYSTEM_PROPERTY import io.fabric8.kubernetes.client.Config.autoConfigure -import io.fabric8.kubernetes.client.okhttp.OkHttpClientFactory import io.fabric8.kubernetes.client.utils.Utils.getSystemPropertyOrEnvVar -import okhttp3.Dispatcher -import okhttp3.OkHttpClient import org.apache.spark.SparkConf import org.apache.spark.annotation.{DeveloperApi, Since, Stable} @@ -35,7 +32,6 @@ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.internal.LogKeys.K8S_CONTEXT import org.apache.spark.internal.config.ConfigEntry -import org.apache.spark.util.ThreadUtils /** * :: DeveloperApi :: @@ -78,10 +74,6 @@ object SparkKubernetesClientFactory extends Logging { .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_KEY_FILE_CONF_SUFFIX") val clientCertFile = sparkConf .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_CERT_FILE_CONF_SUFFIX") - // TODO(SPARK-37687): clean up direct usage of OkHttpClient, see also: - // https://github.com/fabric8io/kubernetes-client/issues/3547 - val dispatcher = new Dispatcher( - ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher")) // Allow for specifying a context used to auto-configure from the users K8S config file val kubeContext 
= sparkConf.get(KUBERNETES_CONTEXT).filter(_.nonEmpty) @@ -117,17 +109,9 @@ object SparkKubernetesClientFactory extends Logging { }.withOption(namespace) { (ns, configBuilder) => configBuilder.withNamespace(ns) }.build() - val factoryWithCustomDispatcher = new OkHttpClientFactory() { - override protected def additionalConfig(builder: OkHttpClient.Builder): Unit = { - builder.dispatcher(dispatcher) - } - } logDebug("Kubernetes client config: " + new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(config)) - new KubernetesClientBuilder() - .withHttpClientFactory(factoryWithCustomDispatcher) - .withConfig(config) - .build() + new KubernetesClientBuilder().withConfig(config).build() } private implicit class OptionConfigurableConfigBuilder(val configBuilder: ConfigBuilder) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/ExecutorKubernetesCredentialsFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/ExecutorKubernetesCredentialsFeatureStepSuite.scala index 59cc7ac91d1ab..6a14711071b80 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/ExecutorKubernetesCredentialsFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/ExecutorKubernetesCredentialsFeatureStepSuite.scala @@ -16,10 +16,11 @@ */ package org.apache.spark.deploy.k8s.features +import io.fabric8.kubernetes.api.model.PodSpec import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesTestConf, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ class ExecutorKubernetesCredentialsFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { @@ -30,58 +31,40 @@ class ExecutorKubernetesCredentialsFeatureStepSuite extends 
SparkFunSuite with B baseConf = new SparkConf(false) } - private def newExecutorConf(environment: Map[String, String] = Map.empty): - KubernetesExecutorConf = { - KubernetesTestConf.createExecutorConf( - sparkConf = baseConf, - environment = environment) - } - test("configure spark pod with executor service account") { baseConf.set(KUBERNETES_EXECUTOR_SERVICE_ACCOUNT_NAME, "executor-name") - val step = new ExecutorKubernetesCredentialsFeatureStep(newExecutorConf()) - val spec = step - .configurePod(SparkPod.initialPod()) - .pod - .getSpec - - val serviceAccountName = spec.getServiceAccountName - val accountName = spec.getServiceAccount - assertSAName(serviceAccountName, accountName) + val spec = evaluateStep() + assertSAName("executor-name", spec) } test("configure spark pod with with driver service account " + "and without executor service account") { baseConf.set(KUBERNETES_DRIVER_SERVICE_ACCOUNT_NAME, "driver-name") - val step = new ExecutorKubernetesCredentialsFeatureStep(newExecutorConf()) - val spec = step - .configurePod(SparkPod.initialPod()) - .pod - .getSpec - - val serviceAccountName = spec.getServiceAccountName - val accountName = spec.getServiceAccount - assertSAName(serviceAccountName, accountName) + val spec = evaluateStep() + assertSAName("driver-name", spec) } test("configure spark pod with with driver service account " + "and with executor service account") { baseConf.set(KUBERNETES_DRIVER_SERVICE_ACCOUNT_NAME, "driver-name") baseConf.set(KUBERNETES_EXECUTOR_SERVICE_ACCOUNT_NAME, "executor-name") + val spec = evaluateStep() + assertSAName("executor-name", spec) + } + + private def assertSAName(expectedServiceAccountName: String, + spec: PodSpec): Unit = { + assert(spec.getServiceAccountName.equals(expectedServiceAccountName)) + assert(spec.getServiceAccount.equals(expectedServiceAccountName)) + } - val step = new ExecutorKubernetesCredentialsFeatureStep(newExecutorConf()) - val spec = step + private def evaluateStep(): PodSpec = { + val 
executorConf = KubernetesTestConf.createExecutorConf( + sparkConf = baseConf) + val step = new ExecutorKubernetesCredentialsFeatureStep(executorConf) + step .configurePod(SparkPod.initialPod()) .pod .getSpec - - val serviceAccountName = spec.getServiceAccountName - val accountName = spec.getServiceAccount - assertSAName(serviceAccountName, accountName) - } - - def assertSAName(serviceAccountName: String, accountName: String): Unit = { - assert(serviceAccountName.equals(serviceAccountName)) - assert(accountName.equals(accountName)) } } diff --git a/resource-managers/kubernetes/core/volcano/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala b/resource-managers/kubernetes/core/volcano/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala index 314550713ef16..046d268df4e29 100644 --- a/resource-managers/kubernetes/core/volcano/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala +++ b/resource-managers/kubernetes/core/volcano/src/main/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStep.scala @@ -17,8 +17,8 @@ package org.apache.spark.deploy.k8s.features import io.fabric8.kubernetes.api.model._ +import io.fabric8.volcano.api.model.scheduling.v1beta1.{PodGroup, PodGroupSpec} import io.fabric8.volcano.client.DefaultVolcanoClient -import io.fabric8.volcano.scheduling.v1beta1.{PodGroup, PodGroupSpec} import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesDriverConf, KubernetesExecutorConf, SparkPod} import org.apache.spark.internal.Logging diff --git a/resource-managers/kubernetes/core/volcano/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala b/resource-managers/kubernetes/core/volcano/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala index dab414e0e19e7..c92164993ef80 100644 --- a/resource-managers/kubernetes/core/volcano/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala +++ 
b/resource-managers/kubernetes/core/volcano/src/test/scala/org/apache/spark/deploy/k8s/features/VolcanoFeatureStepSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.features import java.io.File -import io.fabric8.volcano.scheduling.v1beta1.PodGroup +import io.fabric8.volcano.api.model.scheduling.v1beta1.PodGroup import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 5b80fe10596c1..36848cba0609e 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -330,11 +330,11 @@ You can also specify your specific dockerfile to build JVM/Python/R based image ## Requirements - A minimum of 6 CPUs and 9G of memory is required to complete all Volcano test cases. -- Volcano v1.9.0. +- Volcano v1.10.0. ## Installation - kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.9.0/installer/volcano-development.yaml + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.10.0/installer/volcano-development.yaml ## Run tests @@ -355,5 +355,5 @@ You can also specify `volcano` tag to only run Volcano test: ## Cleanup Volcano - kubectl delete -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.9.0/installer/volcano-development.yaml + kubectl delete -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.10.0/installer/volcano-development.yaml diff --git a/resource-managers/kubernetes/integration-tests/volcano/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/volcano/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index 2f414b72ee195..4a9dc135ecfdc 100644 --- 
a/resource-managers/kubernetes/integration-tests/volcano/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/volcano/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -28,8 +28,8 @@ import scala.concurrent.Future import scala.jdk.CollectionConverters._ import io.fabric8.kubernetes.api.model.{HasMetadata, Pod, Quantity} +import io.fabric8.volcano.api.model.scheduling.v1beta1.{Queue, QueueBuilder} import io.fabric8.volcano.client.VolcanoClient -import io.fabric8.volcano.scheduling.v1beta1.{Queue, QueueBuilder} import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index c86195d0ef31e..911ce2e27f96d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -529,7 +529,7 @@ private[yarn] class YarnAllocator( log"${MDC(LogKeys.MEMORY_SIZE, resource.getMemorySize)} MB memory." 
if (resource.getResources.nonEmpty) { requestContainerMessage = requestContainerMessage + - log" with custom resources: ${MDC(LogKeys.RESOURCE, resource)}" + log" with custom resources: ${MDC(LogKeys.YARN_RESOURCE, resource)}" } logInfo(requestContainerMessage) } @@ -820,6 +820,7 @@ private[yarn] class YarnAllocator( logInfo(log"Skip launching executorRunnable as running executors count: " + log"${MDC(LogKeys.COUNT, rpRunningExecs)} reached target executors count: " + log"${MDC(LogKeys.NUM_EXECUTOR_TARGET, getOrUpdateTargetNumExecutorsForRPId(rpId))}.") + internalReleaseContainer(container) } } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 92d9f2d62d1c1..71843b7f90b1f 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -293,7 +293,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite { } test("running Spark in yarn-cluster mode displays driver log links") { - val log4jConf = new File(tempDir, "log4j.properties") + val log4jConf = new File(tempDir, "log4j2.properties") val logOutFile = new File(tempDir, "logs") Files.asCharSink(log4jConf, StandardCharsets.UTF_8).write( s"""rootLogger.level = debug diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh index 0bea4a45040ed..814e17f147129 100755 --- a/sbin/spark-config.sh +++ b/sbin/spark-config.sh @@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" # Add the PySpark classes to the PYTHONPATH: if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" - export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:${PYTHONPATH}" + export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.9-src.zip:${PYTHONPATH}" export 
PYSPARK_PYTHONPATH_SET=1 fi diff --git a/sbin/start-connect-server.sh b/sbin/start-connect-server.sh index 668423bad1cbb..7f0c430a468a9 100755 --- a/sbin/start-connect-server.sh +++ b/sbin/start-connect-server.sh @@ -33,7 +33,7 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then echo "Usage: ./sbin/start-connect-server.sh [--wait] [options]" "${SPARK_HOME}"/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2 - exit 1 + exit 0 fi . "${SPARK_HOME}/bin/load-spark-env.sh" diff --git a/sbin/start-history-server.sh b/sbin/start-history-server.sh index 71dace47767cb..a99c8e557885b 100755 --- a/sbin/start-history-server.sh +++ b/sbin/start-history-server.sh @@ -40,7 +40,7 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then pattern+="\|Registered signal handler for" "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2 - exit 1 + exit 0 fi . "${SPARK_HOME}/sbin/spark-config.sh" diff --git a/sbin/start-master.sh b/sbin/start-master.sh index 36fe4b4abeb91..25e739132f0d5 100755 --- a/sbin/start-master.sh +++ b/sbin/start-master.sh @@ -35,7 +35,7 @@ if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then pattern+="\|Registered signal handler for" "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2 - exit 1 + exit 0 fi ORIGINAL_ARGS="$@" diff --git a/sbin/start-thriftserver.sh b/sbin/start-thriftserver.sh index b1d38713218b7..a457526979341 100755 --- a/sbin/start-thriftserver.sh +++ b/sbin/start-thriftserver.sh @@ -52,7 +52,7 @@ function usage { if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then usage - exit 1 + exit 0 fi export SUBMIT_USAGE_FUNCTION=usage diff --git a/sbin/start-worker.sh b/sbin/start-worker.sh index fd58f01bac2eb..c0147a51b3f2c 100755 --- a/sbin/start-worker.sh +++ b/sbin/start-worker.sh @@ -47,7 +47,7 @@ if [[ $# -lt 1 ]] || [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then pattern+="\|Registered signal handler for" "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2 - exit 1 + [[ $# -lt 1 ]] && 
exit 1 || exit 0 fi . "${SPARK_HOME}/sbin/spark-config.sh" diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 05b3f6a268985..7e64dc9be6731 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -460,33 +460,6 @@ This file is divided into 3 sections: -1,0,1,2,3 - - Objects.toStringHelper - Avoid using Object.toStringHelper. Use ToStringBuilder instead. - - - - Files\.createTempDir\( - Avoid using com.google.common.io.Files.createTempDir due to CVE-2020-8908. - Use org.apache.spark.util.Utils.createTempDir instead. - - - - - FileBackedOutputStream - Avoid using FileBackedOutputStream due to CVE-2023-2976. - - - - AtomicDoubleArray - Avoid using AtomicDoubleArray due to CVE-2018-10237. - - - - CompoundOrdering - Avoid using CompoundOrdering due to CVE-2018-10237. - - byteCountToDisplaySize Use Utils.bytesToString instead of byteCountToDisplaySize for consistency. diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index eeebe89de8ff1..dafeed48aef11 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -283,6 +283,7 @@ IS: 'IS'; ITEMS: 'ITEMS'; ITERATE: 'ITERATE'; JOIN: 'JOIN'; +JSON: 'JSON'; KEYS: 'KEYS'; LANGUAGE: 'LANGUAGE'; LAST: 'LAST'; @@ -365,6 +366,7 @@ REAL: 'REAL'; RECORDREADER: 'RECORDREADER'; RECORDWRITER: 'RECORDWRITER'; RECOVER: 'RECOVER'; +RECURSIVE: 'RECURSIVE'; REDUCE: 'REDUCE'; REFERENCES: 'REFERENCES'; REFRESH: 'REFRESH'; diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index cdee8c906054d..667d200268cf8 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ 
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -48,15 +48,15 @@ compoundOrSingleStatement ; singleCompoundStatement - : BEGIN compoundBody END SEMICOLON? EOF + : BEGIN compoundBody? END SEMICOLON? EOF ; beginEndCompoundBlock - : beginLabel? BEGIN compoundBody END endLabel? + : beginLabel? BEGIN compoundBody? END endLabel? ; compoundBody - : (compoundStatements+=compoundStatement SEMICOLON)* + : (compoundStatements+=compoundStatement SEMICOLON)+ ; compoundStatement @@ -70,6 +70,7 @@ compoundStatement | leaveStatement | iterateStatement | loopStatement + | forStatement ; setStatementWithOptionalVarKeyword @@ -111,6 +112,10 @@ loopStatement : beginLabel? LOOP compoundBody END LOOP endLabel? ; +forStatement + : beginLabel? FOR (multipartIdentifier AS)? query DO compoundBody END FOR endLabel? + ; + singleStatement : (statement|setResetStatement) SEMICOLON* EOF ; @@ -231,6 +236,7 @@ statement | ALTER TABLE identifierReference RECOVER PARTITIONS #recoverPartitions | ALTER TABLE identifierReference (clusterBySpec | CLUSTER BY NONE) #alterClusterBy + | ALTER TABLE identifierReference collationSpec #alterTableCollation | DROP TABLE (IF EXISTS)? identifierReference PURGE? #dropTable | DROP VIEW (IF EXISTS)? identifierReference #dropView | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? @@ -238,6 +244,7 @@ statement identifierCommentList? (commentSpec | schemaBinding | + collationSpec | (PARTITIONED ON identifierList) | (TBLPROPERTIES propertyList))* AS query #createView @@ -280,7 +287,7 @@ statement | (DESC | DESCRIBE) namespace EXTENDED? identifierReference #describeNamespace | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)? - identifierReference partitionSpec? describeColName? #describeRelation + identifierReference partitionSpec? describeColName? (AS JSON)? #describeRelation | (DESC | DESCRIBE) QUERY? 
query #describeQuery | COMMENT ON namespace identifierReference IS comment #commentNamespace @@ -502,7 +509,7 @@ describeColName ; ctes - : WITH namedQuery (COMMA namedQuery)* + : WITH RECURSIVE? namedQuery (COMMA namedQuery)* ; namedQuery @@ -523,6 +530,7 @@ createTableClauses createFileFormat | locationSpec | commentSpec | + collationSpec | (TBLPROPERTIES tableProps=propertyList))* ; @@ -643,7 +651,7 @@ sortItem ; fromStatement - : fromClause fromStatementBody+ + : fromClause fromStatementBody* ; fromStatementBody @@ -1227,8 +1235,12 @@ colPosition : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier ; +collationSpec + : DEFAULT COLLATION collationName=identifier + ; + collateClause - : COLLATE collationName=identifier + : COLLATE collationName=multipartIdentifier ; type @@ -1504,6 +1516,9 @@ version operatorPipeRightSide : selectClause windowClause? | EXTEND extendList=namedExpressionSeq + | SET operatorPipeSetAssignmentSeq + | DROP identifierSeq + | AS errorCapturingIdentifier // Note that the WINDOW clause is not allowed in the WHERE pipe operator, but we add it here in // the grammar simply for purposes of catching this invalid syntax and throwing a specific // dedicated error message. @@ -1515,11 +1530,20 @@ operatorPipeRightSide | unpivotClause pivotClause? | sample | joinRelation - | operator=(UNION | EXCEPT | SETMINUS | INTERSECT) setQuantifier? right=queryTerm + | operator=(UNION | EXCEPT | SETMINUS | INTERSECT) setQuantifier? right=queryPrimary | queryOrganization | AGGREGATE namedExpressionSeq? aggregationClause? ; +operatorPipeSetAssignmentSeq + : ident+=errorCapturingIdentifier + (DOT errorCapturingIdentifier)* // This is invalid syntax; we just capture it here. + EQ expression + (COMMA ident+=errorCapturingIdentifier + (DOT errorCapturingIdentifier)* // This is invalid syntax; we just capture it here. + EQ expression)* + ; + // When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. 
// - Reserved keywords: // Keywords that are reserved and can't be used as identifiers for table, view, column, @@ -1656,6 +1680,7 @@ ansiNonReserved | INVOKER | ITEMS | ITERATE + | JSON | KEYS | LANGUAGE | LAST @@ -2015,6 +2040,7 @@ nonReserved | IS | ITEMS | ITERATE + | JSON | KEYS | LANGUAGE | LAST @@ -2094,6 +2120,7 @@ nonReserved | RECORDREADER | RECORDWRITER | RECOVER + | RECURSIVE | REDUCE | REFERENCES | REFRESH diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Column.scala b/sql/api/src/main/scala/org/apache/spark/sql/Column.scala index 8498ae04d9a2a..f13b340e5e9c8 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/Column.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.parser.DataTypeParser import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.{lit, map} -import org.apache.spark.sql.internal.{ColumnNode, LazyOuterReference, UnresolvedAttribute} +import org.apache.spark.sql.internal.ColumnNode import org.apache.spark.sql.types._ import org.apache.spark.util.ArrayImplicits._ @@ -137,7 +137,7 @@ class TypedColumn[-T, U](node: ColumnNode, private[sql] val encoder: Encoder[U]) * @since 1.3.0 */ @Stable -class Column(val node: ColumnNode) extends Logging { +class Column(val node: ColumnNode) extends Logging with TableValuedFunctionArgument { private[sql] def this(name: String, planId: Option[Long]) = this(withOrigin { name match { case "*" => internal.UnresolvedStar(None, planId) @@ -1383,20 +1383,27 @@ class Column(val node: ColumnNode) extends Logging { def over(): Column = over(Window.spec) /** - * Marks this column reference as an outer reference for subqueries. + * Mark this column as an outer column if its expression refers to columns from an outer query. 
+ * This is used to trigger lazy analysis of Spark Classic DataFrame, so that we can use it to + * build subquery expressions. Spark Connect DataFrame is always lazily analyzed and does not + * need to use this function. * - * @group subquery + * {{{ + * // Spark can't analyze this `df` now as it doesn't know how to resolve `t1.col`. + * val df = spark.table("t2").where($"t2.col" === $"t1.col".outer()) + * + * // Since this `df` is lazily analyzed, you won't see any error until you try to execute it. + * df.collect() // Fails with UNRESOLVED_COLUMN error. + * + * // Now Spark can resolve `t1.col` with the outer plan `spark.table("t1")`. + * spark.table("t1").where(df.exists()) + * }}} + * + * @group expr_ops * @since 4.0.0 */ - def outer(): Column = withOrigin { - node match { - case attr: UnresolvedAttribute if !attr.isMetadataColumn => - Column(LazyOuterReference(attr.nameParts, attr.planId)) - case _ => - throw new IllegalArgumentException( - "Only unresolved attributes can be used as outer references") - } - } + def outer(): Column = Column(internal.LazyExpression(node)) + } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala index 9976b34f7a01f..4957d76af9a29 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/Encoders.scala @@ -81,6 +81,20 @@ object Encoders { */ def DOUBLE: Encoder[java.lang.Double] = BoxedDoubleEncoder + /** + * An encoder for nullable char type. + * + * @since 4.0.0 + */ + def CHAR(length: Int): Encoder[java.lang.String] = CharEncoder(length) + + /** + * An encoder for nullable varchar type. + * + * @since 4.0.0 + */ + def VARCHAR(length: Int): Encoder[java.lang.String] = VarcharEncoder(length) + /** * An encoder for nullable string type. 
* diff --git a/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala index 9e6e0e97f0302..091fbf20a0a7f 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql import org.apache.spark.annotation.Stable +import org.apache.spark.internal.config.{ConfigEntry, OptionalConfigEntry} /** * Runtime configuration interface for Spark. To access this, use `SparkSession.conf`. @@ -53,6 +54,11 @@ abstract class RuntimeConfig { set(key, value.toString) } + /** + * Sets the given Spark runtime configuration property. + */ + private[sql] def set[T](entry: ConfigEntry[T], value: T): Unit + /** * Returns the value of Spark runtime configuration property for the given key. If the key is * not set yet, return its default value if possible, otherwise `NoSuchElementException` will be @@ -74,6 +80,25 @@ abstract class RuntimeConfig { */ def get(key: String, default: String): String + /** + * Returns the value of Spark runtime configuration property for the given key. If the key is + * not set yet, return `defaultValue` in [[ConfigEntry]]. + */ + @throws[NoSuchElementException]("if the key is not set") + private[sql] def get[T](entry: ConfigEntry[T]): T + + /** + * Returns the value of Spark runtime configuration property for the given key. If the key is + * not set yet, return None. + */ + private[sql] def get[T](entry: OptionalConfigEntry[T]): Option[T] + + /** + * Returns the value of Spark runtime configuration property for the given key. If the key is + * not set yet, return the user given `default`. + */ + private[sql] def get[T](entry: ConfigEntry[T], default: T): T + /** * Returns all properties set in this conf. 
* diff --git a/sql/api/src/main/scala/org/apache/spark/sql/TableValuedFunctionArgument.scala b/sql/api/src/main/scala/org/apache/spark/sql/TableValuedFunctionArgument.scala new file mode 100644 index 0000000000000..f99c4ecd48554 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/TableValuedFunctionArgument.scala @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +trait TableValuedFunctionArgument diff --git a/sql/api/src/main/scala/org/apache/spark/sql/api/Dataset.scala b/sql/api/src/main/scala/org/apache/spark/sql/api/Dataset.scala index 9d41998f11dc6..20c181e7b9cf6 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/api/Dataset.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/api/Dataset.scala @@ -859,6 +859,60 @@ abstract class Dataset[T] extends Serializable { joinWith(other, condition, "inner") } + + /** + * Lateral join with another `DataFrame`. + * + * Behaves as a JOIN LATERAL. + * + * @param right + * Right side of the join operation. + * @group untypedrel + * @since 4.0.0 + */ + def lateralJoin(right: DS[_]): Dataset[Row] + + /** + * Lateral join with another `DataFrame`. + * + * Behaves as a JOIN LATERAL. 
+ * + * @param right + * Right side of the join operation. + * @param joinExprs + * Join expression. + * @group untypedrel + * @since 4.0.0 + */ + def lateralJoin(right: DS[_], joinExprs: Column): Dataset[Row] + + /** + * Lateral join with another `DataFrame`. + * + * @param right + * Right side of the join operation. + * @param joinType + * Type of join to perform. Default `inner`. Must be one of: `inner`, `cross`, `left`, + * `leftouter`, `left_outer`. + * @group untypedrel + * @since 4.0.0 + */ + def lateralJoin(right: DS[_], joinType: String): Dataset[Row] + + /** + * Lateral join with another `DataFrame`. + * + * @param right + * Right side of the join operation. + * @param joinExprs + * Join expression. + * @param joinType + * Type of join to perform. Default `inner`. Must be one of: `inner`, `cross`, `left`, + * `leftouter`, `left_outer`. + * @group untypedrel + * @since 4.0.0 + */ + def lateralJoin(right: DS[_], joinExprs: Column, joinType: String): Dataset[Row] + protected def sortInternal(global: Boolean, sortExprs: Seq[Column]): Dataset[T] /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/api/SQLContext.scala b/sql/api/src/main/scala/org/apache/spark/sql/api/SQLContext.scala new file mode 100644 index 0000000000000..50590fffa1521 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/api/SQLContext.scala @@ -0,0 +1,1022 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.api + +import scala.collection.immutable +import scala.reflect.runtime.universe.TypeTag + +import _root_.java.util.{List => JList, Map => JMap, Properties} + +import org.apache.spark.SparkContext +import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable} +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.internal.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Encoder, Encoders, ExperimentalMethods, Row} +import org.apache.spark.sql.api.SQLImplicits +import org.apache.spark.sql.catalog.Table +import org.apache.spark.sql.functions.{array_size, coalesce, col, lit, when} +import org.apache.spark.sql.sources.BaseRelation +import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.ExecutionListenerManager + +/** + * The entry point for working with structured data (rows and columns) in Spark 1.x. + * + * As of Spark 2.0, this is replaced by [[SparkSession]]. However, we are keeping the class here + * for backward compatibility. 
+ * + * @groupname basic Basic Operations + * @groupname ddl_ops Persistent Catalog DDL + * @groupname cachemgmt Cached Table Management + * @groupname genericdata Generic Data Sources + * @groupname specificdata Specific Data Sources + * @groupname config Configuration + * @groupname dataframes Custom DataFrame Creation + * @groupname dataset Custom Dataset Creation + * @groupname Ungrouped Support functions for language integrated queries + * @since 1.0.0 + */ +@Stable +abstract class SQLContext private[sql] (val sparkSession: SparkSession) + extends Logging + with Serializable { + + // Note: Since Spark 2.0 this class has become a wrapper of SparkSession, where the + // real functionality resides. This class remains mainly for backward compatibility. + + def sparkContext: SparkContext = sparkSession.sparkContext + + /** + * Returns a [[SQLContext]] as new session, with separated SQL configurations, temporary tables, + * registered functions, but sharing the same `SparkContext`, cached data and other things. + * + * @since 1.6.0 + */ + def newSession(): SQLContext + + /** + * An interface to register custom QueryExecutionListener that listen for execution metrics. + */ + def listenerManager: ExecutionListenerManager + + /** + * Set Spark SQL configuration properties. + * + * @group config + * @since 1.0.0 + */ + def setConf(props: Properties): Unit + + /** + * Set the given Spark SQL configuration property. + * + * @group config + * @since 1.0.0 + */ + def setConf(key: String, value: String): Unit = { + sparkSession.conf.set(key, value) + } + + /** + * Return the value of Spark SQL configuration property for the given key. + * + * @group config + * @since 1.0.0 + */ + def getConf(key: String): String = { + sparkSession.conf.get(key) + } + + /** + * Return the value of Spark SQL configuration property for the given key. If the key is not set + * yet, return `defaultValue`. 
+ * + * @group config + * @since 1.0.0 + */ + def getConf(key: String, defaultValue: String): String = { + sparkSession.conf.get(key, defaultValue) + } + + /** + * Return all the configuration properties that have been set (i.e. not the default). This + * creates a new copy of the config properties in the form of a Map. + * + * @group config + * @since 1.0.0 + */ + def getAllConfs: immutable.Map[String, String] = { + sparkSession.conf.getAll + } + + /** + * :: Experimental :: A collection of methods that are considered experimental, but can be used + * to hook into the query planner for advanced functionality. + * + * @group basic + * @since 1.3.0 + */ + @Experimental + @transient + @Unstable + def experimental: ExperimentalMethods + + /** + * Returns a `DataFrame` with no rows or columns. + * + * @group basic + * @since 1.3.0 + */ + def emptyDataFrame: Dataset[Row] = sparkSession.emptyDataFrame + + /** + * A collection of methods for registering user-defined functions (UDF). + * + * The following example registers a Scala closure as UDF: + * {{{ + * sqlContext.udf.register("myUDF", (arg1: Int, arg2: String) => arg2 + arg1) + * }}} + * + * The following example registers a UDF in Java: + * {{{ + * sqlContext.udf().register("myUDF", + * (Integer arg1, String arg2) -> arg2 + arg1, + * DataTypes.StringType); + * }}} + * + * @note + * The user-defined functions must be deterministic. Due to optimization, duplicate + * invocations may be eliminated or the function may even be invoked more times than it is + * present in the query. + * + * @group basic + * @since 1.3.0 + */ + def udf: UDFRegistration + + /** + * (Scala-specific) Implicit methods available in Scala for converting common Scala objects into + * `DataFrame`s. + * + * {{{ + * val sqlContext = new SQLContext(sc) + * import sqlContext.implicits._ + * }}} + * + * @group basic + * @since 1.3.0 + */ + val implicits: SQLImplicits + + /** + * Returns true if the table is currently cached in-memory. 
+ * @group cachemgmt + * @since 1.3.0 + */ + def isCached(tableName: String): Boolean = { + sparkSession.catalog.isCached(tableName) + } + + /** + * Caches the specified table in-memory. + * @group cachemgmt + * @since 1.3.0 + */ + def cacheTable(tableName: String): Unit = { + sparkSession.catalog.cacheTable(tableName) + } + + /** + * Removes the specified table from the in-memory cache. + * @group cachemgmt + * @since 1.3.0 + */ + def uncacheTable(tableName: String): Unit = { + sparkSession.catalog.uncacheTable(tableName) + } + + /** + * Removes all cached tables from the in-memory cache. + * @since 1.3.0 + */ + def clearCache(): Unit = { + sparkSession.catalog.clearCache() + } + + /** + * Creates a DataFrame from an RDD of Product (e.g. case classes, tuples). + * + * @group dataframes + * @since 1.3.0 + */ + def createDataFrame[A <: Product: TypeTag](rdd: RDD[A]): Dataset[Row] = { + sparkSession.createDataFrame(rdd) + } + + /** + * Creates a DataFrame from a local Seq of Product. + * + * @group dataframes + * @since 1.3.0 + */ + def createDataFrame[A <: Product: TypeTag](data: Seq[A]): Dataset[Row] = { + sparkSession.createDataFrame(data) + } + + /** + * Convert a `BaseRelation` created for external data sources into a `DataFrame`. + * + * @group dataframes + * @since 1.3.0 + */ + def baseRelationToDataFrame(baseRelation: BaseRelation): Dataset[Row] = { + sparkSession.baseRelationToDataFrame(baseRelation) + } + + /** + * :: DeveloperApi :: Creates a `DataFrame` from an `RDD` containing + * [[org.apache.spark.sql.Row Row]]s using the given schema. It is important to make sure that + * the structure of every [[org.apache.spark.sql.Row Row]] of the provided RDD matches the + * provided schema. Otherwise, there will be runtime exception. 
Example: + * {{{ + * import org.apache.spark.sql._ + * import org.apache.spark.sql.types._ + * val sqlContext = new org.apache.spark.sql.SQLContext(sc) + * + * val schema = + * StructType( + * StructField("name", StringType, false) :: + * StructField("age", IntegerType, true) :: Nil) + * + * val people = + * sc.textFile("examples/src/main/resources/people.txt").map( + * _.split(",")).map(p => Row(p(0), p(1).trim.toInt)) + * val dataFrame = sqlContext.createDataFrame(people, schema) + * dataFrame.printSchema + * // root + * // |-- name: string (nullable = false) + * // |-- age: integer (nullable = true) + * + * dataFrame.createOrReplaceTempView("people") + * sqlContext.sql("select name from people").collect.foreach(println) + * }}} + * + * @group dataframes + * @since 1.3.0 + */ + @DeveloperApi + def createDataFrame(rowRDD: RDD[Row], schema: StructType): Dataset[Row] = { + sparkSession.createDataFrame(rowRDD, schema) + } + + /** + * Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an + * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL + * representation) that is generally created automatically through implicits from a + * `SparkSession`, or can be created explicitly by calling static methods on + * [[org.apache.spark.sql.Encoders Encoders]]. + * + * ==Example== + * + * {{{ + * + * import spark.implicits._ + * case class Person(name: String, age: Long) + * val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19)) + * val ds = spark.createDataset(data) + * + * ds.show() + * // +-------+---+ + * // | name|age| + * // +-------+---+ + * // |Michael| 29| + * // | Andy| 30| + * // | Justin| 19| + * // +-------+---+ + * }}} + * + * @since 2.0.0 + * @group dataset + */ + def createDataset[T: Encoder](data: Seq[T]): Dataset[T] = { + sparkSession.createDataset(data) + } + + /** + * Creates a [[Dataset]] from an RDD of a given type. 
This method requires an encoder (to + * convert a JVM object of type `T` to and from the internal Spark SQL representation) that is + * generally created automatically through implicits from a `SparkSession`, or can be created + * explicitly by calling static methods on [[org.apache.spark.sql.Encoders Encoders]]. + * + * @since 2.0.0 + * @group dataset + */ + def createDataset[T: Encoder](data: RDD[T]): Dataset[T] = { + sparkSession.createDataset(data) + } + + /** + * Creates a [[Dataset]] from a `JList` of a given type. This method requires an encoder (to + * convert a JVM object of type `T` to and from the internal Spark SQL representation) that is + * generally created automatically through implicits from a `SparkSession`, or can be created + * explicitly by calling static methods on [[org.apache.spark.sql.Encoders Encoders]]. + * + * ==Java Example== + * + * {{{ + * List data = Arrays.asList("hello", "world"); + * Dataset ds = spark.createDataset(data, Encoders.STRING()); + * }}} + * + * @since 2.0.0 + * @group dataset + */ + def createDataset[T: Encoder](data: JList[T]): Dataset[T] = { + sparkSession.createDataset(data) + } + + /** + * :: DeveloperApi :: Creates a `DataFrame` from a `JavaRDD` containing + * [[org.apache.spark.sql.Row Row]]s using the given schema. It is important to make sure that + * the structure of every [[org.apache.spark.sql.Row Row]] of the provided RDD matches the + * provided schema. Otherwise, there will be runtime exception. + * + * @group dataframes + * @since 1.3.0 + */ + @DeveloperApi + def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): Dataset[Row] = { + sparkSession.createDataFrame(rowRDD, schema) + } + + /** + * :: DeveloperApi :: Creates a `DataFrame` from a `JList` containing + * [[org.apache.spark.sql.Row Row]]s using the given schema. It is important to make sure that + * the structure of every [[org.apache.spark.sql.Row Row]] of the provided List matches the + * provided schema. 
Otherwise, there will be runtime exception. + * + * @group dataframes + * @since 1.6.0 + */ + @DeveloperApi + def createDataFrame(rows: JList[Row], schema: StructType): Dataset[Row] = { + sparkSession.createDataFrame(rows, schema) + } + + /** + * Applies a schema to an RDD of Java Beans. + * + * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, SELECT * queries + * will return the columns in an undefined order. + * @group dataframes + * @since 1.3.0 + */ + def createDataFrame(rdd: RDD[_], beanClass: Class[_]): Dataset[Row] = { + sparkSession.createDataFrame(rdd, beanClass) + } + + /** + * Applies a schema to an RDD of Java Beans. + * + * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, SELECT * queries + * will return the columns in an undefined order. + * @group dataframes + * @since 1.3.0 + */ + def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): Dataset[Row] = { + sparkSession.createDataFrame(rdd, beanClass) + } + + /** + * Applies a schema to a List of Java Beans. + * + * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, SELECT * queries + * will return the columns in an undefined order. + * @group dataframes + * @since 1.6.0 + */ + def createDataFrame(data: JList[_], beanClass: Class[_]): Dataset[Row] = { + sparkSession.createDataFrame(data, beanClass) + } + + /** + * Returns a [[DataFrameReader]] that can be used to read non-streaming data in as a + * `DataFrame`. + * {{{ + * sqlContext.read.parquet("/path/to/file.parquet") + * sqlContext.read.schema(schema).json("/path/to/file.json") + * }}} + * + * @group genericdata + * @since 1.4.0 + */ + def read: DataFrameReader + + /** + * Returns a `DataStreamReader` that can be used to read streaming data in as a `DataFrame`. 
+ * {{{ + * sparkSession.readStream.parquet("/path/to/directory/of/parquet/files") + * sparkSession.readStream.schema(schema).json("/path/to/directory/of/json/files") + * }}} + * + * @since 2.0.0 + */ + def readStream: DataStreamReader + + /** + * Creates an external table from the given path and returns the corresponding DataFrame. It + * will use the default data source configured by spark.sql.sources.default. + * + * @group ddl_ops + * @since 1.3.0 + */ + @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") + def createExternalTable(tableName: String, path: String): Dataset[Row] = { + sparkSession.catalog.createTable(tableName, path) + } + + /** + * Creates an external table from the given path based on a data source and returns the + * corresponding DataFrame. + * + * @group ddl_ops + * @since 1.3.0 + */ + @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") + def createExternalTable(tableName: String, path: String, source: String): Dataset[Row] = { + sparkSession.catalog.createTable(tableName, path, source) + } + + /** + * Creates an external table from the given path based on a data source and a set of options. + * Then, returns the corresponding DataFrame. + * + * @group ddl_ops + * @since 1.3.0 + */ + @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") + def createExternalTable( + tableName: String, + source: String, + options: JMap[String, String]): Dataset[Row] = { + sparkSession.catalog.createTable(tableName, source, options) + } + + /** + * (Scala-specific) Creates an external table from the given path based on a data source and a + * set of options. Then, returns the corresponding DataFrame. 
+ * + * @group ddl_ops + * @since 1.3.0 + */ + @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") + def createExternalTable( + tableName: String, + source: String, + options: Map[String, String]): Dataset[Row] = { + sparkSession.catalog.createTable(tableName, source, options) + } + + /** + * Create an external table from the given path based on a data source, a schema and a set of + * options. Then, returns the corresponding DataFrame. + * + * @group ddl_ops + * @since 1.3.0 + */ + @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") + def createExternalTable( + tableName: String, + source: String, + schema: StructType, + options: JMap[String, String]): Dataset[Row] = { + sparkSession.catalog.createTable(tableName, source, schema, options) + } + + /** + * (Scala-specific) Create an external table from the given path based on a data source, a + * schema and a set of options. Then, returns the corresponding DataFrame. + * + * @group ddl_ops + * @since 1.3.0 + */ + @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") + def createExternalTable( + tableName: String, + source: String, + schema: StructType, + options: Map[String, String]): Dataset[Row] = { + sparkSession.catalog.createTable(tableName, source, schema, options) + } + + /** + * Drops the temporary table with the given table name in the catalog. If the table has been + * cached/persisted before, it's also unpersisted. + * + * @param tableName + * the name of the table to be unregistered. + * @group basic + * @since 1.3.0 + */ + def dropTempTable(tableName: String): Unit = { + sparkSession.catalog.dropTempView(tableName) + } + + /** + * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements in a + * range from 0 to `end` (exclusive) with step value 1. 
+ * + * @since 1.4.1 + * @group dataframe + */ + def range(end: Long): Dataset[Row] = sparkSession.range(end).toDF() + + /** + * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements in a + * range from `start` to `end` (exclusive) with step value 1. + * + * @since 1.4.0 + * @group dataframe + */ + def range(start: Long, end: Long): Dataset[Row] = sparkSession.range(start, end).toDF() + + /** + * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements in a + * range from `start` to `end` (exclusive) with a step value. + * + * @since 2.0.0 + * @group dataframe + */ + def range(start: Long, end: Long, step: Long): Dataset[Row] = { + sparkSession.range(start, end, step).toDF() + } + + /** + * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements in a + * range from `start` to `end` (exclusive) with a step value, with partition number specified. + * + * @since 1.4.0 + * @group dataframe + */ + def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[Row] = { + sparkSession.range(start, end, step, numPartitions).toDF() + } + + /** + * Executes a SQL query using Spark, returning the result as a `DataFrame`. This API eagerly + * runs DDL/DML commands, but not for SELECT queries. + * + * @group basic + * @since 1.3.0 + */ + def sql(sqlText: String): Dataset[Row] = sparkSession.sql(sqlText) + + /** + * Returns the specified table as a `DataFrame`. + * + * @group ddl_ops + * @since 1.3.0 + */ + def table(tableName: String): Dataset[Row] = { + sparkSession.table(tableName) + } + + /** + * Returns a `DataFrame` containing names of existing tables in the current database. The + * returned DataFrame has three columns, database, tableName and isTemporary (a Boolean + * indicating if a table is a temporary one or not). 
+ * + * @group ddl_ops + * @since 1.3.0 + */ + def tables(): Dataset[Row] = { + mapTableDatasetOutput(sparkSession.catalog.listTables()) + } + + /** + * Returns a `DataFrame` containing names of existing tables in the given database. The returned + * DataFrame has three columns, database, tableName and isTemporary (a Boolean indicating if a + * table is a temporary one or not). + * + * @group ddl_ops + * @since 1.3.0 + */ + def tables(databaseName: String): Dataset[Row] = { + mapTableDatasetOutput(sparkSession.catalog.listTables(databaseName)) + } + + private def mapTableDatasetOutput(tables: Dataset[Table]): Dataset[Row] = { + tables + .select( + // Re-implement `org.apache.spark.sql.catalog.Table.database` method. + // Abusing `coalesce` to tell Spark all these columns are not nullable. + when( + coalesce(array_size(col("namespace")), lit(0)).equalTo(lit(1)), + coalesce(col("namespace")(0), lit(""))) + .otherwise(lit("")) + .as("namespace"), + coalesce(col("name"), lit("")).as("tableName"), + col("isTemporary")) + } + + /** + * Returns a `StreamingQueryManager` that allows managing all the + * [[org.apache.spark.sql.api.StreamingQuery StreamingQueries]] active on `this` context. + * + * @since 2.0.0 + */ + def streams: StreamingQueryManager + + /** + * Returns the names of tables in the current database as an array. + * + * @group ddl_ops + * @since 1.3.0 + */ + def tableNames(): Array[String] = { + tableNames(sparkSession.catalog.currentDatabase) + } + + /** + * Returns the names of tables in the given database as an array. 
+ * + * @group ddl_ops + * @since 1.3.0 + */ + def tableNames(databaseName: String): Array[String] = { + sparkSession.catalog + .listTables(databaseName) + .select(col("name")) + .as(Encoders.STRING) + .collect() + } + + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + // Deprecated methods + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + /** + * @deprecated + * As of 1.3.0, replaced by `createDataFrame()`. + */ + @deprecated("Use createDataFrame instead.", "1.3.0") + def applySchema(rowRDD: RDD[Row], schema: StructType): Dataset[Row] = { + createDataFrame(rowRDD, schema) + } + + /** + * @deprecated + * As of 1.3.0, replaced by `createDataFrame()`. + */ + @deprecated("Use createDataFrame instead.", "1.3.0") + def applySchema(rowRDD: JavaRDD[Row], schema: StructType): Dataset[Row] = { + createDataFrame(rowRDD, schema) + } + + /** + * @deprecated + * As of 1.3.0, replaced by `createDataFrame()`. + */ + @deprecated("Use createDataFrame instead.", "1.3.0") + def applySchema(rdd: RDD[_], beanClass: Class[_]): Dataset[Row] = { + createDataFrame(rdd, beanClass) + } + + /** + * @deprecated + * As of 1.3.0, replaced by `createDataFrame()`. + */ + @deprecated("Use createDataFrame instead.", "1.3.0") + def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): Dataset[Row] = { + createDataFrame(rdd, beanClass) + } + + /** + * Loads a Parquet file, returning the result as a `DataFrame`. This function returns an empty + * `DataFrame` if no paths are passed in. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().parquet()`. 
+ */ + @deprecated("Use read.parquet() instead.", "1.4.0") + @scala.annotation.varargs + def parquetFile(paths: String*): Dataset[Row] = { + if (paths.isEmpty) { + emptyDataFrame + } else { + read.parquet(paths: _*) + } + } + + /** + * Loads a JSON file (one object per line), returning the result as a `DataFrame`. It goes + * through the entire dataset once to determine the schema. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonFile(path: String): Dataset[Row] = { + read.json(path) + } + + /** + * Loads a JSON file (one object per line) and applies the given schema, returning the result as + * a `DataFrame`. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonFile(path: String, schema: StructType): Dataset[Row] = { + read.schema(schema).json(path) + } + + /** + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonFile(path: String, samplingRatio: Double): Dataset[Row] = { + read.option("samplingRatio", samplingRatio.toString).json(path) + } + + /** + * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a + * `DataFrame`. It goes through the entire dataset once to determine the schema. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonRDD(json: RDD[String]): Dataset[Row] = read.json(json) + + /** + * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a + * `DataFrame`. It goes through the entire dataset once to determine the schema. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. 
+ */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonRDD(json: JavaRDD[String]): Dataset[Row] = read.json(json) + + /** + * Loads an RDD[String] storing JSON objects (one object per record) and applies the given + * schema, returning the result as a `DataFrame`. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonRDD(json: RDD[String], schema: StructType): Dataset[Row] = { + read.schema(schema).json(json) + } + + /** + * Loads an JavaRDD[String] storing JSON objects (one object per record) and applies the given + * schema, returning the result as a `DataFrame`. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonRDD(json: JavaRDD[String], schema: StructType): Dataset[Row] = { + read.schema(schema).json(json) + } + + /** + * Loads an RDD[String] storing JSON objects (one object per record) inferring the schema, + * returning the result as a `DataFrame`. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonRDD(json: RDD[String], samplingRatio: Double): Dataset[Row] = { + read.option("samplingRatio", samplingRatio.toString).json(json) + } + + /** + * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the schema, + * returning the result as a `DataFrame`. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json() instead.", "1.4.0") + def jsonRDD(json: JavaRDD[String], samplingRatio: Double): Dataset[Row] = { + read.option("samplingRatio", samplingRatio.toString).json(json) + } + + /** + * Returns the dataset stored at path as a DataFrame, using the default data source configured + * by spark.sql.sources.default. 
+ * + * @group genericdata + * @deprecated + * As of 1.4.0, replaced by `read().load(path)`. + */ + @deprecated("Use read.load(path) instead.", "1.4.0") + def load(path: String): Dataset[Row] = { + read.load(path) + } + + /** + * Returns the dataset stored at path as a DataFrame, using the given data source. + * + * @group genericdata + * @deprecated + * As of 1.4.0, replaced by `read().format(source).load(path)`. + */ + @deprecated("Use read.format(source).load(path) instead.", "1.4.0") + def load(path: String, source: String): Dataset[Row] = { + read.format(source).load(path) + } + + /** + * (Java-specific) Returns the dataset specified by the given data source and a set of options + * as a DataFrame. + * + * @group genericdata + * @deprecated + * As of 1.4.0, replaced by `read().format(source).options(options).load()`. + */ + @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0") + def load(source: String, options: JMap[String, String]): Dataset[Row] = { + read.options(options).format(source).load() + } + + /** + * (Scala-specific) Returns the dataset specified by the given data source and a set of options + * as a DataFrame. + * + * @group genericdata + * @deprecated + * As of 1.4.0, replaced by `read().format(source).options(options).load()`. + */ + @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0") + def load(source: String, options: Map[String, String]): Dataset[Row] = { + read.options(options).format(source).load() + } + + /** + * (Java-specific) Returns the dataset specified by the given data source and a set of options + * as a DataFrame, using the given schema as the schema of the DataFrame. + * + * @group genericdata + * @deprecated + * As of 1.4.0, replaced by `read().format(source).schema(schema).options(options).load()`. 
+ */ + @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0") + def load(source: String, schema: StructType, options: JMap[String, String]): Dataset[Row] = { + read.format(source).schema(schema).options(options).load() + } + + /** + * (Scala-specific) Returns the dataset specified by the given data source and a set of options + * as a DataFrame, using the given schema as the schema of the DataFrame. + * + * @group genericdata + * @deprecated + * As of 1.4.0, replaced by `read().format(source).schema(schema).options(options).load()`. + */ + @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0") + def load(source: String, schema: StructType, options: Map[String, String]): Dataset[Row] = { + read.format(source).schema(schema).options(options).load() + } + + /** + * Construct a `DataFrame` representing the database table accessible via JDBC URL url named + * table. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().jdbc()`. + */ + @deprecated("Use read.jdbc() instead.", "1.4.0") + def jdbc(url: String, table: String): Dataset[Row] = { + read.jdbc(url, table, new Properties) + } + + /** + * Construct a `DataFrame` representing the database table accessible via JDBC URL url named + * table. Partitions of the table will be retrieved in parallel based on the parameters passed + * to this function. + * + * @param columnName + * the name of a column of integral type that will be used for partitioning. + * @param lowerBound + * the minimum value of `columnName` used to decide partition stride + * @param upperBound + * the maximum value of `columnName` used to decide partition stride + * @param numPartitions + * the number of partitions. the range `minValue`-`maxValue` will be split evenly into this + * many partitions + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().jdbc()`. 
+ */ + @deprecated("Use read.jdbc() instead.", "1.4.0") + def jdbc( + url: String, + table: String, + columnName: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int): Dataset[Row] = { + read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties) + } + + /** + * Construct a `DataFrame` representing the database table accessible via JDBC URL url named + * table. The theParts parameter gives a list expressions suitable for inclusion in WHERE + * clauses; each one defines one partition of the `DataFrame`. + * + * @group specificdata + * @deprecated + * As of 1.4.0, replaced by `read().jdbc()`. + */ + @deprecated("Use read.jdbc() instead.", "1.4.0") + def jdbc(url: String, table: String, theParts: Array[String]): Dataset[Row] = { + read.jdbc(url, table, theParts, new Properties) + } +} + +/** + * This SQLContext object contains utility functions to create a singleton SQLContext instance, or + * to get the created SQLContext instance. + * + * It also provides utility functions to support preference for threads in multiple sessions + * scenario, setActive could set a SQLContext for current thread, which will be returned by + * getOrCreate instead of the global one. + */ +trait SQLContextCompanion { + private[sql] type SQLContextImpl <: SQLContext + private[sql] type SparkContextImpl <: SparkContext + + /** + * Get the singleton SQLContext if it exists or create a new one using the given SparkContext. + * + * This function can be used to create a singleton SQLContext object that can be shared across + * the JVM. + * + * If there is an active SQLContext for current thread, it will be returned instead of the + * global one. + * + * @since 1.5.0 + */ + @deprecated("Use SparkSession.builder instead", "2.0.0") + def getOrCreate(sparkContext: SparkContextImpl): SQLContextImpl + + /** + * Changes the SQLContext that will be returned in this thread and its children when + * SQLContext.getOrCreate() is called. 
This can be used to ensure that a given thread receives a + * SQLContext with an isolated session, instead of the global (first created) context. + * + * @since 1.6.0 + */ + @deprecated("Use SparkSession.setActiveSession instead", "2.0.0") + def setActive(sqlContext: SQLContextImpl): Unit = { + SparkSession.setActiveSession(sqlContext.sparkSession) + } + + /** + * Clears the active SQLContext for current thread. Subsequent calls to getOrCreate will return + * the first created context instead of a thread-local override. + * + * @since 1.6.0 + */ + @deprecated("Use SparkSession.clearActiveSession instead", "2.0.0") + def clearActive(): Unit = { + SparkSession.clearActiveSession() + } +} diff --git a/sql/api/src/main/scala/org/apache/spark/sql/api/SQLImplicits.scala b/sql/api/src/main/scala/org/apache/spark/sql/api/SQLImplicits.scala index 5e022570d3ca7..200e913b5412e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/api/SQLImplicits.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/api/SQLImplicits.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, DE * * @since 1.6.0 */ -abstract class SQLImplicits extends LowPrioritySQLImplicits with Serializable { +abstract class SQLImplicits extends EncoderImplicits with Serializable { type DS[U] <: Dataset[U] protected def session: SparkSession @@ -51,8 +51,35 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits with Serializable { } } - // Primitives + /** + * Creates a [[Dataset]] from a local Seq. + * @since 1.6.0 + */ + implicit def localSeqToDatasetHolder[T: Encoder](s: Seq[T]): DatasetHolder[T, DS] = { + new DatasetHolder(session.createDataset(s).asInstanceOf[DS[T]]) + } + + /** + * Creates a [[Dataset]] from an RDD. 
+ * + * @since 1.6.0 + */ + implicit def rddToDatasetHolder[T: Encoder](rdd: RDD[T]): DatasetHolder[T, DS] = + new DatasetHolder(session.createDataset(rdd).asInstanceOf[DS[T]]) + + /** + * An implicit conversion that turns a Scala `Symbol` into a [[org.apache.spark.sql.Column]]. + * @since 1.3.0 + */ + implicit def symbolToColumn(s: Symbol): ColumnName = new ColumnName(s.name) +} +/** + * EncoderImplicits used to implicitly generate SQL Encoders. Note that these functions don't rely + * on or expose `SparkSession`. + */ +trait EncoderImplicits extends LowPrioritySQLImplicits with Serializable { + // Primitives /** @since 1.6.0 */ implicit def newIntEncoder: Encoder[Int] = Encoders.scalaInt @@ -270,28 +297,6 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits with Serializable { /** @since 1.6.1 */ implicit def newProductArrayEncoder[A <: Product: TypeTag]: Encoder[Array[A]] = newArrayEncoder(ScalaReflection.encoderFor[A]) - - /** - * Creates a [[Dataset]] from a local Seq. - * @since 1.6.0 - */ - implicit def localSeqToDatasetHolder[T: Encoder](s: Seq[T]): DatasetHolder[T, DS] = { - new DatasetHolder(session.createDataset(s).asInstanceOf[DS[T]]) - } - - /** - * Creates a [[Dataset]] from an RDD. - * - * @since 1.6.0 - */ - implicit def rddToDatasetHolder[T: Encoder](rdd: RDD[T]): DatasetHolder[T, DS] = - new DatasetHolder(session.createDataset(rdd).asInstanceOf[DS[T]]) - - /** - * An implicit conversion that turns a Scala `Symbol` into a [[org.apache.spark.sql.Column]]. 
- * @since 1.3.0 - */ - implicit def symbolToColumn(s: Symbol): ColumnName = new ColumnName(s.name) } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/api/SparkSession.scala b/sql/api/src/main/scala/org/apache/spark/sql/api/SparkSession.scala index 64b0a87c573d3..af2144cb9eb41 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/api/SparkSession.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/api/SparkSession.scala @@ -30,7 +30,7 @@ import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Encoder, ExperimentalMethods, Row, RuntimeConfig, SparkSessionExtensions, SQLContext} +import org.apache.spark.sql.{Encoder, ExperimentalMethods, Row, RuntimeConfig, SparkSessionExtensions} import org.apache.spark.sql.internal.{SessionState, SharedState} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types.StructType @@ -470,7 +470,6 @@ abstract class SparkSession extends Serializable with Closeable { * is. * @since 3.5.0 */ - @Experimental def sql(sqlText: String, args: Array[_]): Dataset[Row] /** @@ -488,7 +487,6 @@ abstract class SparkSession extends Serializable with Closeable { * `array()`, `struct()`, in that case it is taken as is. * @since 3.4.0 */ - @Experimental def sql(sqlText: String, args: Map[String, Any]): Dataset[Row] /** @@ -506,7 +504,6 @@ abstract class SparkSession extends Serializable with Closeable { * `array()`, `struct()`, in that case it is taken as is. 
* @since 3.4.0 */ - @Experimental def sql(sqlText: String, args: util.Map[String, Any]): Dataset[Row] = { sql(sqlText, args.asScala.toMap) } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/avro/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/avro/functions.scala index fffad557aca5e..e30a9e7c2ba01 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/avro/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/avro/functions.scala @@ -94,4 +94,32 @@ object functions { def to_avro(data: Column, jsonFormatSchema: String): Column = { Column.fn("to_avro", data, lit(jsonFormatSchema)) } + + /** + * Returns schema in the DDL format of the avro schema in JSON string format. + * + * @param jsonFormatSchema + * the avro schema in JSON string format. + * + * @since 4.0.0 + */ + @Experimental + def schema_of_avro(jsonFormatSchema: String): Column = { + Column.fn("schema_of_avro", lit(jsonFormatSchema)) + } + + /** + * Returns schema in the DDL format of the avro schema in JSON string format. + * + * @param jsonFormatSchema + * the avro schema in JSON string format. + * @param options + * options to control how the Avro record is parsed. 
+ * + * @since 4.0.0 + */ + @Experimental + def schema_of_avro(jsonFormatSchema: String, options: java.util.Map[String, String]): Column = { + Column.fnWithOptions("schema_of_avro", options.asScala.iterator, lit(jsonFormatSchema)) + } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/AgnosticEncoder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/AgnosticEncoder.scala index 9ae7de97abf58..d998502ac1b25 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/AgnosticEncoder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/AgnosticEncoder.scala @@ -231,6 +231,8 @@ object AgnosticEncoders { // Nullable leaf encoders case object NullEncoder extends LeafEncoder[java.lang.Void](NullType) case object StringEncoder extends LeafEncoder[String](StringType) + case class CharEncoder(length: Int) extends LeafEncoder[String](CharType(length)) + case class VarcharEncoder(length: Int) extends LeafEncoder[String](VarcharType(length)) case object BinaryEncoder extends LeafEncoder[Array[Byte]](BinaryType) case object ScalaBigIntEncoder extends LeafEncoder[BigInt](DecimalType.BigIntDecimal) case object JavaBigIntEncoder extends LeafEncoder[JBigInt](DecimalType.BigIntDecimal) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index 8b6da805a6e87..7260ff8f9fefd 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import scala.reflect.classTag import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder, 
CalendarIntervalEncoder, DateEncoder, DayTimeIntervalEncoder, EncoderField, InstantEncoder, IterableEncoder, JavaDecimalEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, NullEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, TimestampEncoder, UDTEncoder, VariantEncoder, YearMonthIntervalEncoder} +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder, CalendarIntervalEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, EncoderField, InstantEncoder, IterableEncoder, JavaDecimalEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, NullEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, TimestampEncoder, UDTEncoder, VarcharEncoder, VariantEncoder, YearMonthIntervalEncoder} import org.apache.spark.sql.errors.{DataTypeErrorsBase, ExecutionErrors} import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types._ @@ -80,7 +80,11 @@ object RowEncoder extends DataTypeErrorsBase { case DoubleType => BoxedDoubleEncoder case dt: DecimalType => JavaDecimalEncoder(dt, lenientSerialization = true) case BinaryType => BinaryEncoder - case _: StringType => StringEncoder + case CharType(length) if SqlApiConf.get.preserveCharVarcharTypeInfo => + CharEncoder(length) + case VarcharType(length) if SqlApiConf.get.preserveCharVarcharTypeInfo => + VarcharEncoder(length) + case s: StringType if StringHelper.isPlainString(s) => StringEncoder case TimestampType if SqlApiConf.get.datetimeJava8ApiEnabled => InstantEncoder(lenient) case TimestampType => TimestampEncoder(lenient) case TimestampNTZType => LocalDateTimeEncoder diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala index 71e8517a4164e..94e014fb77f1b 100644 --- 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala @@ -57,6 +57,14 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { } } + /** + * Create a multi-part identifier. + */ + override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = + withOrigin(ctx) { + ctx.parts.asScala.map(_.getText).toSeq + } + /** * Resolve/create a primitive type. */ @@ -76,10 +84,11 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { case (TIMESTAMP_LTZ, Nil) => TimestampType case (STRING, Nil) => typeCtx.children.asScala.toSeq match { - case Seq(_) => SqlApiConf.get.defaultStringType + case Seq(_) => StringType case Seq(_, ctx: CollateClauseContext) => - val collationName = visitCollateClause(ctx) - val collationId = CollationFactory.collationNameToId(collationName) + val collationNameParts = visitCollateClause(ctx).toArray + val collationId = CollationFactory.collationNameToId( + CollationFactory.resolveFullyQualifiedName(collationNameParts)) StringType(collationId) } case (CHARACTER | CHAR, length :: Nil) => CharType(length.getText.toInt) @@ -219,8 +228,8 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] { /** * Returns a collation name. 
*/ - override def visitCollateClause(ctx: CollateClauseContext): String = withOrigin(ctx) { - ctx.identifier.getText + override def visitCollateClause(ctx: CollateClauseContext): Seq[String] = withOrigin(ctx) { + visitMultipartIdentifier(ctx.collationName) } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala index 9c043320dc812..8dff1ceccfcfe 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala @@ -22,7 +22,7 @@ import java.time.temporal.ChronoField import java.util.{Calendar, TimeZone} import java.util.Calendar.{DAY_OF_MONTH, DST_OFFSET, ERA, HOUR_OF_DAY, MINUTE, MONTH, SECOND, YEAR, ZONE_OFFSET} -import scala.collection.mutable.AnyRefMap +import scala.collection.mutable.HashMap import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} @@ -285,12 +285,12 @@ object RebaseDateTime { } // Loads rebasing info from an JSON file. JSON records in the files should conform to - // `JsonRebaseRecord`. AnyRefMap is used here instead of Scala's immutable map because - // it is 2 times faster in DateTimeRebaseBenchmark. - private[sql] def loadRebaseRecords(fileName: String): AnyRefMap[String, RebaseInfo] = { + // `JsonRebaseRecord`. Mutable HashMap is used here instead of AnyRefMap due to SPARK-49491. 
+ private[sql] def loadRebaseRecords(fileName: String): HashMap[String, RebaseInfo] = { val file = SparkClassUtils.getSparkClassLoader.getResource(fileName) val jsonRebaseRecords = mapper.readValue[Seq[JsonRebaseRecord]](file) - val anyRefMap = new AnyRefMap[String, RebaseInfo]((3 * jsonRebaseRecords.size) / 2) + val hashMap = new HashMap[String, RebaseInfo] + hashMap.sizeHint(jsonRebaseRecords.size) jsonRebaseRecords.foreach { jsonRecord => val rebaseInfo = RebaseInfo(jsonRecord.switches, jsonRecord.diffs) var i = 0 @@ -299,9 +299,9 @@ object RebaseDateTime { rebaseInfo.diffs(i) = rebaseInfo.diffs(i) * MICROS_PER_SECOND i += 1 } - anyRefMap.update(jsonRecord.tz, rebaseInfo) + hashMap.update(jsonRecord.tz, rebaseInfo) } - anyRefMap + hashMap } /** @@ -313,7 +313,7 @@ object RebaseDateTime { */ private val gregJulianRebaseMap = loadRebaseRecords("gregorian-julian-rebase-micros.json") - private def getLastSwitchTs(rebaseMap: AnyRefMap[String, RebaseInfo]): Long = { + private def getLastSwitchTs(rebaseMap: HashMap[String, RebaseInfo]): Long = { val latestTs = rebaseMap.values.map(_.switches.last).max require( rebaseMap.values.forall(_.diffs.last == 0), @@ -404,7 +404,7 @@ object RebaseDateTime { if (micros >= lastSwitchGregorianTs) { micros } else { - val rebaseRecord = gregJulianRebaseMap.getOrNull(timeZoneId) + val rebaseRecord = gregJulianRebaseMap.get(timeZoneId).orNull if (rebaseRecord == null || micros < rebaseRecord.switches(0)) { rebaseGregorianToJulianMicros(TimeZone.getTimeZone(timeZoneId), micros) } else { @@ -526,7 +526,7 @@ object RebaseDateTime { if (micros >= lastSwitchJulianTs) { micros } else { - val rebaseRecord = julianGregRebaseMap.getOrNull(timeZoneId) + val rebaseRecord = julianGregRebaseMap.get(timeZoneId).orNull if (rebaseRecord == null || micros < rebaseRecord.switches(0)) { rebaseJulianToGregorianMicros(TimeZone.getTimeZone(timeZoneId), micros) } else { diff --git 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkCharVarcharUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkCharVarcharUtils.scala index 2a26c079e8d4d..51b2c40f9bf2e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkCharVarcharUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkCharVarcharUtils.scala @@ -54,8 +54,7 @@ trait SparkCharVarcharUtils { StructType(fields.map { field => field.copy(dataType = replaceCharVarcharWithString(field.dataType)) }) - case _: CharType => StringType - case _: VarcharType => StringType + case CharType(_) | VarcharType(_) if !SqlApiConf.get.preserveCharVarcharTypeInfo => StringType case _ => dt } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 0608322be13b3..e8c50be9f5513 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -73,7 +73,7 @@ object SparkStringUtils extends Logging { /** * Format a sequence with semantics similar to calling .mkString(). Any elements beyond - * maxNumToStringFields will be dropped and replaced by a "... N more fields" placeholder. + * `maxFields` will be dropped and replaced by a "... N more fields" placeholder. * * @return * the trimmed and formatted string. @@ -90,10 +90,11 @@ object SparkStringUtils extends Logging { "Truncated the string representation of a plan since it was too large. This " + s"behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.") } - val numFields = math.max(0, maxFields - 1) - seq - .take(numFields) - .mkString(start, sep, sep + "... 
" + (seq.length - numFields) + " more fields" + end) + val numFields = math.max(0, maxFields) + val restNum = seq.length - numFields + val ending = (if (numFields == 0) "" else sep) + + (if (restNum == 0) "" else s"... $restNum more fields") + end + seq.take(numFields).mkString(start, sep, ending) } else { seq.mkString(start, sep, end) } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala index 3e63b8281f739..617cab4b2a39b 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/CompilationErrors.scala @@ -41,6 +41,18 @@ private[sql] trait CompilationErrors extends DataTypeErrorsBase { cause = Option(cause)) } + def describeJsonNotExtendedError(tableName: String): AnalysisException = { + new AnalysisException( + errorClass = "DESCRIBE_JSON_NOT_EXTENDED", + messageParameters = Map("tableName" -> tableName)) + } + + def describeColJsonUnsupportedError(): AnalysisException = { + new AnalysisException( + errorClass = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON", + messageParameters = Map.empty) + } + def cannotFindDescriptorFileError(filePath: String, cause: Throwable): AnalysisException = { new AnalysisException( errorClass = "PROTOBUF_DESCRIPTOR_FILE_NOT_FOUND", diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index 2a04212ee2585..9f509fa843a2b 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -1147,6 +1147,77 @@ object functions { */ def sum_distinct(e: Column): Column = Column.fn("sum", isDistinct = true, e) + /** + * Aggregate function: returns the concatenation of non-null input values. 
+ * + * @group agg_funcs + * @since 4.0.0 + */ + def listagg(e: Column): Column = Column.fn("listagg", e) + + /** + * Aggregate function: returns the concatenation of non-null input values, separated by the + * delimiter. + * + * @group agg_funcs + * @since 4.0.0 + */ + def listagg(e: Column, delimiter: Column): Column = Column.fn("listagg", e, delimiter) + + /** + * Aggregate function: returns the concatenation of distinct non-null input values. + * + * @group agg_funcs + * @since 4.0.0 + */ + def listagg_distinct(e: Column): Column = Column.fn("listagg", isDistinct = true, e) + + /** + * Aggregate function: returns the concatenation of distinct non-null input values, separated by + * the delimiter. + * + * @group agg_funcs + * @since 4.0.0 + */ + def listagg_distinct(e: Column, delimiter: Column): Column = + Column.fn("listagg", isDistinct = true, e, delimiter) + + /** + * Aggregate function: returns the concatenation of non-null input values. Alias for `listagg`. + * + * @group agg_funcs + * @since 4.0.0 + */ + def string_agg(e: Column): Column = Column.fn("string_agg", e) + + /** + * Aggregate function: returns the concatenation of non-null input values, separated by the + * delimiter. Alias for `listagg`. + * + * @group agg_funcs + * @since 4.0.0 + */ + def string_agg(e: Column, delimiter: Column): Column = Column.fn("string_agg", e, delimiter) + + /** + * Aggregate function: returns the concatenation of distinct non-null input values. Alias for + * `listagg`. + * + * @group agg_funcs + * @since 4.0.0 + */ + def string_agg_distinct(e: Column): Column = Column.fn("string_agg", isDistinct = true, e) + + /** + * Aggregate function: returns the concatenation of distinct non-null input values, separated by + * the delimiter. Alias for `listagg`. + * + * @group agg_funcs + * @since 4.0.0 + */ + def string_agg_distinct(e: Column, delimiter: Column): Column = + Column.fn("string_agg", isDistinct = true, e, delimiter) + /** * Aggregate function: alias for `var_samp`. 
* diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index 773494f418659..76cd436b39b58 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -40,6 +40,7 @@ private[sql] trait SqlApiConf { def timestampType: AtomicType def allowNegativeScaleOfDecimalEnabled: Boolean def charVarcharAsString: Boolean + def preserveCharVarcharTypeInfo: Boolean def datetimeJava8ApiEnabled: Boolean def sessionLocalTimeZone: String def legacyTimeParserPolicy: LegacyBehaviorPolicy.Value @@ -47,7 +48,6 @@ private[sql] trait SqlApiConf { def stackTracesInDataFrameContext: Int def dataFrameQueryContextEnabled: Boolean def legacyAllowUntypedScalaUDFs: Boolean - def allowReadingUnknownCollations: Boolean } private[sql] object SqlApiConf { @@ -60,7 +60,6 @@ private[sql] object SqlApiConf { SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY } val DEFAULT_COLLATION: String = SqlApiConfHelper.DEFAULT_COLLATION - val ALLOW_READING_UNKNOWN_COLLATIONS: String = SqlApiConfHelper.ALLOW_READING_UNKNOWN_COLLATIONS def get: SqlApiConf = SqlApiConfHelper.getConfGetter.get()() @@ -82,6 +81,7 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf { override def timestampType: AtomicType = TimestampType override def allowNegativeScaleOfDecimalEnabled: Boolean = false override def charVarcharAsString: Boolean = false + override def preserveCharVarcharTypeInfo: Boolean = false override def datetimeJava8ApiEnabled: Boolean = false override def sessionLocalTimeZone: String = TimeZone.getDefault.getID override def legacyTimeParserPolicy: LegacyBehaviorPolicy.Value = LegacyBehaviorPolicy.CORRECTED @@ -89,5 +89,4 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf { override def stackTracesInDataFrameContext: Int = 1 override def dataFrameQueryContextEnabled: Boolean = true override def 
legacyAllowUntypedScalaUDFs: Boolean = false - override def allowReadingUnknownCollations: Boolean = false } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala index c8d6f395d4506..13ef13e5894e0 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala @@ -33,8 +33,6 @@ private[sql] object SqlApiConfHelper { val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" val DEFAULT_COLLATION: String = "spark.sql.session.collation.default" - val ALLOW_READING_UNKNOWN_COLLATIONS: String = - "spark.sql.collation.allowReadingUnknownCollations" val confGetter: AtomicReference[() => SqlApiConf] = { new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/columnNodes.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/columnNodes.scala index e3cc320a8b00f..ef4bdb8d5bdff 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/columnNodes.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/columnNodes.scala @@ -70,6 +70,19 @@ private[sql] trait ColumnNode extends ColumnNodeLike { trait ColumnNodeLike { private[internal] def normalize(): ColumnNodeLike = this private[internal] def sql: String + private[internal] def children: Seq[ColumnNodeLike] + + private[sql] def foreach(f: ColumnNodeLike => Unit): Unit = { + f(this) + children.foreach(_.foreach(f)) + } + + private[sql] def collect[A](pf: PartialFunction[ColumnNodeLike, A]): Seq[A] = { + val ret = new collection.mutable.ArrayBuffer[A]() + val lifted = pf.lift + foreach(node => lifted(node).foreach(ret.+=)) + ret.toSeq + } } private[internal] object ColumnNode { @@ -118,6 +131,8 @@ private[sql] 
case class Literal( case v: Short => toSQLValue(v) case _ => value.toString } + + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } /** @@ -141,6 +156,8 @@ private[sql] case class UnresolvedAttribute( copy(planId = None, origin = NO_ORIGIN) override def sql: String = nameParts.map(n => if (n.contains(".")) s"`$n`" else n).mkString(".") + + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } private[sql] object UnresolvedAttribute { @@ -167,24 +184,6 @@ private[sql] object UnresolvedAttribute { apply(unparsedIdentifier, None, false, CurrentOrigin.get) } -/** - * Reference to an attribute in the outer context, used for Subqueries. - * - * @param nameParts - * name of the attribute. - * @param planId - * id of the plan (Dataframe) that produces the attribute. - */ -private[sql] case class LazyOuterReference( - nameParts: Seq[String], - planId: Option[Long] = None, - override val origin: Origin = CurrentOrigin.get) - extends ColumnNode { - override private[internal] def normalize(): LazyOuterReference = - copy(planId = None, origin = NO_ORIGIN) - override def sql: String = nameParts.map(n => if (n.contains(".")) s"`$n`" else n).mkString(".") -} - /** * Reference to all columns in a namespace (global, a Dataframe, or a nested struct). 
* @@ -201,6 +200,7 @@ private[sql] case class UnresolvedStar( override private[internal] def normalize(): UnresolvedStar = copy(planId = None, origin = NO_ORIGIN) override def sql: String = unparsedTarget.map(_ + ".*").getOrElse("*") + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } /** @@ -226,6 +226,8 @@ private[sql] case class UnresolvedFunction( copy(arguments = ColumnNode.normalize(arguments), origin = NO_ORIGIN) override def sql: String = functionName + argumentsToSql(arguments) + + override private[internal] def children: Seq[ColumnNodeLike] = arguments } /** @@ -240,6 +242,7 @@ private[sql] case class SqlExpression( extends ColumnNode { override private[internal] def normalize(): SqlExpression = copy(origin = NO_ORIGIN) override def sql: String = expression + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } /** @@ -268,6 +271,8 @@ private[sql] case class Alias( } s"${child.sql} AS $alias" } + + override private[internal] def children: Seq[ColumnNodeLike] = Seq(child) } /** @@ -293,10 +298,14 @@ private[sql] case class Cast( override def sql: String = { s"${optionToSql(evalMode)}CAST(${child.sql} AS ${dataType.sql})" } + + override private[internal] def children: Seq[ColumnNodeLike] = Seq(child) ++ evalMode } private[sql] object Cast { - sealed abstract class EvalMode(override val sql: String = "") extends ColumnNodeLike + sealed abstract class EvalMode(override val sql: String = "") extends ColumnNodeLike { + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty + } object Legacy extends EvalMode object Ansi extends EvalMode object Try extends EvalMode("TRY_") @@ -318,6 +327,7 @@ private[sql] case class UnresolvedRegex( override private[internal] def normalize(): UnresolvedRegex = copy(planId = None, origin = NO_ORIGIN) override def sql: String = regex + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } /** @@ -340,13 +350,19 @@ private[sql] case class 
SortOrder( copy(child = child.normalize(), origin = NO_ORIGIN) override def sql: String = s"${child.sql} ${sortDirection.sql} ${nullOrdering.sql}" + + override def children: Seq[ColumnNodeLike] = Seq(child, sortDirection, nullOrdering) } private[sql] object SortOrder { - sealed abstract class SortDirection(override val sql: String) extends ColumnNodeLike + sealed abstract class SortDirection(override val sql: String) extends ColumnNodeLike { + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty + } object Ascending extends SortDirection("ASC") object Descending extends SortDirection("DESC") - sealed abstract class NullOrdering(override val sql: String) extends ColumnNodeLike + sealed abstract class NullOrdering(override val sql: String) extends ColumnNodeLike { + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty + } object NullsFirst extends NullOrdering("NULLS FIRST") object NullsLast extends NullOrdering("NULLS LAST") } @@ -370,6 +386,8 @@ private[sql] case class Window( origin = NO_ORIGIN) override def sql: String = s"${windowFunction.sql} OVER (${windowSpec.sql})" + + override private[internal] def children: Seq[ColumnNodeLike] = Seq(windowFunction, windowSpec) } private[sql] case class WindowSpec( @@ -388,6 +406,9 @@ private[sql] case class WindowSpec( optionToSql(frame)) parts.filter(_.nonEmpty).mkString(" ") } + override private[internal] def children: Seq[ColumnNodeLike] = { + partitionColumns ++ sortColumns ++ frame + } } private[sql] case class WindowFrame( @@ -399,15 +420,19 @@ private[sql] case class WindowFrame( copy(lower = lower.normalize(), upper = upper.normalize()) override private[internal] def sql: String = s"${frameType.sql} BETWEEN ${lower.sql} AND ${upper.sql}" + override private[internal] def children: Seq[ColumnNodeLike] = Seq(frameType, lower, upper) } private[sql] object WindowFrame { - sealed abstract class FrameType(override val sql: String) extends ColumnNodeLike + sealed abstract class 
FrameType(override val sql: String) extends ColumnNodeLike { + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty + } object Row extends FrameType("ROWS") object Range extends FrameType("RANGE") sealed abstract class FrameBoundary extends ColumnNodeLike { override private[internal] def normalize(): FrameBoundary = this + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } object CurrentRow extends FrameBoundary { override private[internal] def sql = "CURRENT ROW" @@ -421,6 +446,7 @@ private[sql] object WindowFrame { case class Value(value: ColumnNode) extends FrameBoundary { override private[internal] def normalize(): Value = copy(value.normalize()) override private[internal] def sql: String = value.sql + override private[internal] def children: Seq[ColumnNodeLike] = Seq(value) } def value(i: Int): Value = Value(Literal(i, Some(IntegerType))) def value(l: Long): Value = Value(Literal(l, Some(LongType))) @@ -452,6 +478,8 @@ private[sql] case class LambdaFunction( } argumentsSql + " -> " + function.sql } + + override private[internal] def children: Seq[ColumnNodeLike] = function +: arguments } object LambdaFunction { @@ -473,6 +501,8 @@ private[sql] case class UnresolvedNamedLambdaVariable( copy(origin = NO_ORIGIN) override def sql: String = name + + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } object UnresolvedNamedLambdaVariable { @@ -513,6 +543,8 @@ private[sql] case class UnresolvedExtractValue( copy(child = child.normalize(), extraction = extraction.normalize(), origin = NO_ORIGIN) override def sql: String = s"${child.sql}[${extraction.sql}]" + + override private[internal] def children: Seq[ColumnNodeLike] = Seq(child, extraction) } /** @@ -539,6 +571,9 @@ private[sql] case class UpdateFields( case Some(value) => s"update_field(${structExpression.sql}, $fieldName, ${value.sql})" case None => s"drop_field(${structExpression.sql}, $fieldName)" } + override private[internal] def children: 
Seq[ColumnNodeLike] = { + structExpression +: valueExpression.toSeq + } } /** @@ -567,6 +602,11 @@ private[sql] case class CaseWhenOtherwise( branches.map(cv => s" WHEN ${cv._1.sql} THEN ${cv._2.sql}").mkString + otherwise.map(o => s" ELSE ${o.sql}").getOrElse("") + " END" + + override private[internal] def children: Seq[ColumnNodeLike] = { + val branchChildren = branches.flatMap { case (condition, value) => Seq(condition, value) } + branchChildren ++ otherwise + } } /** @@ -588,8 +628,26 @@ private[sql] case class InvokeInlineUserDefinedFunction( override def sql: String = function.name + argumentsToSql(arguments) + + override private[internal] def children: Seq[ColumnNodeLike] = arguments } private[sql] trait UserDefinedFunctionLike { def name: String = SparkClassUtils.getFormattedClassName(this) } + +/** + * A marker node to trigger Spark Classic DataFrame lazy analysis. + * + * @param child + * that needs to be lazily analyzed in Spark Classic DataFrame. + */ +private[sql] case class LazyExpression( + child: ColumnNode, + override val origin: Origin = CurrentOrigin.get) + extends ColumnNode { + override private[internal] def normalize(): ColumnNode = + copy(child = child.normalize(), origin = NO_ORIGIN) + override def sql: String = "lazy" + argumentsToSql(Seq(child)) + override private[internal] def children: Seq[ColumnNodeLike] = Seq(child) +} diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractStringType.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractStringType.scala index 49d8bf9e001ab..6dcb8a876b7a2 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractStringType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractStringType.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.internal.types -import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType} /** @@ -26,7 +25,7 @@ import 
org.apache.spark.sql.types.{AbstractDataType, DataType, StringType} abstract class AbstractStringType(supportsTrimCollation: Boolean = false) extends AbstractDataType with Serializable { - override private[sql] def defaultConcreteType: DataType = SqlApiConf.get.defaultStringType + override private[sql] def defaultConcreteType: DataType = StringType override private[sql] def simpleString: String = "string" override private[sql] def acceptsType(other: DataType): Boolean = other match { diff --git a/sql/api/src/main/scala/org/apache/spark/sql/streaming/StatefulProcessor.scala b/sql/api/src/main/scala/org/apache/spark/sql/streaming/StatefulProcessor.scala index 55477b4dda0c9..b47629cb54396 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/streaming/StatefulProcessor.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/streaming/StatefulProcessor.scala @@ -20,16 +20,25 @@ package org.apache.spark.sql.streaming import java.io.Serializable import org.apache.spark.annotation.{Evolving, Experimental} +import org.apache.spark.sql.api.EncoderImplicits import org.apache.spark.sql.errors.ExecutionErrors /** * Represents the arbitrary stateful logic that needs to be provided by the user to perform * stateful manipulations on keyed streams. + * + * Users can also explicitly use `import implicits._` to access the EncoderImplicits and use the + * state variable APIs relying on implicit encoders. */ @Experimental @Evolving private[sql] abstract class StatefulProcessor[K, I, O] extends Serializable { + // scalastyle:off + // Disable style checker so "implicits" object can start with lowercase i + object implicits extends EncoderImplicits + // scalastyle:on + /** * Handle to the stateful processor that provides access to the state store and other stateful * processing related APIs. 
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala index fc32248b4baf3..53dfc5e9b2828 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/ArrayType.scala @@ -110,4 +110,13 @@ case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataT override private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = { f(this) || elementType.existsRecursively(f) } + + override private[spark] def transformRecursively( + f: PartialFunction[DataType, DataType]): DataType = { + if (f.isDefinedAt(this)) { + f(this) + } else { + ArrayType(elementType.transformRecursively(f), containsNull) + } + } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/CharType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/CharType.scala index 5e30ff6e52a14..68dad6c87c01e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/CharType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/CharType.scala @@ -17,14 +17,19 @@ package org.apache.spark.sql.types +import org.json4s.JsonAST.{JString, JValue} + import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.catalyst.util.CollationFactory @Experimental -case class CharType(length: Int) extends AtomicType { +case class CharType(length: Int) + extends StringType(CollationFactory.UTF8_BINARY_COLLATION_ID, FixedLength(length)) { require(length >= 0, "The length of char type cannot be negative.") override def defaultSize: Int = length override def typeName: String = s"char($length)" + override def jsonValue: JValue = JString(typeName) override def toString: String = s"CharType($length)" private[spark] override def asNullable: CharType = this } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index 
4cf7d8efb96a5..db7e7c0ae1885 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -27,7 +27,7 @@ import org.json4s.JsonAST.JValue import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ -import org.apache.spark.{SparkException, SparkIllegalArgumentException, SparkThrowable} +import org.apache.spark.{SparkIllegalArgumentException, SparkThrowable} import org.apache.spark.annotation.Stable import org.apache.spark.sql.catalyst.analysis.SqlApiAnalysis import org.apache.spark.sql.catalyst.parser.DataTypeParser @@ -105,6 +105,13 @@ abstract class DataType extends AbstractDataType { */ private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = f(this) + /** + * Recursively applies the provided partial function `f` to transform this DataType tree. + */ + private[spark] def transformRecursively(f: PartialFunction[DataType, DataType]): DataType = { + if (f.isDefinedAt(this)) f(this) else this + } + final override private[sql] def defaultConcreteType: DataType = this override private[sql] def acceptsType(other: DataType): Boolean = sameType(other) @@ -340,17 +347,8 @@ object DataType { fields.collect { case (fieldPath, JString(collation)) => collation.split("\\.", 2) match { case Array(provider: String, collationName: String) => - try { - CollationFactory.assertValidProvider(provider) - fieldPath -> collationName - } catch { - case e: SparkException - if e.getCondition == "COLLATION_INVALID_PROVIDER" && - SqlApiConf.get.allowReadingUnknownCollations => - // If the collation provider is unknown and the config for reading such - // collations is enabled, return the UTF8_BINARY collation. 
- fieldPath -> "UTF8_BINARY" - } + CollationFactory.assertValidProvider(provider) + fieldPath -> collationName } }.toMap @@ -359,16 +357,7 @@ object DataType { } private def stringTypeWithCollation(collationName: String): StringType = { - try { - StringType(CollationFactory.collationNameToId(collationName)) - } catch { - case e: SparkException - if e.getCondition == "COLLATION_INVALID_NAME" && - SqlApiConf.get.allowReadingUnknownCollations => - // If the collation name is unknown and the config for reading such collations is enabled, - // return the UTF8_BINARY collation. - StringType(CollationFactory.UTF8_BINARY_COLLATION_ID) - } + StringType(CollationFactory.collationNameToId(collationName)) } protected[types] def buildFormattedString( @@ -458,7 +447,7 @@ object DataType { private[sql] def equalsIgnoreCompatibleCollation(from: DataType, to: DataType): Boolean = { (from, to) match { // String types with possibly different collations are compatible. - case (_: StringType, _: StringType) => true + case (a: StringType, b: StringType) => a.constraint == b.constraint case (fromDataType, toDataType) => fromDataType == toDataType } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala index 1dfb9aaf9e29b..de656c13ca4bf 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/MapType.scala @@ -89,6 +89,18 @@ case class MapType(keyType: DataType, valueType: DataType, valueContainsNull: Bo override private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = { f(this) || keyType.existsRecursively(f) || valueType.existsRecursively(f) } + + override private[spark] def transformRecursively( + f: PartialFunction[DataType, DataType]): DataType = { + if (f.isDefinedAt(this)) { + f(this) + } else { + MapType( + keyType.transformRecursively(f), + valueType.transformRecursively(f), + valueContainsNull) + } + 
} } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala index 1eb645e37c4aa..cd3182ab2dcde 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala @@ -21,6 +21,7 @@ import org.json4s.JsonAST.{JString, JValue} import org.apache.spark.annotation.Stable import org.apache.spark.sql.catalyst.util.CollationFactory +import org.apache.spark.sql.internal.SqlApiConf /** * The data type representing `String` values. Please use the singleton `DataTypes.StringType`. @@ -30,7 +31,11 @@ import org.apache.spark.sql.catalyst.util.CollationFactory * The id of collation for this StringType. */ @Stable -class StringType private (val collationId: Int) extends AtomicType with Serializable { +class StringType private[sql] ( + val collationId: Int, + val constraint: StringConstraint = NoConstraint) + extends AtomicType + with Serializable { /** * Support for Binary Equality implies that strings are considered equal only if they are byte @@ -39,7 +44,8 @@ class StringType private (val collationId: Int) extends AtomicType with Serializ * equality and hashing). 
*/ private[sql] def supportsBinaryEquality: Boolean = - CollationFactory.fetchCollation(collationId).supportsBinaryEquality + collationId == CollationFactory.UTF8_BINARY_COLLATION_ID || + CollationFactory.fetchCollation(collationId).supportsBinaryEquality private[sql] def supportsLowercaseEquality: Boolean = CollationFactory.fetchCollation(collationId).supportsLowercaseEquality @@ -75,15 +81,26 @@ class StringType private (val collationId: Int) extends AtomicType with Serializ */ override def typeName: String = if (isUTF8BinaryCollation) "string" - else s"string collate ${CollationFactory.fetchCollation(collationId).collationName}" + else s"string collate $collationName" + + override def toString: String = + if (isUTF8BinaryCollation) "StringType" + else s"StringType($collationName)" + + private[sql] def collationName: String = + CollationFactory.fetchCollation(collationId).collationName // Due to backwards compatibility and compatibility with other readers // all string types are serialized in json as regular strings and // the collation information is written to struct field metadata override def jsonValue: JValue = JString("string") - override def equals(obj: Any): Boolean = - obj.isInstanceOf[StringType] && obj.asInstanceOf[StringType].collationId == collationId + override def equals(obj: Any): Boolean = { + obj match { + case s: StringType => s.collationId == collationId && s.constraint == constraint + case _ => false + } + } override def hashCode(): Int = collationId.hashCode() @@ -101,7 +118,8 @@ class StringType private (val collationId: Int) extends AtomicType with Serializ * @since 1.3.0 */ @Stable -case object StringType extends StringType(0) { +case object StringType + extends StringType(CollationFactory.UTF8_BINARY_COLLATION_ID, NoConstraint) { private[spark] def apply(collationId: Int): StringType = new StringType(collationId) def apply(collation: String): StringType = { @@ -109,3 +127,65 @@ case object StringType extends StringType(0) { new 
StringType(collationId) } } + +sealed trait StringConstraint + +case object StringHelper extends PartialOrdering[StringConstraint] { + override def tryCompare(x: StringConstraint, y: StringConstraint): Option[Int] = { + (x, y) match { + case (NoConstraint, NoConstraint) => Some(0) + case (NoConstraint, _) => Some(-1) + case (_, NoConstraint) => Some(1) + case (FixedLength(l1), FixedLength(l2)) => Some(l2.compareTo(l1)) + case (FixedLength(l1), MaxLength(l2)) if l1 <= l2 => Some(1) + case (MaxLength(l1), FixedLength(l2)) if l1 >= l2 => Some(-1) + case (MaxLength(l1), MaxLength(l2)) => Some(l2.compareTo(l1)) + case _ => None + } + } + + override def lteq(x: StringConstraint, y: StringConstraint): Boolean = { + tryCompare(x, y).exists(_ <= 0) + } + + override def gteq(x: StringConstraint, y: StringConstraint): Boolean = { + tryCompare(x, y).exists(_ >= 0) + } + + override def equiv(x: StringConstraint, y: StringConstraint): Boolean = { + tryCompare(x, y).contains(0) + } + + def isPlainString(s: StringType): Boolean = s.constraint == NoConstraint + + def isMoreConstrained(a: StringType, b: StringType): Boolean = + gteq(a.constraint, b.constraint) + + def tightestCommonString(s1: StringType, s2: StringType): Option[StringType] = { + if (s1.collationId != s2.collationId) { + return None + } + if (!SqlApiConf.get.preserveCharVarcharTypeInfo) { + return Some(StringType(s1.collationId)) + } + Some((s1.constraint, s2.constraint) match { + case (FixedLength(l1), FixedLength(l2)) => CharType(l1.max(l2)) + case (MaxLength(l1), FixedLength(l2)) => VarcharType(l1.max(l2)) + case (FixedLength(l1), MaxLength(l2)) => VarcharType(l1.max(l2)) + case (MaxLength(l1), MaxLength(l2)) => VarcharType(l1.max(l2)) + case _ => StringType(s1.collationId) + }) + } + + def removeCollation(s: StringType): StringType = s match { + case CharType(length) => CharType(length) + case VarcharType(length) => VarcharType(length) + case _: StringType => StringType + } +} + +case object NoConstraint extends 
StringConstraint + +case class FixedLength(length: Int) extends StringConstraint + +case class MaxLength(length: Int) extends StringConstraint diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala index d4e590629921c..f33a49e686a59 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -147,6 +147,18 @@ case class StructField( if (metadata.contains("comment")) Option(metadata.getString("comment")) else None } + /** + * Return the default value of this StructField. This is used for storing the default value of a + * function parameter. + */ + private[sql] def getDefault(): Option[String] = { + if (metadata.contains("default")) { + Option(metadata.getString("default")) + } else { + None + } + } + /** * Updates the StructField with a new current default value. */ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala index 07f6b50bd4a7a..cc95d8ee94b02 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -502,6 +502,18 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru override private[spark] def existsRecursively(f: (DataType) => Boolean): Boolean = { f(this) || fields.exists(field => field.dataType.existsRecursively(f)) } + + override private[spark] def transformRecursively( + f: PartialFunction[DataType, DataType]): DataType = { + if (f.isDefinedAt(this)) { + return f(this) + } + + val newFields = fields.map { field => + field.copy(dataType = field.dataType.transformRecursively(f)) + } + StructType(newFields) + } } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala index 4993e249b3059..6272cb03bd797 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala @@ -40,8 +40,11 @@ private[sql] object UpCastRule { case (DateType, TimestampNTZType) => true case (TimestampNTZType, TimestampType) => true case (TimestampType, TimestampNTZType) => true - case (_: AtomicType, StringType) => true - case (_: CalendarIntervalType, StringType) => true + + case (s1: StringType, s2: StringType) => StringHelper.isMoreConstrained(s1, s2) + // TODO: allow upcast from int/double/decimal to char/varchar of sufficient length + case (_: AtomicType, s: StringType) => StringHelper.isPlainString(s) + case (_: CalendarIntervalType, s: StringType) => StringHelper.isPlainString(s) case (NullType, _) => true // Spark supports casting between long and timestamp, please see `longToTimestamp` and @@ -69,7 +72,7 @@ private[sql] object UpCastRule { case _ => false } - private def legalNumericPrecedence(from: DataType, to: DataType): Boolean = { + def legalNumericPrecedence(from: DataType, to: DataType): Boolean = { val fromPrecedence = numericPrecedence.indexOf(from) val toPrecedence = numericPrecedence.indexOf(to) fromPrecedence >= 0 && fromPrecedence < toPrecedence diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/VarcharType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/VarcharType.scala index 3d21e2e65804e..22f7947b25037 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/VarcharType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/VarcharType.scala @@ -16,14 +16,19 @@ */ package org.apache.spark.sql.types +import org.json4s.JsonAST.{JString, JValue} + import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.catalyst.util.CollationFactory @Experimental -case class VarcharType(length: Int) extends AtomicType { +case class 
VarcharType(length: Int) + extends StringType(CollationFactory.UTF8_BINARY_COLLATION_ID, MaxLength(length)) { require(length >= 0, "The length of varchar type cannot be negative.") override def defaultSize: Int = length override def typeName: String = s"varchar($length)" + override def jsonValue: JValue = JString(typeName) override def toString: String = s"VarcharType($length)" private[spark] override def asNullable: VarcharType = this } diff --git a/sql/catalyst/benchmarks/CalendarIntervalBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/CalendarIntervalBenchmark-jdk21-results.txt index 290568730a22c..24bc5a5efcaae 100644 --- a/sql/catalyst/benchmarks/CalendarIntervalBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/CalendarIntervalBenchmark-jdk21-results.txt @@ -2,10 +2,10 @@ CalendarInterval ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor CalendarInterval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Call setInterval & getInterval 1028 1028 0 130.5 7.7 1.0X +Call setInterval & getInterval 1040 1051 16 129.1 7.7 1.0X diff --git a/sql/catalyst/benchmarks/CalendarIntervalBenchmark-results.txt b/sql/catalyst/benchmarks/CalendarIntervalBenchmark-results.txt index 526008a3fced1..a5bd7ce02cc8f 100644 --- a/sql/catalyst/benchmarks/CalendarIntervalBenchmark-results.txt +++ b/sql/catalyst/benchmarks/CalendarIntervalBenchmark-results.txt @@ -2,10 +2,10 @@ CalendarInterval ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor CalendarInterval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Call setInterval & getInterval 1018 1021 4 131.8 7.6 1.0X +Call setInterval & getInterval 1030 1030 0 130.3 7.7 1.0X diff --git a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk21-results.txt index b5635dcb20d33..8b9dd199d9df8 100644 --- a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk21-results.txt @@ -1,105 +1,105 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 1390.0 0.7 1.0X -Use EnumSet 2 2 0 441.2 2.3 0.3X +Use HashSet 3 3 0 291.9 3.4 1.0X +Use EnumSet 4 4 0 227.7 4.4 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 485.8 2.1 1.0X -Use EnumSet 2 2 0 544.4 1.8 1.1X +Use HashSet 7 8 1 138.0 7.2 1.0X +Use EnumSet 5 5 0 185.8 5.4 1.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 493.1 2.0 1.0X -Use EnumSet 2 2 0 575.2 1.7 1.2X +Use HashSet 14 14 0 71.9 13.9 1.0X +Use EnumSet 5 5 0 186.1 5.4 2.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 9 9 0 107.2 9.3 1.0X -Use EnumSet 2 2 0 534.9 1.9 5.0X +Use HashSet 11 11 1 91.4 10.9 1.0X +Use EnumSet 5 5 0 186.1 5.4 2.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 10 0 98.5 10.1 1.0X -Use EnumSet 2 2 0 534.9 1.9 5.4X +Use HashSet 12 13 0 80.5 12.4 1.0X +Use EnumSet 5 5 0 188.4 5.3 2.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 0 0 0 408.8 2.4 1.0X -Use EnumSet 1 1 0 136.6 7.3 0.3X +Use HashSet 0 0 0 397.1 2.5 1.0X +Use EnumSet 0 0 0 291.5 3.4 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create 1 item 
Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 102.5 9.8 1.0X -Use EnumSet 0 0 0 291.4 3.4 2.8X +Use HashSet 2 2 0 49.9 20.0 1.0X +Use EnumSet 0 0 0 291.2 3.4 5.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 7 7 0 14.6 68.6 1.0X -Use EnumSet 1 1 0 132.3 7.6 9.1X +Use HashSet 6 6 0 16.0 62.4 1.0X +Use EnumSet 1 1 0 132.4 7.6 8.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 34 35 0 2.9 342.4 1.0X -Use EnumSet 1 1 0 150.1 6.7 51.4X +Use HashSet 29 30 1 3.4 292.1 1.0X +Use EnumSet 1 1 0 150.4 6.7 43.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 63 63 1 1.6 627.2 1.0X -Use EnumSet 1 1 0 138.3 7.2 86.8X +Use HashSet 56 56 1 1.8 557.0 1.0X +Use EnumSet 1 1 0 138.6 7.2 77.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 4 4 0 227.4 4.4 1.0X -Use EnumSet 5 5 0 187.2 5.3 0.8X +Use HashSet 4 4 0 265.6 3.8 1.0X +Use EnumSet 5 5 0 196.1 5.1 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 14 14 0 70.4 14.2 1.0X -Use EnumSet 7 7 0 150.5 6.6 2.1X +Use HashSet 9 10 0 110.6 9.0 1.0X +Use EnumSet 6 6 0 160.0 6.3 1.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 27 28 0 36.6 27.3 1.0X -Use EnumSet 7 7 0 151.3 6.6 4.1X +Use HashSet 22 22 0 45.4 22.0 1.0X +Use EnumSet 6 6 0 163.3 6.1 3.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 54 55 1 18.4 54.4 1.0X -Use EnumSet 7 7 0 147.6 6.8 8.0X +Use HashSet 49 49 0 20.4 49.1 1.0X +Use EnumSet 6 6 0 
158.7 6.3 7.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Use HashSet 84 85 0 11.9 83.9 1.0X -Use EnumSet 7 7 0 137.2 7.3 11.5X +Use HashSet 76 77 1 13.1 76.2 1.0X +Use EnumSet 6 7 0 159.0 6.3 12.1X diff --git a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt index 1794f82b64b11..4b1c3dce2b115 100644 --- a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt +++ b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt @@ -1,105 +1,105 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 1441.2 0.7 1.0X -Use EnumSet 2 2 0 563.7 1.8 0.4X +Use HashSet 4 4 0 279.7 3.6 1.0X +Use EnumSet 4 4 0 225.9 4.4 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 445.8 2.2 1.0X -Use EnumSet 2 2 0 554.4 1.8 1.2X +Use HashSet 9 9 1 110.9 9.0 1.0X +Use EnumSet 5 5 0 185.6 5.4 1.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server 
VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 547.8 1.8 1.0X -Use EnumSet 2 2 0 561.3 1.8 1.0X +Use HashSet 14 14 2 74.0 13.5 1.0X +Use EnumSet 5 5 0 185.6 5.4 2.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 8 8 0 121.9 8.2 1.0X -Use EnumSet 2 2 0 545.1 1.8 4.5X +Use HashSet 14 14 1 71.1 14.1 1.0X +Use EnumSet 5 5 0 185.7 5.4 2.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 9 9 0 110.1 9.1 1.0X -Use EnumSet 2 2 0 545.0 1.8 5.0X +Use HashSet 15 15 0 68.2 14.7 1.0X +Use EnumSet 5 5 0 185.7 5.4 2.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 0 0 0 409.8 2.4 1.0X -Use EnumSet 1 1 0 127.6 7.8 0.3X +Use HashSet 0 0 0 407.9 2.5 1.0X +Use EnumSet 0 0 0 225.2 4.4 0.6X 
-OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 60.0 16.7 1.0X -Use EnumSet 1 1 0 145.0 6.9 2.4X +Use HashSet 2 2 0 48.3 20.7 1.0X +Use EnumSet 1 1 0 87.6 11.4 1.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 10 0 10.2 97.7 1.0X -Use EnumSet 1 1 0 137.8 7.3 13.5X +Use HashSet 10 11 1 9.6 103.8 1.0X +Use EnumSet 1 1 0 103.2 9.7 10.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 33 33 1 3.1 327.8 1.0X -Use EnumSet 1 1 0 137.9 7.3 45.2X +Use HashSet 40 40 0 2.5 395.0 1.0X +Use EnumSet 1 1 0 99.3 10.1 39.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 60 60 0 1.7 596.5 1.0X -Use EnumSet 1 1 0 131.7 
7.6 78.6X +Use HashSet 64 64 1 1.6 639.0 1.0X +Use EnumSet 1 1 0 108.8 9.2 69.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 4 4 0 250.2 4.0 1.0X -Use EnumSet 5 5 0 190.5 5.2 0.8X +Use HashSet 5 5 0 215.6 4.6 1.0X +Use EnumSet 5 5 0 194.3 5.1 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 13 14 0 74.9 13.3 1.0X -Use EnumSet 7 7 0 148.9 6.7 2.0X +Use HashSet 10 11 1 98.4 10.2 1.0X +Use EnumSet 6 6 0 159.7 6.3 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 32 33 1 31.6 31.7 1.0X -Use EnumSet 7 7 0 150.4 6.7 4.8X +Use HashSet 25 26 1 40.4 24.7 1.0X +Use EnumSet 6 6 0 158.8 6.3 3.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Use HashSet 55 58 9 18.2 55.1 1.0X -Use EnumSet 7 7 0 146.6 6.8 8.1X +Use HashSet 54 55 1 18.4 54.3 1.0X +Use EnumSet 6 7 0 155.6 6.4 8.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test create and contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Use HashSet 82 83 1 12.1 82.3 1.0X -Use EnumSet 7 7 0 145.2 6.9 12.0X +Use HashSet 80 82 1 12.4 80.3 1.0X +Use EnumSet 6 7 0 156.7 6.4 12.6X diff --git a/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt index 73f125fc87862..fa12bcbaa3c38 100644 --- a/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt @@ -2,23 +2,23 @@ Escape ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Escape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Legacy 7441 7453 11 0.1 7440.7 1.0X -New 768 770 1 1.3 768.3 9.7X +Legacy 9203 9215 8 0.1 9203.3 1.0X +New 813 816 2 1.2 813.1 11.3X ================================================================================================ Unescape ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Unescape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Legacy 4446 4454 5 0.2 4446.4 1.0X -New 605 611 3 1.7 605.1 7.3X +Legacy 4679 4687 5 0.2 4678.5 1.0X +New 590 595 5 1.7 589.7 7.9X diff --git a/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt b/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt index 87f5177d28715..dcdef85ea89d8 100644 --- a/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt +++ b/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt @@ -2,23 +2,23 @@ Escape ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Escape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Legacy 6851 6867 7 0.1 6850.8 1.0X -New 741 755 38 1.3 741.0 9.2X +Legacy 8620 8633 11 0.1 8620.5 1.0X +New 779 786 4 1.3 779.3 11.1X ================================================================================================ Unescape ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Unescape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Legacy 5810 5823 15 0.2 5809.8 1.0X -New 597 602 5 1.7 
596.6 9.7X +Legacy 5714 5728 8 0.2 5714.0 1.0X +New 593 597 3 1.7 592.5 9.6X diff --git a/sql/catalyst/benchmarks/GenericArrayDataBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/GenericArrayDataBenchmark-jdk21-results.txt index b09cc75270118..edf44bac9a395 100644 --- a/sql/catalyst/benchmarks/GenericArrayDataBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/GenericArrayDataBenchmark-jdk21-results.txt @@ -1,10 +1,10 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor constructor: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -arrayOfAny 3 3 0 3243.9 0.3 1.0X -arrayOfAnyAsObject 3 3 0 3243.9 0.3 1.0X -arrayOfAnyAsSeq 225 230 6 44.5 22.5 0.0X -arrayOfInt 273 278 5 36.7 27.3 0.0X -arrayOfIntAsObject 274 278 3 36.5 27.4 0.0X +arrayOfAny 6 6 0 1611.9 0.6 1.0X +arrayOfAnyAsObject 6 6 0 1611.9 0.6 1.0X +arrayOfAnyAsSeq 175 175 1 57.3 17.5 0.0X +arrayOfInt 271 272 0 36.8 27.1 0.0X +arrayOfIntAsObject 250 251 1 40.0 25.0 0.0X diff --git a/sql/catalyst/benchmarks/GenericArrayDataBenchmark-results.txt b/sql/catalyst/benchmarks/GenericArrayDataBenchmark-results.txt index 56d0a136c2933..fae20f2b0ac35 100644 --- a/sql/catalyst/benchmarks/GenericArrayDataBenchmark-results.txt +++ b/sql/catalyst/benchmarks/GenericArrayDataBenchmark-results.txt @@ -1,10 +1,10 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor constructor: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -arrayOfAny 6 6 0 1619.9 0.6 1.0X -arrayOfAnyAsObject 6 6 0 1619.9 0.6 1.0X -arrayOfAnyAsSeq 157 
158 1 63.5 15.7 0.0X -arrayOfInt 252 254 4 39.6 25.2 0.0X -arrayOfIntAsObject 252 253 2 39.6 25.2 0.0X +arrayOfAny 6 6 0 1611.8 0.6 1.0X +arrayOfAnyAsObject 6 6 0 1611.9 0.6 1.0X +arrayOfAnyAsSeq 157 157 2 63.8 15.7 0.0X +arrayOfInt 253 254 0 39.5 25.3 0.0X +arrayOfIntAsObject 253 254 1 39.5 25.3 0.0X diff --git a/sql/catalyst/benchmarks/HashBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/HashBenchmark-jdk21-results.txt index d246505fc26ca..829099b78e422 100644 --- a/sql/catalyst/benchmarks/HashBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/HashBenchmark-jdk21-results.txt @@ -2,69 +2,69 @@ single ints ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For single ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2151 2154 4 249.6 4.0 1.0X -codegen version 3580 3597 23 149.9 6.7 0.6X -codegen version 64-bit 3385 3408 33 158.6 6.3 0.6X -codegen HiveHash version 2884 2886 2 186.1 5.4 0.7X +interpreted version 2089 2090 2 257.0 3.9 1.0X +codegen version 3541 3544 4 151.6 6.6 0.6X +codegen version 64-bit 3238 3269 44 165.8 6.0 0.6X +codegen HiveHash version 2563 2568 8 209.5 4.8 0.8X ================================================================================================ single longs ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For single longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -interpreted version 2710 2711 1 198.1 5.0 1.0X -codegen version 5082 5083 0 105.6 9.5 0.5X -codegen version 64-bit 3962 3964 3 135.5 7.4 0.7X -codegen HiveHash version 3309 3310 1 162.2 6.2 0.8X +interpreted version 2718 2719 1 197.5 5.1 1.0X +codegen version 4520 4525 8 118.8 8.4 0.6X +codegen version 64-bit 3863 3874 15 139.0 7.2 0.7X +codegen HiveHash version 3158 3161 4 170.0 5.9 0.9X ================================================================================================ normal ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For normal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 1425 1425 1 1.5 679.5 1.0X -codegen version 1648 1648 1 1.3 785.8 0.9X -codegen version 64-bit 725 726 2 2.9 345.7 2.0X -codegen HiveHash version 3675 3677 3 0.6 1752.2 0.4X +interpreted version 1335 1341 8 1.6 636.6 1.0X +codegen version 1803 1803 0 1.2 859.9 0.7X +codegen version 64-bit 735 735 0 2.9 350.3 1.8X +codegen HiveHash version 3635 3639 6 0.6 1733.2 0.4X ================================================================================================ array ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -interpreted version 1018 1019 1 0.1 7768.0 1.0X -codegen version 3632 3633 2 0.0 27706.7 0.3X -codegen version 64-bit 2340 2342 3 0.1 17849.7 0.4X -codegen HiveHash version 750 751 1 0.2 5721.5 1.4X +interpreted version 958 959 2 0.1 7308.1 1.0X +codegen version 3436 3441 7 0.0 26216.6 0.3X +codegen version 64-bit 2352 2353 1 0.1 17945.7 0.4X +codegen HiveHash version 685 689 5 0.2 5227.3 1.4X ================================================================================================ map ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 0 0 0 97.3 10.3 1.0X -codegen version 249 249 1 0.0 60732.6 0.0X -codegen version 64-bit 169 170 1 0.0 41356.6 0.0X -codegen HiveHash version 27 28 0 0.1 6709.4 0.0X +interpreted version 0 0 0 90.4 11.1 1.0X +codegen version 271 271 0 0.0 66159.1 0.0X +codegen version 64-bit 185 185 0 0.0 45145.5 0.0X +codegen HiveHash version 30 30 0 0.1 7378.6 0.0X diff --git a/sql/catalyst/benchmarks/HashBenchmark-results.txt b/sql/catalyst/benchmarks/HashBenchmark-results.txt index 571a8a1d82881..30934fe57cd42 100644 --- a/sql/catalyst/benchmarks/HashBenchmark-results.txt +++ b/sql/catalyst/benchmarks/HashBenchmark-results.txt @@ -2,69 +2,69 @@ single ints ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor Hash For single ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2276 2276 0 235.9 4.2 1.0X -codegen version 3664 3669 7 146.5 6.8 0.6X -codegen version 64-bit 3478 3483 6 154.3 6.5 0.7X -codegen HiveHash version 3008 3010 3 178.5 5.6 0.8X +interpreted version 2157 2161 6 248.9 4.0 1.0X +codegen version 3655 3660 6 146.9 6.8 0.6X +codegen version 64-bit 3509 3510 1 153.0 6.5 0.6X +codegen HiveHash version 2857 2859 3 187.9 5.3 0.8X ================================================================================================ single longs ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For single longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 3006 3007 2 178.6 5.6 1.0X -codegen version 5317 5322 7 101.0 9.9 0.6X -codegen version 64-bit 3761 3765 6 142.8 7.0 0.8X -codegen HiveHash version 3401 3429 41 157.9 6.3 0.9X +interpreted version 3009 3011 4 178.4 5.6 1.0X +codegen version 5332 5336 6 100.7 9.9 0.6X +codegen version 64-bit 3997 3999 2 134.3 7.4 0.8X +codegen HiveHash version 3310 3310 1 162.2 6.2 0.9X ================================================================================================ normal ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For normal: Best Time(ms) Avg 
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2549 2549 0 0.8 1215.4 1.0X -codegen version 2291 2292 1 0.9 1092.5 1.1X -codegen version 64-bit 724 726 2 2.9 345.4 3.5X -codegen HiveHash version 3719 3726 10 0.6 1773.2 0.7X +interpreted version 2557 2557 0 0.8 1219.1 1.0X +codegen version 2217 2218 1 0.9 1057.3 1.2X +codegen version 64-bit 703 704 1 3.0 335.3 3.6X +codegen HiveHash version 3734 3741 10 0.6 1780.7 0.7X ================================================================================================ array ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 990 992 4 0.1 7549.7 1.0X -codegen version 3619 3619 0 0.0 27611.8 0.3X -codegen version 64-bit 2385 2386 0 0.1 18199.3 0.4X -codegen HiveHash version 727 727 0 0.2 5543.0 1.4X +interpreted version 951 954 3 0.1 7252.8 1.0X +codegen version 3450 3452 3 0.0 26319.1 0.3X +codegen version 64-bit 2296 2296 0 0.1 17516.4 0.4X +codegen HiveHash version 703 704 1 0.2 5360.9 1.4X ================================================================================================ map ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash For map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -interpreted version 0 0 0 101.8 9.8 1.0X -codegen version 240 240 0 0.0 58478.2 0.0X -codegen version 64-bit 169 170 0 0.0 41373.6 0.0X -codegen HiveHash version 29 29 0 0.1 7006.4 0.0X +interpreted version 0 0 0 97.4 10.3 1.0X +codegen version 223 223 0 0.0 54377.2 0.0X +codegen version 64-bit 152 152 0 0.0 37102.0 0.0X +codegen HiveHash version 26 26 0 0.2 6290.5 0.0X diff --git a/sql/catalyst/benchmarks/HashByteArrayBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/HashByteArrayBenchmark-jdk21-results.txt index 4e4d54c6da6fe..e1cfa115e2730 100644 --- a/sql/catalyst/benchmarks/HashByteArrayBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/HashByteArrayBenchmark-jdk21-results.txt @@ -2,76 +2,76 @@ Benchmark for MurMurHash 3 and xxHash64 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 8: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 10 10 0 205.6 4.9 1.0X -xxHash 64-bit 11 11 0 190.0 5.3 0.9X -HiveHasher 14 14 0 152.3 6.6 0.7X +Murmur3_x86_32 10 10 0 205.7 4.9 1.0X +xxHash 64-bit 10 10 0 200.6 5.0 1.0X +HiveHasher 14 14 1 151.9 6.6 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 16: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 14 15 0 
145.8 6.9 1.0X -xxHash 64-bit 13 13 0 161.0 6.2 1.1X -HiveHasher 23 23 1 92.4 10.8 0.6X +Murmur3_x86_32 14 15 0 145.5 6.9 1.0X +xxHash 64-bit 12 13 2 168.2 5.9 1.2X +HiveHasher 23 23 1 90.9 11.0 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 24: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 20 20 0 104.9 9.5 1.0X -xxHash 64-bit 15 15 0 139.7 7.2 1.3X -HiveHasher 34 34 0 61.9 16.1 0.6X +Murmur3_x86_32 20 20 0 104.0 9.6 1.0X +xxHash 64-bit 15 15 0 143.1 7.0 1.4X +HiveHasher 34 34 0 62.5 16.0 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 31: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 31 31 0 68.5 14.6 1.0X -xxHash 64-bit 26 26 0 80.0 12.5 1.2X -HiveHasher 45 45 1 46.9 21.3 0.7X +Murmur3_x86_32 32 32 0 65.9 15.2 1.0X +xxHash 64-bit 27 28 0 76.3 13.1 1.2X +HiveHasher 44 44 0 48.1 20.8 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 95: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 68 70 3 30.7 32.6 1.0X -xxHash 64-bit 57 57 0 36.8 27.2 1.2X -HiveHasher 158 158 0 13.3 75.5 0.4X +Murmur3_x86_32 70 71 0 29.8 33.6 1.0X +xxHash 64-bit 58 58 0 36.4 27.5 1.2X 
+HiveHasher 157 157 0 13.4 74.8 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 287: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 208 210 3 10.1 99.2 1.0X -xxHash 64-bit 102 103 0 20.5 48.8 2.0X -HiveHasher 531 532 0 3.9 253.4 0.4X +Murmur3_x86_32 198 198 0 10.6 94.5 1.0X +xxHash 64-bit 102 102 0 20.6 48.6 1.9X +HiveHasher 533 533 0 3.9 254.0 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 1055: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 711 718 12 3.0 338.9 1.0X -xxHash 64-bit 296 298 4 7.1 141.0 2.4X -HiveHasher 2031 2032 2 1.0 968.4 0.4X +Murmur3_x86_32 709 717 13 3.0 338.3 1.0X +xxHash 64-bit 293 294 1 7.2 139.8 2.4X +HiveHasher 2042 2043 1 1.0 973.9 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 2079: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 1398 1401 5 1.5 666.7 1.0X -xxHash 64-bit 552 553 1 3.8 263.3 2.5X -HiveHasher 4026 4026 0 0.5 1919.5 0.3X +Murmur3_x86_32 1388 1388 1 1.5 661.7 1.0X +xxHash 64-bit 550 550 1 3.8 262.2 2.5X +HiveHasher 4050 4052 3 0.5 1931.1 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 
64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 8223: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 5707 5709 3 0.4 2721.3 1.0X -xxHash 64-bit 2074 2074 1 1.0 988.7 2.8X -HiveHasher 15993 15993 0 0.1 7626.2 0.4X +Murmur3_x86_32 5726 5727 1 0.4 2730.6 1.0X +xxHash 64-bit 2068 2069 2 1.0 986.1 2.8X +HiveHasher 16089 16098 12 0.1 7671.8 0.4X diff --git a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt index 236b9e5b404d4..d49fd90cd3707 100644 --- a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt +++ b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt @@ -2,76 +2,76 @@ Benchmark for MurMurHash 3 and xxHash64 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 8: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 11 11 0 184.1 5.4 1.0X -xxHash 64-bit 10 10 0 214.5 4.7 1.2X -HiveHasher 14 14 0 150.9 6.6 0.8X +Murmur3_x86_32 11 12 0 183.2 5.5 1.0X +xxHash 64-bit 10 10 0 213.3 4.7 1.2X +HiveHasher 14 14 0 149.8 6.7 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 16: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 17 17 0 123.5 8.1 1.0X -xxHash 64-bit 12 12 0 176.1 5.7 1.4X -HiveHasher 22 23 0 93.3 10.7 0.8X +Murmur3_x86_32 17 17 0 123.2 8.1 1.0X +xxHash 64-bit 12 12 0 175.5 5.7 1.4X +HiveHasher 23 23 0 92.6 10.8 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 24: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 23 24 0 89.4 11.2 1.0X -xxHash 64-bit 14 14 0 145.9 6.9 1.6X -HiveHasher 33 33 0 63.2 15.8 0.7X +Murmur3_x86_32 24 24 0 89.0 11.2 1.0X +xxHash 64-bit 14 15 0 145.4 6.9 1.6X +HiveHasher 33 33 0 62.9 15.9 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 31: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 36 36 0 58.7 17.0 1.0X -xxHash 64-bit 27 28 0 76.4 13.1 1.3X -HiveHasher 42 44 5 49.4 20.2 0.8X +Murmur3_x86_32 36 36 0 59.1 16.9 1.0X +xxHash 64-bit 28 28 0 76.0 13.2 1.3X +HiveHasher 43 44 4 49.2 20.3 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 95: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 87 87 0 24.2 41.3 1.0X 
-xxHash 64-bit 61 62 0 34.1 29.3 1.4X -HiveHasher 158 158 0 13.3 75.2 0.5X +Murmur3_x86_32 87 87 0 24.0 41.6 1.0X +xxHash 64-bit 64 64 0 32.9 30.4 1.4X +HiveHasher 159 159 0 13.2 75.6 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 287: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 244 244 0 8.6 116.3 1.0X -xxHash 64-bit 117 117 1 18.0 55.6 2.1X -HiveHasher 531 531 0 3.9 253.4 0.5X +Murmur3_x86_32 244 245 1 8.6 116.4 1.0X +xxHash 64-bit 123 123 0 17.1 58.5 2.0X +HiveHasher 534 535 0 3.9 254.8 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 1055: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 873 873 0 2.4 416.1 1.0X -xxHash 64-bit 387 388 1 5.4 184.6 2.3X -HiveHasher 2032 2032 0 1.0 968.7 0.4X +Murmur3_x86_32 878 879 0 2.4 418.9 1.0X +xxHash 64-bit 400 401 1 5.2 190.9 2.2X +HiveHasher 2045 2045 0 1.0 974.9 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 2079: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 1704 1704 0 1.2 812.5 1.0X -xxHash 64-bit 762 763 1 2.8 363.2 2.2X -HiveHasher 4024 4024 0 0.5 1918.7 0.4X +Murmur3_x86_32 1715 1715 0 1.2 817.9 1.0X 
+xxHash 64-bit 782 782 0 2.7 372.9 2.2X +HiveHasher 4050 4066 22 0.5 1931.3 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 8223: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 6702 6703 1 0.3 3195.8 1.0X -xxHash 64-bit 2999 3003 6 0.7 1429.8 2.2X -HiveHasher 15981 15981 1 0.1 7620.1 0.4X +Murmur3_x86_32 6744 6747 3 0.3 3216.0 1.0X +xxHash 64-bit 3043 3044 1 0.7 1451.2 2.2X +HiveHasher 16085 16085 0 0.1 7669.8 0.4X diff --git a/sql/catalyst/benchmarks/HexBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/HexBenchmark-jdk21-results.txt index c1b127d9e7884..88eee350370b9 100644 --- a/sql/catalyst/benchmarks/HexBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/HexBenchmark-jdk21-results.txt @@ -2,13 +2,13 @@ UnHex Comparison ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Cardinality 1000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Common Codecs 4444 4451 11 0.2 4444.1 1.0X -Java 5500 5533 41 0.2 5500.5 0.8X -Spark 3466 3472 6 0.3 3466.0 1.3X -Spark Binary 2625 2627 2 0.4 2625.3 1.7X +Common Codecs 4912 4952 35 0.2 4912.5 1.0X +Java 5772 5781 14 0.2 5772.1 0.9X +Spark 3482 3488 10 0.3 3482.0 1.4X +Spark Binary 2638 2639 0 0.4 2638.3 1.9X diff --git a/sql/catalyst/benchmarks/HexBenchmark-results.txt b/sql/catalyst/benchmarks/HexBenchmark-results.txt index c544346c34d33..adc459ceb8c7c 100644 --- 
a/sql/catalyst/benchmarks/HexBenchmark-results.txt +++ b/sql/catalyst/benchmarks/HexBenchmark-results.txt @@ -2,13 +2,13 @@ UnHex Comparison ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Cardinality 1000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Common Codecs 4794 4800 6 0.2 4793.6 1.0X -Java 4247 4262 16 0.2 4247.2 1.1X -Spark 3957 3963 8 0.3 3957.5 1.2X -Spark Binary 2743 2745 2 0.4 2743.4 1.7X +Common Codecs 4900 4906 5 0.2 4900.0 1.0X +Java 4133 4143 10 0.2 4133.2 1.2X +Spark 3987 3988 1 0.3 3986.6 1.2X +Spark Binary 2762 2766 3 0.4 2761.6 1.8X diff --git a/sql/catalyst/benchmarks/InternalRowComparableWrapperBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/InternalRowComparableWrapperBenchmark-jdk21-results.txt index 1cdf1d8e42753..e852e1f715ba4 100644 --- a/sql/catalyst/benchmarks/InternalRowComparableWrapperBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/InternalRowComparableWrapperBenchmark-jdk21-results.txt @@ -1,7 +1,7 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor internal row comparable wrapper: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -toSet 100 102 3 2.0 500.5 1.0X -mergePartitions 183 185 2 1.1 913.5 0.5X +toSet 100 102 3 2.0 501.6 1.0X +mergePartitions 180 182 2 1.1 900.1 0.6X diff --git a/sql/catalyst/benchmarks/InternalRowComparableWrapperBenchmark-results.txt 
b/sql/catalyst/benchmarks/InternalRowComparableWrapperBenchmark-results.txt index b920e5255016e..705c53b53bb3a 100644 --- a/sql/catalyst/benchmarks/InternalRowComparableWrapperBenchmark-results.txt +++ b/sql/catalyst/benchmarks/InternalRowComparableWrapperBenchmark-results.txt @@ -1,7 +1,7 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor internal row comparable wrapper: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -toSet 113 115 3 1.8 566.2 1.0X -mergePartitions 206 208 2 1.0 1030.5 0.5X +toSet 114 116 2 1.8 570.9 1.0X +mergePartitions 208 209 1 1.0 1040.6 0.5X diff --git a/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-jdk21-results.txt index 384cce30b67aa..c58763bed6876 100644 --- a/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-jdk21-results.txt @@ -2,13 +2,13 @@ unsafe projection ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor unsafe projection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -single long 1327 1327 0 202.4 4.9 1.0X -single nullable long 2362 2377 22 113.6 8.8 0.6X -7 primitive types 7062 7064 2 38.0 26.3 0.2X -7 nullable primitive types 10610 10625 21 25.3 39.5 0.1X +single long 1330 1330 0 201.8 5.0 1.0X +single nullable long 2375 2389 20 113.0 8.8 0.6X +7 primitive types 7116 7120 6 37.7 
26.5 0.2X +7 nullable primitive types 10688 10694 8 25.1 39.8 0.1X diff --git a/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-results.txt b/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-results.txt index 60c49d2917eb5..06fb444c3e730 100644 --- a/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-results.txt +++ b/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-results.txt @@ -2,13 +2,13 @@ unsafe projection ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor unsafe projection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -single long 1363 1363 1 197.0 5.1 1.0X -single nullable long 2454 2456 2 109.4 9.1 0.6X -7 primitive types 6944 6946 2 38.7 25.9 0.2X -7 nullable primitive types 10300 10314 19 26.1 38.4 0.1X +single long 1380 1382 3 194.5 5.1 1.0X +single nullable long 2449 2450 2 109.6 9.1 0.6X +7 primitive types 7002 7003 2 38.3 26.1 0.2X +7 nullable primitive types 10355 10370 20 25.9 38.6 0.1X diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java index 4200619d3c5f9..310d18ddb3486 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java @@ -51,7 +51,7 @@ public class ExpressionInfo { "window_funcs", "xml_funcs", "table_funcs", "url_funcs", "variant_funcs")); private static final Set validSources = - new HashSet<>(Arrays.asList("built-in", "hive", "python_udf", "scala_udf", + new HashSet<>(Arrays.asList("built-in", "hive", 
"python_udf", "scala_udf", "sql_udf", "java_udf", "python_udtf", "internal")); public String getClassName() { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/RowBasedKeyValueBatch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/RowBasedKeyValueBatch.java index c057c36ca8204..5074348a1fd6a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/RowBasedKeyValueBatch.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/RowBasedKeyValueBatch.java @@ -174,7 +174,7 @@ public final UnsafeRow getValueRow(int rowId) { */ @Override public final long spill(long size, MemoryConsumer trigger) throws IOException { - logger.warn("Calling spill() on RowBasedKeyValueBatch. Will not spill but return 0."); + logger.debug("Calling spill() on RowBasedKeyValueBatch. Will not spill but return 0."); return 0; } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionUtils.java index 2bad67d426af6..38bdcbec2069d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionUtils.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; -import org.apache.spark.sql.catalyst.expressions.SharedFactory; import org.apache.spark.sql.catalyst.json.CreateJacksonParser; import org.apache.spark.sql.catalyst.util.GenericArrayData; import org.apache.spark.unsafe.types.UTF8String; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagedTable.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagedTable.java index 60b250adb41ef..cbaea8cad8582 100644 --- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagedTable.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagedTable.java @@ -21,7 +21,9 @@ import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.connector.metric.CustomTaskMetric; import org.apache.spark.sql.connector.write.LogicalWriteInfo; +import org.apache.spark.sql.connector.write.Write; import org.apache.spark.sql.types.StructType; /** @@ -52,4 +54,16 @@ public interface StagedTable extends Table { * table's writers. */ void abortStagedChanges(); + + /** + * Retrieve driver metrics after a commit. This is analogous + * to {@link Write#reportDriverMetrics()}. Note that these metrics must be included in the + * supported custom metrics reported by `supportedCustomMetrics` of the + * {@link StagingTableCatalog} that returned the staged table. + * + * @return an Array of commit metric values. Throws if the table has not been committed yet. 
+ */ + default CustomTaskMetric[] reportDriverMetrics() throws RuntimeException { + return new CustomTaskMetric[0]; + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java index eead1ade40791..f457a4a3d7863 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/StagingTableCatalog.java @@ -21,11 +21,13 @@ import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.connector.metric.CustomMetric; import org.apache.spark.sql.connector.write.LogicalWriteInfo; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; import org.apache.spark.sql.connector.write.BatchWrite; +import org.apache.spark.sql.connector.write.Write; import org.apache.spark.sql.connector.write.WriterCommitMessage; import org.apache.spark.sql.errors.QueryCompilationErrors; import org.apache.spark.sql.types.StructType; @@ -200,4 +202,14 @@ default StagedTable stageCreateOrReplace( return stageCreateOrReplace( ident, CatalogV2Util.v2ColumnsToStructType(columns), partitions, properties); } + + /** + * @return An Array of commit metrics that are supported by the catalog. This is analogous to + * {@link Write#supportedCustomMetrics()}. The corresponding + * {@link StagedTable#reportDriverMetrics()} method must be called to + * retrieve the actual metric values after a commit. The methods are not in the same class + * because the supported metrics are required before the staged table object is created + * and only the staged table object can capture the write metrics during the commit. 
+ */ + default CustomMetric[] supportedCustomMetrics() { return new CustomMetric[0]; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index ba3470f85338c..77dbaa7687b41 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -67,6 +67,11 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_COMMENT = "comment"; + /** + * A reserved property to specify the collation of the table. + */ + String PROP_COLLATION = "collation"; + /** * A reserved property to specify the provider of the table. */ diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index bd2dec9e27be0..49afcd5ebcd50 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -118,7 +118,7 @@ yield visitBinaryArithmetic( "COT", "ASIN", "ASINH", "ACOS", "ACOSH", "ATAN", "ATANH", "ATAN2", "CBRT", "DEGREES", "RADIANS", "SIGN", "WIDTH_BUCKET", "SUBSTRING", "UPPER", "LOWER", "TRANSLATE", "DATE_ADD", "DATE_DIFF", "TRUNC", "AES_ENCRYPT", "AES_DECRYPT", "SHA1", "SHA2", "MD5", - "CRC32", "BIT_LENGTH", "CHAR_LENGTH", "CONCAT" -> + "CRC32", "BIT_LENGTH", "CHAR_LENGTH", "CONCAT", "RPAD", "LPAD" -> visitSQLFunction(name, expressionsToStringArray(e.children())); case "CASE_WHEN" -> visitCaseWhen(expressionsToStringArray(e.children())); case "TRIM" -> visitTrim("BOTH", expressionsToStringArray(e.children())); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java 
b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java index bfb1833b731a7..54b62c00283fa 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnVector.java @@ -69,14 +69,14 @@ public abstract class ColumnVector implements AutoCloseable { public abstract void close(); /** - * Cleans up memory for this column vector if it's not writable. The column vector is not usable - * after this. + * Cleans up memory for this column vector if it's resources are freeable between batches. + * The column vector is not usable after this. * - * If this is a writable column vector, it is a no-op. + * If this is a writable column vector or constant column vector, it is a no-op. */ - public void closeIfNotWritable() { - // By default, we just call close() for all column vectors. If a column vector is writable, it - // should override this method and do nothing. + public void closeIfFreeable() { + // By default, we just call close() for all column vectors. If a column vector is writable or + // constant, it should override this method and do nothing. close(); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java index 52e4115af336a..7ef570a212292 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatch.java @@ -46,12 +46,12 @@ public void close() { } /** - * Called to close all the columns if they are not writable. This is used to clean up memory - * allocated during columnar processing. + * Called to close all the columns if their resources are freeable between batches. + * This is used to clean up memory allocated during columnar processing. 
*/ - public void closeIfNotWritable() { + public void closeIfFreeable() { for (ColumnVector c: columns) { - c.closeIfNotWritable(); + c.closeIfFreeable(); } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java index aaac980bb332a..ac05981da5a24 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java @@ -188,6 +188,8 @@ public Object get(int ordinal, DataType dataType) { return getInt(ordinal); } else if (dataType instanceof TimestampType) { return getLong(ordinal); + } else if (dataType instanceof TimestampNTZType) { + return getLong(ordinal); } else if (dataType instanceof ArrayType) { return getArray(ordinal); } else if (dataType instanceof StructType) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index 2b2a186f76d9d..fab65251ed51b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -66,6 +66,8 @@ object CatalystTypeConverters { case arrayType: ArrayType => ArrayConverter(arrayType.elementType) case mapType: MapType => MapConverter(mapType.keyType, mapType.valueType) case structType: StructType => StructConverter(structType) + case CharType(length) => new CharConverter(length) + case VarcharType(length) => new VarcharConverter(length) case _: StringType => StringConverter case DateType if SQLConf.get.datetimeJava8ApiEnabled => LocalDateConverter case DateType => DateConverter @@ -296,6 +298,33 @@ object CatalystTypeConverters { toScala(row.getStruct(column, structType.size)) } + private class CharConverter(length: Int) extends CatalystTypeConverter[Any, 
String, UTF8String] { + override def toCatalystImpl(scalaValue: Any): UTF8String = + CharVarcharCodegenUtils.charTypeWriteSideCheck( + StringConverter.toCatalystImpl(scalaValue), length) + override def toScala(catalystValue: UTF8String): String = if (catalystValue == null) { + null + } else { + CharVarcharCodegenUtils.charTypeWriteSideCheck(catalystValue, length).toString + } + override def toScalaImpl(row: InternalRow, column: Int): String = + CharVarcharCodegenUtils.charTypeWriteSideCheck(row.getUTF8String(column), length).toString + } + + private class VarcharConverter(length: Int) + extends CatalystTypeConverter[Any, String, UTF8String] { + override def toCatalystImpl(scalaValue: Any): UTF8String = + CharVarcharCodegenUtils.varcharTypeWriteSideCheck( + StringConverter.toCatalystImpl(scalaValue), length) + override def toScala(catalystValue: UTF8String): String = if (catalystValue == null) { + null + } else { + CharVarcharCodegenUtils.varcharTypeWriteSideCheck(catalystValue, length).toString + } + override def toScalaImpl(row: InternalRow, column: Int): String = + CharVarcharCodegenUtils.varcharTypeWriteSideCheck(row.getUTF8String(column), length).toString + } + private object StringConverter extends CatalystTypeConverter[Any, String, UTF8String] { override def toCatalystImpl(scalaValue: Any): UTF8String = scalaValue match { case str: String => UTF8String.fromString(str) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala index 4752434015375..55613b2b20134 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala @@ -20,11 +20,11 @@ package org.apache.spark.sql.catalyst import org.apache.spark.sql.catalyst.{expressions => exprs} import 
org.apache.spark.sql.catalyst.analysis.{GetColumnByOrdinal, UnresolvedExtractValue} import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, AgnosticEncoders, Codec, JavaSerializationCodec, KryoSerializationCodec} -import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, BoxedLeafEncoder, DateEncoder, DayTimeIntervalEncoder, InstantEncoder, IterableEncoder, JavaBeanEncoder, JavaBigIntEncoder, JavaDecimalEncoder, JavaEnumEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, OptionEncoder, PrimitiveBooleanEncoder, PrimitiveByteEncoder, PrimitiveDoubleEncoder, PrimitiveFloatEncoder, PrimitiveIntEncoder, PrimitiveLongEncoder, PrimitiveShortEncoder, ProductEncoder, ScalaBigIntEncoder, ScalaDecimalEncoder, ScalaEnumEncoder, StringEncoder, TimestampEncoder, TransformingEncoder, UDTEncoder, YearMonthIntervalEncoder} +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, BoxedLeafEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, InstantEncoder, IterableEncoder, JavaBeanEncoder, JavaBigIntEncoder, JavaDecimalEncoder, JavaEnumEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, OptionEncoder, PrimitiveBooleanEncoder, PrimitiveByteEncoder, PrimitiveDoubleEncoder, PrimitiveFloatEncoder, PrimitiveIntEncoder, PrimitiveLongEncoder, PrimitiveShortEncoder, ProductEncoder, ScalaBigIntEncoder, ScalaDecimalEncoder, ScalaEnumEncoder, StringEncoder, TimestampEncoder, TransformingEncoder, UDTEncoder, VarcharEncoder, YearMonthIntervalEncoder} import org.apache.spark.sql.catalyst.encoders.EncoderUtils.{externalDataTypeFor, isNativeEncoder} import org.apache.spark.sql.catalyst.expressions.{Expression, GetStructField, IsNull, Literal, MapKeys, MapValues, UpCast} import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, CreateExternalRow, DecodeUsingSerializer, InitializeJavaBean, Invoke, NewInstance, StaticInvoke, UnresolvedCatalystToExternalMap, UnresolvedMapObjects, WrapOption} -import 
org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, IntervalUtils} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CharVarcharCodegenUtils, DateTimeUtils, IntervalUtils} import org.apache.spark.sql.types._ object DeserializerBuildHelper { @@ -80,6 +80,32 @@ object DeserializerBuildHelper { returnNullable = false) } + def createDeserializerForChar( + path: Expression, + returnNullable: Boolean, + length: Int): Expression = { + val expr = StaticInvoke( + classOf[CharVarcharCodegenUtils], + StringType, + "charTypeWriteSideCheck", + path :: Literal(length) :: Nil, + returnNullable = returnNullable) + createDeserializerForString(expr, returnNullable) + } + + def createDeserializerForVarchar( + path: Expression, + returnNullable: Boolean, + length: Int): Expression = { + val expr = StaticInvoke( + classOf[CharVarcharCodegenUtils], + StringType, + "varcharTypeWriteSideCheck", + path :: Literal(length) :: Nil, + returnNullable = returnNullable) + createDeserializerForString(expr, returnNullable) + } + def createDeserializerForString(path: Expression, returnNullable: Boolean): Expression = { Invoke(path, "toString", ObjectType(classOf[java.lang.String]), returnNullable = returnNullable) @@ -258,6 +284,10 @@ object DeserializerBuildHelper { "withName", createDeserializerForString(path, returnNullable = false) :: Nil, returnNullable = false) + case CharEncoder(length) => + createDeserializerForChar(path, returnNullable = false, length) + case VarcharEncoder(length) => + createDeserializerForVarchar(path, returnNullable = false, length) case StringEncoder => createDeserializerForString(path, returnNullable = false) case _: ScalaDecimalEncoder => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala index 2e14c09bc8193..d1007404158f0 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/QueryPlanningTracker.scala @@ -94,6 +94,16 @@ object QueryPlanningTracker { * Callbacks after planning phase completion. */ abstract class QueryPlanningTrackerCallback { + /** + * Called when query fails analysis + * + * @param tracker tracker that triggered the callback. + * @param parsedPlan The plan prior to analysis + * see @org.apache.spark.sql.catalyst.analysis.Analyzer + */ + def analysisFailed(tracker: QueryPlanningTracker, parsedPlan: LogicalPlan): Unit = { + // Noop by default for backward compatibility + } /** * Called when query has been analyzed. * @@ -147,6 +157,17 @@ class QueryPlanningTracker( ret } + /** + * Set when the query has been parsed but failed to be analyzed. + * Can be called multiple times upon plan change. + * + * @param parsedPlan The plan prior analysis + * see @org.apache.spark.sql.catalyst.analysis.Analyzer + */ + private[sql] def setAnalysisFailed(parsedPlan: LogicalPlan): Unit = { + trackerCallback.foreach(_.analysisFailed(this, parsedPlan)) + } + /** * Set when the query has been analysed. * Can be called multiple times upon plan change. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala index daebe15c298f6..089d463ecacbb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala @@ -22,11 +22,11 @@ import scala.language.existentials import org.apache.spark.sql.catalyst.{expressions => exprs} import org.apache.spark.sql.catalyst.DeserializerBuildHelper.expressionWithNullSafety import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, AgnosticEncoders, Codec, JavaSerializationCodec, KryoSerializationCodec} -import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLeafEncoder, BoxedLongEncoder, BoxedShortEncoder, DateEncoder, DayTimeIntervalEncoder, InstantEncoder, IterableEncoder, JavaBeanEncoder, JavaBigIntEncoder, JavaDecimalEncoder, JavaEnumEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, OptionEncoder, PrimitiveLeafEncoder, ProductEncoder, ScalaBigIntEncoder, ScalaDecimalEncoder, ScalaEnumEncoder, StringEncoder, TimestampEncoder, TransformingEncoder, UDTEncoder, YearMonthIntervalEncoder} +import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLeafEncoder, BoxedLongEncoder, BoxedShortEncoder, CharEncoder, DateEncoder, DayTimeIntervalEncoder, InstantEncoder, IterableEncoder, JavaBeanEncoder, JavaBigIntEncoder, JavaDecimalEncoder, JavaEnumEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, OptionEncoder, PrimitiveLeafEncoder, ProductEncoder, ScalaBigIntEncoder, ScalaDecimalEncoder, ScalaEnumEncoder, StringEncoder, TimestampEncoder, TransformingEncoder, UDTEncoder, 
VarcharEncoder, YearMonthIntervalEncoder} import org.apache.spark.sql.catalyst.encoders.EncoderUtils.{externalDataTypeFor, isNativeEncoder, lenientExternalDataTypeFor} import org.apache.spark.sql.catalyst.expressions.{BoundReference, CheckOverflow, CreateNamedStruct, Expression, IsNull, KnownNotNull, Literal, UnsafeArrayData} import org.apache.spark.sql.catalyst.expressions.objects._ -import org.apache.spark.sql.catalyst.util.{ArrayData, DateTimeUtils, GenericArrayData, IntervalUtils} +import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, DateTimeUtils, GenericArrayData, IntervalUtils} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -63,6 +63,24 @@ object SerializerBuildHelper { Invoke(inputObject, "doubleValue", DoubleType) } + def createSerializerForChar(inputObject: Expression, length: Int): Expression = { + StaticInvoke( + classOf[CharVarcharCodegenUtils], + CharType(length), + "charTypeWriteSideCheck", + createSerializerForString(inputObject) :: Literal(length) :: Nil, + returnNullable = false) + } + + def createSerializerForVarchar(inputObject: Expression, length: Int): Expression = { + StaticInvoke( + classOf[CharVarcharCodegenUtils], + VarcharType(length), + "varcharTypeWriteSideCheck", + createSerializerForString(inputObject) :: Literal(length) :: Nil, + returnNullable = false) + } + def createSerializerForString(inputObject: Expression): Expression = { StaticInvoke( classOf[UTF8String], @@ -298,6 +316,8 @@ object SerializerBuildHelper { case BoxedDoubleEncoder => createSerializerForDouble(input) case JavaEnumEncoder(_) => createSerializerForJavaEnum(input) case ScalaEnumEncoder(_, _) => createSerializerForScalaEnum(input) + case CharEncoder(length) => createSerializerForChar(input, length) + case VarcharEncoder(length) => createSerializerForVarchar(input, length) case StringEncoder => createSerializerForString(input) case ScalaDecimalEncoder(dt) => 
createSerializerForBigDecimal(input, dt) case JavaDecimalEncoder(dt, false) => createSerializerForBigDecimal(input, dt) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index bed7bea61597f..92cfc4119dd0c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,6 +28,13 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.{SparkException, SparkThrowable, SparkUnsupportedOperationException} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ +import org.apache.spark.sql.catalyst.analysis.resolver.{ + AnalyzerBridgeState, + HybridAnalyzer, + Resolver => OperatorResolver, + ResolverExtension, + ResolverGuard +} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions._ @@ -146,7 +153,26 @@ case class AnalysisContext( // lookup a temporary function. And export to the view metadata. referredTempFunctionNames: mutable.Set[String] = mutable.Set.empty, referredTempVariableNames: Seq[Seq[String]] = Seq.empty, - outerPlan: Option[LogicalPlan] = None) + outerPlan: Option[LogicalPlan] = None, + + /** + * This is a bridge state between this fixed-point [[Analyzer]] and a single-pass [[Resolver]]. + * It's managed ([[setSinglePassResolverBridgeState]] method) by the [[HybridAnalyzer]] - the + * goal is to preserve it correctly between the fixed-point and single-pass runs. + * [[AnalysisContext.reset]] simply propagates it to prevent it from being reset in + * [[Analyzer.execute]]. Normally it's always [[None]], unless + * [[ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER]] is set to [[true]]. + * + * See [[AnalyzerBridgeState]] and [[HybridAnalyzer]] for more info. 
+ */ + private var singlePassResolverBridgeState: Option[AnalyzerBridgeState] = None) { + + def setSinglePassResolverBridgeState(bridgeState: Option[AnalyzerBridgeState]): Unit = + singlePassResolverBridgeState = bridgeState + + def getSinglePassResolverBridgeState: Option[AnalyzerBridgeState] = + singlePassResolverBridgeState +} object AnalysisContext { private val value = new ThreadLocal[AnalysisContext]() { @@ -154,7 +180,16 @@ object AnalysisContext { } def get: AnalysisContext = value.get() - def reset(): Unit = value.remove() + + def reset(): Unit = { + // We need to preserve the single-pass resolver bridge state here, since it's managed by the + // [[HybridAnalyzer]] (set or reset to `None`) to avoid it being reset in [[execute]]. + // It acts as a bridge between the single-pass and fixed-point analyzers in the absence of any + // other explicit state. + val prevSinglePassResolverBridgeState = value.get.getSinglePassResolverBridgeState + value.remove() + value.get.setSinglePassResolverBridgeState(prevSinglePassResolverBridgeState) + } private def set(context: AnalysisContext): Unit = value.set(context) @@ -219,9 +254,15 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor def executeAndCheck(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { if (plan.analyzed) return plan AnalysisHelper.markInAnalyzer { - val analyzed = executeAndTrack(plan, tracker) - checkAnalysis(analyzed) - analyzed + new HybridAnalyzer( + this, + new ResolverGuard(catalogManager), + new OperatorResolver( + catalogManager, + singlePassResolverExtensions, + singlePassMetadataResolverExtensions + ) + ).apply(plan, tracker) } } @@ -245,6 +286,20 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor errorOnExceed = true, maxIterationsSetting = SQLConf.ANALYZER_MAX_ITERATIONS.key) + /** + * Extensions for the single-pass analyzer. + * + * See [[ResolverExtension]] for more info. 
+ */ + val singlePassResolverExtensions: Seq[ResolverExtension] = Nil + + /** + * Extensions used for early resolution of the single-pass analyzer. + * + * See [[ResolverExtension]] for more info. + */ + val singlePassMetadataResolverExtensions: Seq[ResolverExtension] = Nil + /** * Override to provide additional rules for the "Resolution" batch. */ @@ -279,7 +334,8 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor CTESubstitution, WindowsSubstitution, EliminateUnions, - SubstituteUnresolvedOrdinals), + SubstituteUnresolvedOrdinals, + EliminateLazyExpression), Batch("Disable Hints", Once, new ResolveHints.DisableHints), Batch("Hints", fixedPoint, @@ -298,6 +354,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor ResolveFieldNameAndPosition :: AddMetadataColumns :: DeduplicateRelations :: + ResolveCollationName :: new ResolveReferences(catalogManager) :: // Please do not insert any other rules in between. See the TODO comments in rule // ResolveLateralColumnAliasReference for more details. @@ -316,9 +373,12 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor ResolveProcedures :: BindProcedures :: ResolveTableSpec :: + ValidateAndStripPipeExpressions :: + ResolveSQLFunctions :: ResolveAliases :: ResolveSubquery :: ResolveSubqueryColumnAliases :: + ResolveDefaultStringTypes :: ResolveWindowOrder :: ResolveWindowFrame :: ResolveNaturalAndUsingJoin :: @@ -978,26 +1038,9 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor // If `AnalysisContext.catalogAndNamespace` is non-empty, analyzer will expand single-part names // with it, instead of current catalog and namespace. private def resolveViews(plan: LogicalPlan): LogicalPlan = plan match { - // The view's child should be a logical plan parsed from the `desc.viewText`, the variable - // `viewText` should be defined, or else we throw an error on the generation of the View - // operator. 
- case view @ View(desc, isTempView, child) if !child.resolved => - // Resolve all the UnresolvedRelations and Views in the child. - val newChild = AnalysisContext.withAnalysisContext(desc) { - val nestedViewDepth = AnalysisContext.get.nestedViewDepth - val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth - if (nestedViewDepth > maxNestedViewDepth) { - throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError( - desc.identifier, maxNestedViewDepth, view) - } - SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { - executeSameContext(child) - } - } - // Fail the analysis eagerly because outside AnalysisContext, the unresolved operators - // inside a view maybe resolved incorrectly. - checkAnalysis(newChild) - view.copy(child = newChild) + case view: View if !view.child.resolved => + ViewResolution + .resolve(view, resolveChild = executeSameContext, checkAnalysis = checkAnalysis) case p @ SubqueryAlias(_, view: View) => p.copy(child = resolveViews(view)) case _ => plan @@ -1015,7 +1058,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor case i @ InsertIntoStatement(table, _, _, _, _, _, _) => val relation = table match { case u: UnresolvedRelation if !u.isStreaming => - relationResolution.resolveRelation(u).getOrElse(u) + resolveRelation(u).getOrElse(u) case other => other } @@ -1032,7 +1075,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor case write: V2WriteCommand => write.table match { case u: UnresolvedRelation if !u.isStreaming => - relationResolution.resolveRelation(u).map(unwrapRelationPlan).map { + resolveRelation(u).map(unwrapRelationPlan).map { case v: View => throw QueryCompilationErrors.writeIntoViewNotAllowedError( v.desc.identifier, write) case r: DataSourceV2Relation => write.withNewTable(r) @@ -1047,12 +1090,12 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor } case u: UnresolvedRelation => - 
relationResolution.resolveRelation(u).map(resolveViews).getOrElse(u) + resolveRelation(u).map(resolveViews).getOrElse(u) case r @ RelationTimeTravel(u: UnresolvedRelation, timestamp, version) if timestamp.forall(ts => ts.resolved && !SubqueryExpression.hasSubquery(ts)) => val timeTravelSpec = TimeTravelSpec.create(timestamp, version, conf.sessionLocalTimeZone) - relationResolution.resolveRelation(u, timeTravelSpec).getOrElse(r) + resolveRelation(u, timeTravelSpec).getOrElse(r) case u @ UnresolvedTable(identifier, cmd, suggestAlternative) => lookupTableOrView(identifier).map { @@ -1116,6 +1159,25 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor } } } + + def resolveRelation( + unresolvedRelation: UnresolvedRelation, + timeTravelSpec: Option[TimeTravelSpec] = None): Option[LogicalPlan] = { + relationResolution + .resolveRelation( + unresolvedRelation, + timeTravelSpec + ) + .map { relation => + // We put the synchronously resolved relation into the [[AnalyzerBridgeState]] for + // it to be later reused by the single-pass [[Resolver]] to avoid resolving the relation + // metadata twice. 
+ AnalysisContext.get.getSinglePassResolverBridgeState.map { bridgeState => + bridgeState.relationsWithResolvedMetadata.put(unresolvedRelation, relation) + } + relation + } + } } /** Handle INSERT INTO for DSv2 */ @@ -1608,9 +1670,6 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor case s: Sort if !s.resolved || s.missingInput.nonEmpty => resolveReferencesInSort(s) - case u: UnresolvedWithCTERelations => - UnresolvedWithCTERelations(this.apply(u.unresolvedPlan), u.cteRelations) - case q: LogicalPlan => logTrace(s"Attempting to resolve ${q.simpleString(conf.maxToStringFields)}") q.mapExpressions(resolveExpressionByPlanChildren(_, q, includeLastResort = true)) @@ -1830,10 +1889,14 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor // Replace the index with the corresponding expression in aggregateExpressions. The index is // a 1-base position of aggregateExpressions, which is output columns (select expression) - case Aggregate(groups, aggs, child, hint) if aggs.forall(_.resolved) && + case Aggregate(groups, aggs, child, hint) + if aggs + .filter(!containUnresolvedPipeAggregateOrdinal(_)) + .forall(_.resolved) && groups.exists(containUnresolvedOrdinal) => - val newGroups = groups.map(resolveGroupByExpressionOrdinal(_, aggs)) - Aggregate(newGroups, aggs, child, hint) + val newAggs = aggs.map(resolvePipeAggregateExpressionOrdinal(_, child.output)) + val newGroups = groups.map(resolveGroupByExpressionOrdinal(_, newAggs)) + Aggregate(newGroups, newAggs, child, hint) } private def containUnresolvedOrdinal(e: Expression): Boolean = e match { @@ -1842,6 +1905,11 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor case _ => false } + private def containUnresolvedPipeAggregateOrdinal(e: Expression): Boolean = e match { + case UnresolvedAlias(_: UnresolvedPipeAggregateOrdinal, _) => true + case _ => false + } + private def resolveGroupByExpressionOrdinal( expr: Expression, aggs: 
Seq[Expression]): Expression = expr match { @@ -1877,6 +1945,17 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor } } + private def resolvePipeAggregateExpressionOrdinal( + expr: NamedExpression, + inputs: Seq[Attribute]): NamedExpression = expr match { + case UnresolvedAlias(UnresolvedPipeAggregateOrdinal(index), _) => + // In this case, the user applied the SQL pipe aggregate operator ("|> AGGREGATE") and used + // ordinals in its GROUP BY clause. This expression then refers to the i-th attribute of the + // child operator (one-based). Here we resolve the ordinal to the corresponding attribute. + inputs(index - 1) + case other => + other + } /** * Checks whether a function identifier referenced by an [[UnresolvedFunction]] is defined in the @@ -2190,23 +2269,12 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor * can resolve outer references. * * Outer references of the subquery are updated as children of Subquery expression. - * - * If hasExplicitOuterRefs is true, the subquery should have an explicit outer reference, - * instead of common `UnresolvedAttribute`s. In this case, tries to resolve inner and outer - * references separately. 
*/ private def resolveSubQuery( e: SubqueryExpression, - outer: LogicalPlan, - hasExplicitOuterRefs: Boolean = false)( + outer: LogicalPlan)( f: (LogicalPlan, Seq[Expression]) => SubqueryExpression): SubqueryExpression = { - val newSubqueryPlan = if (hasExplicitOuterRefs) { - executeSameContext(e.plan).transformAllExpressionsWithPruning( - _.containsPattern(UNRESOLVED_OUTER_REFERENCE)) { - case u: UnresolvedOuterReference => - resolveOuterReference(u.nameParts, outer).getOrElse(u) - } - } else AnalysisContext.withOuterPlan(outer) { + val newSubqueryPlan = AnalysisContext.withOuterPlan(outer) { executeSameContext(e.plan) } @@ -2231,11 +2299,10 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor */ private def resolveSubQueries(plan: LogicalPlan, outer: LogicalPlan): LogicalPlan = { plan.transformAllExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION), ruleId) { - case s @ ScalarSubquery(sub, _, exprId, _, _, _, _, hasExplicitOuterRefs) - if !sub.resolved => - resolveSubQuery(s, outer, hasExplicitOuterRefs)(ScalarSubquery(_, _, exprId)) - case e @ Exists(sub, _, exprId, _, _, hasExplicitOuterRefs) if !sub.resolved => - resolveSubQuery(e, outer, hasExplicitOuterRefs)(Exists(_, _, exprId)) + case s @ ScalarSubquery(sub, _, exprId, _, _, _, _) if !sub.resolved => + resolveSubQuery(s, outer)(ScalarSubquery(_, _, exprId)) + case e @ Exists(sub, _, exprId, _, _) if !sub.resolved => + resolveSubQuery(e, outer)(Exists(_, _, exprId)) case InSubquery(values, l @ ListQuery(_, _, exprId, _, _, _)) if values.forall(_.resolved) && !l.resolved => val expr = resolveSubQuery(l, outer)((plan, exprs) => { @@ -2298,6 +2365,277 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor } } + /** + * This rule resolves SQL function expressions. It pulls out function inputs and place them + * in a separate [[Project]] node below the operator and replace the SQL function with its + * actual function body. 
SQL function expressions in [[Aggregate]] are handled in a special + * way. Non-aggregated SQL functions in the aggregate expressions of an Aggregate need to be + * pulled out into a Project above the Aggregate before replacing the SQL function expressions + * with actual function bodies. For example: + * + * Before: + * Aggregate [c1] [foo(c1), foo(max(c2)), sum(foo(c2)) AS sum] + * +- Relation [c1, c2] + * + * After: + * Project [foo(c1), foo(max_c2), sum] + * +- Aggregate [c1] [c1, max(c2) AS max_c2, sum(foo(c2)) AS sum] + * +- Relation [c1, c2] + */ + object ResolveSQLFunctions extends Rule[LogicalPlan] { + + private def hasSQLFunctionExpression(exprs: Seq[Expression]): Boolean = { + exprs.exists(_.find(_.isInstanceOf[SQLFunctionExpression]).nonEmpty) + } + + /** + * Check if the function input contains aggregate expressions. + */ + private def checkFunctionInput(f: SQLFunctionExpression): Unit = { + if (f.inputs.exists(AggregateExpression.containsAggregate)) { + // The input of a SQL function should not contain aggregate functions after + // `extractAndRewrite`. If there are aggregate functions, it means they are + // nested in another aggregate function, which is not allowed. + // For example: SELECT sum(foo(sum(c1))) FROM t + // We have to throw the error here because otherwise the query plan after + // resolving the SQL function will not be valid. + throw new AnalysisException( + errorClass = "NESTED_AGGREGATE_FUNCTION", + messageParameters = Map.empty) + } + } + + /** + * Resolve a SQL function expression as a logical plan check if it can be analyzed. + */ + private def resolve(f: SQLFunctionExpression): LogicalPlan = { + // Validate the SQL function input. + checkFunctionInput(f) + val plan = v1SessionCatalog.makeSQLFunctionPlan(f.name, f.function, f.inputs) + val resolved = SQLFunctionContext.withSQLFunction { + // Resolve the SQL function plan using its context. 
+ val conf = new SQLConf() + f.function.getSQLConfigs.foreach { case (k, v) => conf.settings.put(k, v) } + SQLConf.withExistingConf(conf) { + executeSameContext(plan) + } + } + // Fail the analysis eagerly if a SQL function cannot be resolved using its input. + SimpleAnalyzer.checkAnalysis(resolved) + resolved + } + + /** + * Rewrite SQL function expressions into actual resolved function bodies and extract + * function inputs into the given project list. + */ + private def rewriteSQLFunctions[E <: Expression]( + expression: E, + projectList: ArrayBuffer[NamedExpression]): E = { + val newExpr = expression match { + case f: SQLFunctionExpression if !hasSQLFunctionExpression(f.inputs) && + // Make sure LateralColumnAliasReference in parameters is resolved and eliminated first. + // Otherwise, the projectList can contain the LateralColumnAliasReference, which will be + // pushed down to a Project without the 'referenced' alias by LCA present, leaving it + // unresolved. + !f.inputs.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) => + withPosition(f) { + val plan = resolve(f) + // Extract the function input project list from the SQL function plan and + // inline the SQL function expression. + plan match { + case Project(body :: Nil, Project(aliases, _: LocalRelation)) => + projectList ++= aliases + SQLScalarFunction(f.function, aliases.map(_.toAttribute), body) + case o => + throw new AnalysisException( + errorClass = "INVALID_SQL_FUNCTION_PLAN_STRUCTURE", + messageParameters = Map("plan" -> o.toString)) + } + } + case o => o.mapChildren(rewriteSQLFunctions(_, projectList)) + } + newExpr.asInstanceOf[E] + } + + /** + * Check if the given expression contains expressions that should be extracted, + * i.e. non-aggregated SQL functions with non-foldable inputs. + */ + private def shouldExtract(e: Expression): Boolean = e match { + // Return false if the expression is already an aggregate expression. 
+ case _: AggregateExpression => false + case _: SQLFunctionExpression => true + case _: LeafExpression => false + case o => o.children.exists(shouldExtract) + } + + /** + * Extract aggregate expressions from the given expression and replace + * them with attribute references. + * Example: + * Before: foo(c1) + foo(max(c2)) + max(foo(c2)) + * After: foo(c1) + foo(max_c2) + max_foo_c2 + * Extracted expressions: [c1, max(c2) AS max_c2, max(foo(c2)) AS max_foo_c2] + */ + private def extractAndRewrite[T <: Expression]( + expression: T, + extractedExprs: ArrayBuffer[NamedExpression]): T = { + val newExpr = expression match { + case e if !shouldExtract(e) => + val exprToAdd: NamedExpression = e match { + case o: OuterReference => Alias(o, toPrettySQL(o.e))() + case ne: NamedExpression => ne + case o => Alias(o, toPrettySQL(o))() + } + extractedExprs += exprToAdd + exprToAdd.toAttribute + case f: SQLFunctionExpression => + val newInputs = f.inputs.map(extractAndRewrite(_, extractedExprs)) + f.copy(inputs = newInputs) + case o => o.mapChildren(extractAndRewrite(_, extractedExprs)) + } + newExpr.asInstanceOf[T] + } + + /** + * Replace all [[SQLFunctionExpression]]s in an expression with attribute references + * from the aliasMap. + */ + private def replaceSQLFunctionWithAttr[T <: Expression]( + expr: T, + aliasMap: mutable.HashMap[Expression, Alias]): T = { + expr.transform { + case f: SQLFunctionExpression if aliasMap.contains(f.canonicalized) => + aliasMap(f.canonicalized).toAttribute + }.asInstanceOf[T] + } + + private def rewrite(plan: LogicalPlan): LogicalPlan = plan match { + // Return if a sub-tree does not contain SQLFunctionExpression. 
+ case p: LogicalPlan if !p.containsPattern(SQL_FUNCTION_EXPRESSION) => p + + case f @ Filter(cond, a: Aggregate) + if !f.resolved || AggregateExpression.containsAggregate(cond) || + ResolveGroupingAnalytics.hasGroupingFunction(cond) || + cond.containsPattern(TEMP_RESOLVED_COLUMN) => + // If the filter's condition contains aggregate expressions or grouping expressions or temp + // resolved column, we cannot rewrite both the filter and the aggregate until they are + // resolved by ResolveAggregateFunctions or ResolveGroupingAnalytics, because rewriting SQL + // functions in aggregate can add an additional project on top of the aggregate + // which breaks the pattern matching in those rules. + f.copy(child = a.copy(child = rewrite(a.child))) + + case h @ UnresolvedHaving(_, a: Aggregate) => + // Similarly UnresolvedHaving should be resolved by ResolveAggregateFunctions first + // before rewriting aggregate. + h.copy(child = a.copy(child = rewrite(a.child))) + + case a: Aggregate if a.resolved && hasSQLFunctionExpression(a.expressions) => + val child = rewrite(a.child) + // Extract SQL functions in the grouping expressions and place them in a project list + // below the current aggregate. Also update their appearances in the aggregate expressions. + val bottomProjectList = ArrayBuffer.empty[NamedExpression] + val aliasMap = mutable.HashMap.empty[Expression, Alias] + val newGrouping = a.groupingExpressions.map { expr => + expr.transformDown { + case f: SQLFunctionExpression => + val alias = aliasMap.getOrElseUpdate(f.canonicalized, Alias(f, f.name)()) + bottomProjectList += alias + alias.toAttribute + } + } + val aggregateExpressions = a.aggregateExpressions.map( + replaceSQLFunctionWithAttr(_, aliasMap)) + + // Rewrite SQL functions in the aggregate expressions that are not wrapped in + // aggregate functions. They need to be extracted into a project list above the + // current aggregate. 
+ val aggExprs = ArrayBuffer.empty[NamedExpression] + val topProjectList = aggregateExpressions.map(extractAndRewrite(_, aggExprs)) + + // Rewrite SQL functions in the new aggregate expressions that are wrapped inside + // aggregate functions. + val newAggExprs = aggExprs.map(rewriteSQLFunctions(_, bottomProjectList)) + + val bottomProject = if (bottomProjectList.nonEmpty) { + Project(child.output ++ bottomProjectList, child) + } else { + child + } + val newAgg = if (newGrouping.nonEmpty || newAggExprs.nonEmpty) { + a.copy( + groupingExpressions = newGrouping, + aggregateExpressions = newAggExprs.toSeq, + child = bottomProject) + } else { + bottomProject + } + if (topProjectList.nonEmpty) Project(topProjectList, newAgg) else newAgg + + case p: Project if p.resolved && hasSQLFunctionExpression(p.expressions) => + val newChild = rewrite(p.child) + val projectList = ArrayBuffer.empty[NamedExpression] + val newPList = p.projectList.map(rewriteSQLFunctions(_, projectList)) + if (newPList != newChild.output) { + p.copy(newPList, Project(newChild.output ++ projectList, newChild)) + } else { + assert(projectList.isEmpty) + p.copy(child = newChild) + } + + case f: Filter if f.resolved && hasSQLFunctionExpression(f.expressions) => + val newChild = rewrite(f.child) + val projectList = ArrayBuffer.empty[NamedExpression] + val newCond = rewriteSQLFunctions(f.condition, projectList) + if (newCond != f.condition) { + Project(f.output, Filter(newCond, Project(newChild.output ++ projectList, newChild))) + } else { + assert(projectList.isEmpty) + f.copy(child = newChild) + } + + case j: Join if j.resolved && hasSQLFunctionExpression(j.expressions) => + val newLeft = rewrite(j.left) + val newRight = rewrite(j.right) + val projectList = ArrayBuffer.empty[NamedExpression] + val joinCond = j.condition.map(rewriteSQLFunctions(_, projectList)) + if (joinCond != j.condition) { + // Join condition cannot have non-deterministic expressions. 
We can safely + // replace the aliases with the original SQL function input expressions. + val aliasMap = projectList.collect { case a: Alias => a.toAttribute -> a.child }.toMap + val newJoinCond = joinCond.map(_.transform { + case a: Attribute => aliasMap.getOrElse(a, a) + }) + j.copy(left = newLeft, right = newRight, condition = newJoinCond) + } else { + assert(projectList.isEmpty) + j.copy(left = newLeft, right = newRight) + } + + case o: LogicalPlan if o.resolved && hasSQLFunctionExpression(o.expressions) => + o.transformExpressionsWithPruning(_.containsPattern(SQL_FUNCTION_EXPRESSION)) { + case f: SQLFunctionExpression => + f.failAnalysis( + errorClass = "UNSUPPORTED_SQL_UDF_USAGE", + messageParameters = Map( + "functionName" -> toSQLId(f.function.name.nameParts), + "nodeName" -> o.nodeName.toString)) + } + + case p: LogicalPlan => p.mapChildren(rewrite) + } + + def apply(plan: LogicalPlan): LogicalPlan = { + // Only rewrite SQL functions when they are not in nested function calls. + if (SQLFunctionContext.get.nestedSQLFunctionDepth > 0) { + plan + } else { + rewrite(plan) + } + } + } + /** * Turns projections that contain aggregate expressions into aggregations. */ @@ -2782,6 +3120,9 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor ne case e: Expression if e.foldable => e // No need to create an attribute reference if it will be evaluated as a Literal. + case e: SortOrder => + // For SortOder just recursively extract the from child expression. + e.copy(child = extractExpr(e.child)) case e: NamedArgumentExpression => // For NamedArgumentExpression, we extract the value and replace it with // an AttributeReference (with an internal column name, e.g. "_w0"). @@ -3653,7 +3994,6 @@ object CleanupAliases extends Rule[LogicalPlan] with AliasHelper { /** * Ignore event time watermark in batch query, which is only supported in Structured Streaming. - * TODO: add this rule into analyzer rule list. 
*/ object EliminateEventTimeWatermark extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiStringPromotionTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiStringPromotionTypeCoercion.scala index 8345a4b9637e2..e7be95bc645ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiStringPromotionTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiStringPromotionTypeCoercion.scala @@ -42,6 +42,7 @@ import org.apache.spark.sql.types.{ IntegralType, LongType, NullType, + StringHelper, StringType, StringTypeExpression, TimestampType @@ -99,7 +100,7 @@ object AnsiStringPromotionTypeCoercion { case (_: StringType, _: AnsiIntervalType) => None // [SPARK-50060] If a binary operation contains two collated string types with different // collation IDs, we can't decide which collation ID the result should have. 
- case (st1: StringType, st2: StringType) if st1.collationId != st2.collationId => None + case (st1: StringType, st2: StringType) => StringHelper.tightestCommonString(st1, st2) case (_: StringType, a: AtomicType) => Some(a) case (other, st: StringType) if !other.isInstanceOf[StringType] => findWiderTypeForString(st, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala index 3ba17c8b8e1a3..aa977b240007b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala @@ -102,6 +102,8 @@ object AnsiTypeCoercion extends TypeCoercionBase { case (NullType, t1) => Some(t1) case (t1, NullType) => Some(t1) + case(s1: StringType, s2: StringType) => StringHelper.tightestCommonString(s1, s2) + case (t1: IntegralType, t2: DecimalType) if t2.isWiderThan(t1) => Some(t2) case (t1: DecimalType, t2: IntegralType) if t1.isWiderThan(t2) => @@ -168,7 +170,12 @@ object AnsiTypeCoercion extends TypeCoercionBase { // If a function expects a StringType, no StringType instance should be implicitly cast to // StringType with a collation that's not accepted (aka. lockdown unsupported collations). - case (_: StringType, _: StringType) => None + case (s1: StringType, s2: StringType) => + if (s1.collationId == s2.collationId && StringHelper.isMoreConstrained(s1, s2)) { + Some(s2) + } else { + None + } case (_: StringType, _: AbstractStringType) => None // If a function expects integral type, fractional input is not allowed. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index ff0dbcd7ef153..50f149bb28064 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -123,7 +123,7 @@ object CTESubstitution extends Rule[LogicalPlan] { startOfQuery: Boolean = true): Unit = { val resolver = conf.resolver plan match { - case UnresolvedWith(child, relations) => + case UnresolvedWith(child, relations, _) => val newNames = ArrayBuffer.empty[String] newNames ++= outerCTERelationNames relations.foreach { @@ -149,10 +149,15 @@ object CTESubstitution extends Rule[LogicalPlan] { plan: LogicalPlan, cteDefs: ArrayBuffer[CTERelationDef]): LogicalPlan = { plan.resolveOperatorsUp { - case UnresolvedWith(child, relations) => - val resolvedCTERelations = - resolveCTERelations(relations, isLegacy = true, forceInline = false, Seq.empty, cteDefs) - substituteCTE(child, alwaysInline = true, resolvedCTERelations) + case cte @ UnresolvedWith(child, relations, allowRecursion) => + if (allowRecursion) { + cte.failAnalysis( + errorClass = "RECURSIVE_CTE_IN_LEGACY_MODE", + messageParameters = Map.empty) + } + val resolvedCTERelations = resolveCTERelations(relations, isLegacy = true, + forceInline = false, Seq.empty, cteDefs, allowRecursion) + substituteCTE(child, alwaysInline = true, resolvedCTERelations, None) } } @@ -202,14 +207,21 @@ object CTESubstitution extends Rule[LogicalPlan] { var firstSubstituted: Option[LogicalPlan] = None val newPlan = plan.resolveOperatorsDownWithPruning( _.containsAnyPattern(UNRESOLVED_WITH, PLAN_EXPRESSION)) { - case UnresolvedWith(child: LogicalPlan, relations) => + // allowRecursion flag is set to `True` by the parser if the `RECURSIVE` keyword is used. 
+ case cte @ UnresolvedWith(child: LogicalPlan, relations, allowRecursion) => + if (allowRecursion && forceInline) { + cte.failAnalysis( + errorClass = "RECURSIVE_CTE_WHEN_INLINING_IS_FORCED", + messageParameters = Map.empty) + } val resolvedCTERelations = - resolveCTERelations(relations, isLegacy = false, forceInline, outerCTEDefs, cteDefs) ++ - outerCTEDefs + resolveCTERelations(relations, isLegacy = false, forceInline, outerCTEDefs, cteDefs, + allowRecursion) ++ outerCTEDefs val substituted = substituteCTE( traverseAndSubstituteCTE(child, forceInline, resolvedCTERelations, cteDefs)._1, forceInline, - resolvedCTERelations) + resolvedCTERelations, + None) if (firstSubstituted.isEmpty) { firstSubstituted = Some(substituted) } @@ -228,7 +240,8 @@ object CTESubstitution extends Rule[LogicalPlan] { isLegacy: Boolean, forceInline: Boolean, outerCTEDefs: Seq[(String, CTERelationDef)], - cteDefs: ArrayBuffer[CTERelationDef]): Seq[(String, CTERelationDef)] = { + cteDefs: ArrayBuffer[CTERelationDef], + allowRecursion: Boolean): Seq[(String, CTERelationDef)] = { val alwaysInline = isLegacy || forceInline var resolvedCTERelations = if (alwaysInline) { Seq.empty @@ -247,30 +260,116 @@ object CTESubstitution extends Rule[LogicalPlan] { // NOTE: we must call `traverseAndSubstituteCTE` before `substituteCTE`, as the relations // in the inner CTE have higher priority over the relations in the outer CTE when resolving // inner CTE relations. For example: - // WITH t1 AS (SELECT 1) - // t2 AS ( - // WITH t1 AS (SELECT 2) - // WITH t3 AS (SELECT * FROM t1) - // ) - // t3 should resolve the t1 to `SELECT 2` instead of `SELECT 1`. - traverseAndSubstituteCTE(relation, forceInline, resolvedCTERelations, cteDefs)._1 + // WITH + // t1 AS (SELECT 1), + // t2 AS ( + // WITH + // t1 AS (SELECT 2), + // t3 AS (SELECT * FROM t1) + // SELECT * FROM t1 + // ) + // SELECT * FROM t2 + // t3 should resolve the t1 to `SELECT 2` ("inner" t1) instead of `SELECT 1`. 
+ // + // When recursion allowed (RECURSIVE keyword used): + // Consider following example: + // WITH + // t1 AS (SELECT 1), + // t2 AS ( + // WITH RECURSIVE + // t1 AS ( + // SELECT 1 AS level + // UNION ( + // WITH t3 AS (SELECT level + 1 FROM t1 WHERE level < 10) + // SELECT * FROM t3 + // ) + // ) + // SELECT * FROM t1 + // ) + // SELECT * FROM t2 + // t1 reference within t3 would initially resolve to outer `t1` (SELECT 1), as the inner t1 + // is not yet known. Therefore, we need to remove definitions that conflict with current + // relation `name` from the list of `outerCTEDefs` entering `traverseAndSubstituteCTE()`. + // NOTE: It will be recognized later in the code that this is actually a self-reference + // (reference to the inner t1). + val nonConflictingCTERelations = if (allowRecursion) { + resolvedCTERelations.filterNot { + case (cteName, cteDef) => cteDef.conf.resolver(cteName, name) + } + } else { + resolvedCTERelations + } + traverseAndSubstituteCTE(relation, forceInline, nonConflictingCTERelations, cteDefs)._1 } - // CTE definition can reference a previous one - val substituted = substituteCTE(innerCTEResolved, alwaysInline, resolvedCTERelations) + + // If recursion is allowed (RECURSIVE keyword specified) + // then it has higher priority than outer or previous relations. + // Therefore, we construct a `CTERelationDef` for the current relation. + // Later if we encounter unresolved relation which we need to find which CTE Def it is + // referencing to, we first check if it is a reference to this one. If yes, then we set the + // reference as being recursive. + val recursiveCTERelation = if (allowRecursion) { + Some(name -> CTERelationDef(relation)) + } else { + None + } + // CTE definition can reference a previous one or itself if recursion allowed. 
+ val substituted = substituteCTE(innerCTEResolved, alwaysInline, + resolvedCTERelations, recursiveCTERelation) val cteRelation = CTERelationDef(substituted) if (!alwaysInline) { cteDefs += cteRelation } + // Prepending new CTEs makes sure that those have higher priority over outer ones. resolvedCTERelations +:= (name -> cteRelation) } resolvedCTERelations } + /** + * This function is called from `substituteCTE` to actually substitute unresolved relations + * with CTE references. + */ + private def resolveWithCTERelations( + table: String, + alwaysInline: Boolean, + cteRelations: Seq[(String, CTERelationDef)], + recursiveCTERelation: Option[(String, CTERelationDef)], + unresolvedRelation: UnresolvedRelation): LogicalPlan = { + if (recursiveCTERelation.isDefined && conf.resolver(recursiveCTERelation.get._1, table)) { + // self-reference is found + recursiveCTERelation.map { + case (_, d) => + SubqueryAlias(table, + CTERelationRef(d.id, d.resolved, d.output, d.isStreaming, recursive = true)) + }.get + } else { + cteRelations + .find(r => conf.resolver(r._1, table)) + .map { + case (_, d) => + if (alwaysInline) { + d.child + } else { + // Add a `SubqueryAlias` for hint-resolving rules to match relation names. + // This is a non-recursive reference, recursive parameter is by default set to false + SubqueryAlias(table, + CTERelationRef(d.id, d.resolved, d.output, d.isStreaming)) + } + } + .getOrElse(unresolvedRelation) + } + } + + /** + * Substitute unresolved relations in the plan with CTE references (CTERelationRef). 
+ */ private def substituteCTE( plan: LogicalPlan, alwaysInline: Boolean, - cteRelations: Seq[(String, CTERelationDef)]): LogicalPlan = { + cteRelations: Seq[(String, CTERelationDef)], + recursiveCTERelation: Option[(String, CTERelationDef)]): LogicalPlan = { plan.resolveOperatorsUpWithPruning( _.containsAnyPattern(RELATION_TIME_TRAVEL, UNRESOLVED_RELATION, PLAN_EXPRESSION, UNRESOLVED_IDENTIFIER)) { @@ -279,28 +378,29 @@ object CTESubstitution extends Rule[LogicalPlan] { throw QueryCompilationErrors.timeTravelUnsupportedError(toSQLId(table)) case u @ UnresolvedRelation(Seq(table), _, _) => - cteRelations.find(r => plan.conf.resolver(r._1, table)).map { case (_, d) => - if (alwaysInline) { - d.child - } else { - // Add a `SubqueryAlias` for hint-resolving rules to match relation names. - SubqueryAlias(table, CTERelationRef(d.id, d.resolved, d.output, d.isStreaming)) - } - }.getOrElse(u) + resolveWithCTERelations(table, alwaysInline, cteRelations, + recursiveCTERelation, u) case p: PlanWithUnresolvedIdentifier => // We must look up CTE relations first when resolving `UnresolvedRelation`s, // but we can't do it here as `PlanWithUnresolvedIdentifier` is a leaf node - // and may produce `UnresolvedRelation` later. - // Here we wrap it with `UnresolvedWithCTERelations` so that we can - // delay the CTE relations lookup after `PlanWithUnresolvedIdentifier` is resolved. - UnresolvedWithCTERelations(p, cteRelations) + // and may produce `UnresolvedRelation` later. Instead, we delay CTE resolution + // by moving it to the planBuilder of the corresponding `PlanWithUnresolvedIdentifier`. + p.copy(planBuilder = (nameParts, children) => { + p.planBuilder.apply(nameParts, children) match { + case u @ UnresolvedRelation(Seq(table), _, _) => + resolveWithCTERelations(table, alwaysInline, cteRelations, + recursiveCTERelation, u) + case other => other + } + }) case other => // This cannot be done in ResolveSubquery because ResolveSubquery does not know the CTE. 
other.transformExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) { case e: SubqueryExpression => - e.withNewPlan(apply(substituteCTE(e.plan, alwaysInline, cteRelations))) + e.withNewPlan( + apply(substituteCTE(e.plan, alwaysInline, cteRelations, None))) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 586a0312e1507..0a68524c31241 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.ExtendedAnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, Median, PercentileCont, PercentileDisc} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction, ListAgg, Median, PercentileCont, PercentileDisc} import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery, InlineCTE} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -76,6 +76,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB dt.existsRecursively(_.isInstanceOf[MapType]) } + protected def hasVariantType(dt: DataType): Boolean = { + dt.existsRecursively(_.isInstanceOf[VariantType]) + } + protected def mapColumnInSetOperation(plan: LogicalPlan): Option[Attribute] = plan match { case _: Intersect | _: Except | _: Distinct => plan.output.find(a => hasMapType(a.dataType)) @@ -84,6 +88,21 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case _ => None } + protected 
def variantColumnInSetOperation(plan: LogicalPlan): Option[Attribute] = plan match { + case _: Intersect | _: Except | _: Distinct => + plan.output.find(a => hasVariantType(a.dataType)) + case d: Deduplicate => + d.keys.find(a => hasVariantType(a.dataType)) + case _ => None + } + + protected def variantExprInPartitionExpression(plan: LogicalPlan): Option[Expression] = + plan match { + case r: RepartitionByExpression => + r.partitionExpressions.find(e => hasVariantType(e.dataType)) + case _ => None + } + private def checkLimitLikeClause(name: String, limitExpr: Expression): Unit = { limitExpr match { case e if !e.foldable => limitExpr.failAnalysis( @@ -173,6 +192,15 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB ) } + private def containsUnsupportedLCA(e: Expression, operator: LogicalPlan): Boolean = { + e.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE) && operator.expressions.exists { + case a: Alias + if e.collect { case l: LateralColumnAliasReference => l.nameParts.head }.contains(a.name) => + a.exists(_.isInstanceOf[Generator]) + case _ => false + } + } + /** * Checks for errors in a `SELECT` clause, such as a trailing comma or an empty select list. 
* @@ -255,9 +283,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB plan.foreachUp { case p if p.analyzed => // Skip already analyzed sub-plans - case leaf: LeafNode if leaf.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) => + case leaf: LeafNode if !SQLConf.get.preserveCharVarcharTypeInfo && + leaf.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) => throw SparkException.internalError( - "Logical plan should not have output of char/varchar type: " + leaf) + s"Logical plan should not have output of char/varchar type when " + + s"${SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key} is false: " + leaf) case u: UnresolvedNamespace => u.schemaNotFound(u.multipartIdentifier) @@ -340,6 +370,14 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB // surrounded with single quotes, or there is a typo in the attribute name. case GetMapValue(map, key: Attribute) if isMapWithStringKey(map) && !key.resolved => failUnresolvedAttribute(operator, key, "UNRESOLVED_MAP_KEY") + + case e: Expression if containsUnsupportedLCA(e, operator) => + val lcaRefNames = + e.collect { case lcaRef: LateralColumnAliasReference => lcaRef.name }.distinct + failAnalysis( + errorClass = "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GENERATOR", + messageParameters = + Map("lca" -> toSQLId(lcaRefNames), "generatorExpr" -> toSQLExpr(e))) } // Fail if we still have an unresolved all in group by. 
This needs to run before the @@ -423,10 +461,23 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB "funcName" -> toSQLExpr(wf), "windowExpr" -> toSQLExpr(w))) + case agg @ AggregateExpression(listAgg: ListAgg, _, _, _, _) + if agg.isDistinct && listAgg.needSaveOrderValue => + throw QueryCompilationErrors.functionAndOrderExpressionMismatchError( + listAgg.prettyName, listAgg.child, listAgg.orderExpressions) + case w: WindowExpression => // Only allow window functions with an aggregate expression or an offset window // function or a Pandas window UDF. w.windowFunction match { + case agg @ AggregateExpression(fun: ListAgg, _, _, _, _) + // listagg(...) WITHIN GROUP (ORDER BY ...) OVER (ORDER BY ...) is unsupported + if fun.orderingFilled && (w.windowSpec.orderSpec.nonEmpty || + w.windowSpec.frameSpecification != + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing)) => + agg.failAnalysis( + errorClass = "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC", + messageParameters = Map("aggFunc" -> toSQLExpr(agg.aggregateFunction))) case agg @ AggregateExpression( _: PercentileCont | _: PercentileDisc | _: Median, _, _, _, _) if w.windowSpec.orderSpec.nonEmpty || w.windowSpec.frameSpecification != @@ -457,11 +508,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB errorClass = "UNBOUND_SQL_PARAMETER", messageParameters = Map("name" -> p.name)) - case l: LazyAnalysisExpression => - l.failAnalysis( - errorClass = "UNANALYZABLE_EXPRESSION", - messageParameters = Map("expr" -> toSQLExpr(l))) - + case ma @ MultiAlias(child, names) if child.resolved && !child.isInstanceOf[Generator] => + ma.failAnalysis( + errorClass = "MULTI_ALIAS_WITHOUT_GENERATOR", + messageParameters = Map("expr" -> toSQLExpr(child), "names" -> names.mkString(", "))) case _ => }) @@ -654,13 +704,13 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB operator.children.tail.zipWithIndex.foreach { case 
(child, ti) => // Check the number of columns if (child.output.length != ref.length) { - e.failAnalysis( - errorClass = "NUM_COLUMNS_MISMATCH", - messageParameters = Map( - "operator" -> toSQLStmt(operator.nodeName), - "firstNumColumns" -> ref.length.toString, - "invalidOrdinalNum" -> ordinalNumber(ti + 1), - "invalidNumColumns" -> child.output.length.toString)) + throw QueryCompilationErrors.numColumnsMismatch( + operator = operator.nodeName, + firstNumColumns = ref.length, + invalidOrdinalNum = ti + 1, + invalidNumColumns = child.output.length, + origin = operator.origin + ) } val dataTypesAreCompatibleFn = getDataTypesAreCompatibleFn(operator) @@ -668,15 +718,15 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) => // SPARK-18058: we shall not care about the nullability of columns if (!dataTypesAreCompatibleFn(dt1, dt2)) { - e.failAnalysis( - errorClass = "INCOMPATIBLE_COLUMN_TYPE", - messageParameters = Map( - "operator" -> toSQLStmt(operator.nodeName), - "columnOrdinalNumber" -> ordinalNumber(ci), - "tableOrdinalNumber" -> ordinalNumber(ti + 1), - "dataType1" -> toSQLType(dt1), - "dataType2" -> toSQLType(dt2), - "hint" -> extraHintForAnsiTypeCoercionPlan(operator))) + throw QueryCompilationErrors.incompatibleColumnTypeError( + operator = operator.nodeName, + columnOrdinalNumber = ci, + tableOrdinalNumber = ti + 1, + dataType1 = dt1, + dataType2 = dt2, + hint = extraHintForAnsiTypeCoercionPlan(operator), + origin = operator.origin + ) } } } @@ -820,6 +870,23 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB "colName" -> toSQLId(mapCol.name), "dataType" -> toSQLType(mapCol.dataType))) + // TODO: Remove this type check once we support Variant ordering + case o if variantColumnInSetOperation(o).isDefined => + val variantCol = variantColumnInSetOperation(o).get + o.failAnalysis( + errorClass = 
"UNSUPPORTED_FEATURE.SET_OPERATION_ON_VARIANT_TYPE", + messageParameters = Map( + "colName" -> toSQLId(variantCol.name), + "dataType" -> toSQLType(variantCol.dataType))) + + case o if variantExprInPartitionExpression(o).isDefined => + val variantExpr = variantExprInPartitionExpression(o).get + o.failAnalysis( + errorClass = "UNSUPPORTED_FEATURE.PARTITION_BY_VARIANT", + messageParameters = Map( + "expr" -> toSQLExpr(variantExpr), + "dataType" -> toSQLType(variantExpr.dataType))) + case o if o.expressions.exists(!_.deterministic) && !operatorAllowsNonDeterministicExpressions(o) && !o.isInstanceOf[Project] && @@ -1039,6 +1106,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB @scala.annotation.tailrec def cleanQueryInScalarSubquery(p: LogicalPlan): LogicalPlan = p match { case s: SubqueryAlias => cleanQueryInScalarSubquery(s.child) + // Skip SQL function node added by the Analyzer + case s: SQLFunctionNode => cleanQueryInScalarSubquery(s.child) case p: Project => cleanQueryInScalarSubquery(p.child) case h: ResolvedHint => cleanQueryInScalarSubquery(h.child) case child => child @@ -1067,20 +1136,6 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case _ => } - def checkUnresolvedOuterReference(p: LogicalPlan, expr: SubqueryExpression): Unit = { - expr.plan.foreachUp(_.expressions.foreach(_.foreachUp { - case o: UnresolvedOuterReference => - val cols = p.inputSet.toSeq.map(attr => toSQLId(attr.name)).mkString(", ") - o.failAnalysis( - errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", - messageParameters = Map("objectName" -> toSQLId(o.name), "proposal" -> cols)) - case _ => - })) - } - - // Check if there is unresolved outer attribute in the subquery plan. - checkUnresolvedOuterReference(plan, expr) - // Validate the subquery plan. 
checkAnalysis0(expr.plan) @@ -1088,7 +1143,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB checkOuterReference(plan, expr) expr match { - case ScalarSubquery(query, outerAttrs, _, _, _, _, _, _) => + case ScalarSubquery(query, outerAttrs, _, _, _, _, _) => // Scalar subquery must return one column as output. if (query.output.size != 1) { throw QueryCompilationErrors.subqueryReturnMoreThanOneColumn(query.output.size, @@ -1545,15 +1600,23 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB alter.conf.resolver) } + def checkNoCollationsInMapKeys(colsToAdd: Seq[QualifiedColType]): Unit = { + if (!alter.conf.allowCollationsInMapKeys) { + colsToAdd.foreach(col => SchemaUtils.checkNoCollationsInMapKeys(col.dataType)) + } + } + alter match { case AddColumns(table: ResolvedTable, colsToAdd) => colsToAdd.foreach { colToAdd => checkColumnNotExists("add", colToAdd.name, table.schema) } checkColumnNameDuplication(colsToAdd) + checkNoCollationsInMapKeys(colsToAdd) case ReplaceColumns(_: ResolvedTable, colsToAdd) => checkColumnNameDuplication(colsToAdd) + checkNoCollationsInMapKeys(colsToAdd) case RenameColumn(table: ResolvedTable, col: ResolvedFieldName, newName) => checkColumnNotExists("rename", col.path :+ newName, table.schema) @@ -1592,9 +1655,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case (CharType(l1), CharType(l2)) => l1 == l2 case (CharType(l1), VarcharType(l2)) => l1 <= l2 case (VarcharType(l1), VarcharType(l2)) => l1 <= l2 - case _ => - Cast.canUpCast(from, to) || - DataType.equalsIgnoreCompatibleCollation(field.dataType, newDataType) + case _ => Cast.canUpCast(from, to) } if (!canAlterColumnType(field.dataType, newDataType)) { alter.failAnalysis( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala index 
532e5e0d0a066..168eadbd65cd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCoercion.scala @@ -17,14 +17,13 @@ package org.apache.spark.sql.catalyst.analysis -import scala.annotation.tailrec - import org.apache.spark.sql.catalyst.analysis.CollationStrength.{Default, Explicit, Implicit} -import org.apache.spark.sql.catalyst.analysis.TypeCoercion.{hasStringType, haveSameType} +import org.apache.spark.sql.catalyst.analysis.TypeCoercion.haveSameType import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Project} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.errors.QueryCompilationErrors -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StringType} +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StringType, StructType} import org.apache.spark.sql.util.SchemaUtils /** @@ -32,16 +31,13 @@ import org.apache.spark.sql.util.SchemaUtils */ object CollationTypeCoercion { - private val COLLATION_CONTEXT_TAG = new TreeNodeTag[CollationContext]("collationContext") + private val COLLATION_CONTEXT_TAG = new TreeNodeTag[DataType]("collationContext") private def hasCollationContextTag(expr: Expression): Boolean = { expr.getTagValue(COLLATION_CONTEXT_TAG).isDefined } def apply(expression: Expression): Expression = expression match { - case cast: Cast if shouldRemoveCast(cast) => - cast.child - case ifExpr: If => ifExpr.withNewChildren( ifExpr.predicate +: collateToSingleType(Seq(ifExpr.trueValue, ifExpr.falseValue)) @@ -53,10 +49,10 @@ object CollationTypeCoercion { outputStringType match { case Some(st) => val newBranches = caseWhenExpr.branches.map { case (condition, value) => - (condition, castStringType(value, st)) + (condition, changeType(value, st)) } val newElseValue = - caseWhenExpr.elseValue.map(e => 
castStringType(e, st)) + caseWhenExpr.elseValue.map(e => changeType(e, st)) CaseWhen(newBranches, newElseValue) case _ => @@ -93,13 +89,6 @@ object CollationTypeCoercion { val Seq(newStr, newPad) = collateToSingleType(Seq(str, pad)) stringPadExpr.withNewChildren(Seq(newStr, len, newPad)) - case raiseError: RaiseError => - val newErrorParams = raiseError.errorParms.dataType match { - case MapType(StringType, StringType, _) => raiseError.errorParms - case _ => Cast(raiseError.errorParms, MapType(StringType, StringType)) - } - raiseError.withNewChildren(Seq(raiseError.errorClass, newErrorParams)) - case framelessOffsetWindow @ (_: Lag | _: Lead) => val Seq(input, offset, default) = framelessOffsetWindow.children val Seq(newInput, newDefault) = collateToSingleType(Seq(input, default)) @@ -112,11 +101,9 @@ object CollationTypeCoercion { val newValues = collateToSingleType(mapCreate.values) mapCreate.withNewChildren(newKeys.zip(newValues).flatMap(pair => Seq(pair._1, pair._2))) - case namedStruct: CreateNamedStruct if namedStruct.children.size % 2 == 0 => - val newNames = collateToSingleType(namedStruct.nameExprs) - val newValues = collateToSingleType(namedStruct.valExprs) - val interleaved = newNames.zip(newValues).flatMap(pair => Seq(pair._1, pair._2)) - namedStruct.withNewChildren(interleaved) + case namedStruct: CreateNamedStruct => + // since each child is separate we should not coerce them at all + namedStruct case splitPart: SplitPart => val Seq(str, delimiter, partNum) = splitPart.children @@ -156,46 +143,123 @@ object CollationTypeCoercion { } /** - * If childType is collated and target is UTF8_BINARY, the collation of the output - * should be that of the childType. + * Returns true if the given data type has any StringType in it. 
*/ - private def shouldRemoveCast(cast: Cast): Boolean = { - val isUserDefined = cast.getTagValue(Cast.USER_SPECIFIED_CAST).isDefined - val isChildTypeCollatedString = cast.child.dataType match { - case st: StringType => !st.isUTF8BinaryCollation - case _ => false - } - val targetType = cast.dataType + private def hasStringType(dt: DataType): Boolean = dt.existsRecursively { + case _: StringType => true + case _ => false + } - isUserDefined && isChildTypeCollatedString && targetType == StringType + /** + * Changes the data type of the expression to the given `newType`. + */ + private def changeType(expr: Expression, newType: DataType): Expression = { + mergeTypes(expr.dataType, newType) match { + case Some(newDataType) if newDataType != expr.dataType => + assert(!newDataType.existsRecursively(_.isInstanceOf[StringTypeWithContext])) + + expr match { + case lit: Literal => lit.copy(dataType = newDataType) + case cast: Cast => cast.copy(dataType = newDataType) + case subquery: SubqueryExpression => + changeTypeInSubquery(subquery, newType) + + case _ => Cast(expr, newDataType) + } + + case _ => + expr + } } /** - * Extracts StringTypes from filtered hasStringType + * Changes the data type of the expression in the subquery to the given `newType`. + * Currently only supports subqueries with [[Project]] and [[Aggregate]] plan. 
*/ - @tailrec - private def extractStringType(dt: DataType): Option[StringType] = dt match { - case st: StringType => Some(st) - case ArrayType(et, _) => extractStringType(et) - case _ => None + private def changeTypeInSubquery( + subqueryExpression: SubqueryExpression, + newType: DataType): SubqueryExpression = { + + def transformNamedExpressions(ex: NamedExpression): NamedExpression = { + changeType(ex, newType) match { + case named: NamedExpression => named + case other => Alias(other, ex.name)() + } + } + + val newPlan = subqueryExpression.plan match { + case project: Project => + val newProjectList = project.projectList.map(transformNamedExpressions) + project.copy(projectList = newProjectList) + + case agg: Aggregate => + val newAggregateExpressions = agg.aggregateExpressions.map(transformNamedExpressions) + agg.copy(aggregateExpressions = newAggregateExpressions) + + case other => other + } + + subqueryExpression.withNewPlan(newPlan) } /** - * Casts given expression to collated StringType with id equal to collationId only - * if expression has StringType in the first place. + * If possible, returns the new data type from `inType` by applying + * the collation of `castType`. */ - def castStringType(expr: Expression, st: StringType): Expression = { - castStringType(expr.dataType, st) - .map(dt => Cast(expr, dt)) - .getOrElse(expr) + private def mergeTypes(inType: DataType, castType: DataType): Option[DataType] = { + val outType = mergeStructurally(inType, castType) { + case (_: StringType, right: StringTypeWithContext) => + right.stringType + } + + outType } - private def castStringType(inType: DataType, castType: StringType): Option[DataType] = { - inType match { - case st: StringType if st.collationId != castType.collationId => - Some(castType) - case ArrayType(arrType, nullable) => - castStringType(arrType, castType).map(ArrayType(_, nullable)) + /** + * Merges two data types structurally according to the given base case. 
+ */ + private def mergeStructurally( + leftType: DataType, + rightType: DataType) + (baseCase: PartialFunction[(DataType, DataType), DataType]): Option[DataType] = { + (leftType, rightType) match { + + // handle the base cases first + case _ if baseCase.isDefinedAt((leftType, rightType)) => + Option(baseCase(leftType, rightType)) + + case _ if leftType == rightType => + Some(leftType) + + case (ArrayType(leftElemType, nullable), ArrayType(rightElemType, _)) => + mergeStructurally(leftElemType, rightElemType)(baseCase).map(ArrayType(_, nullable)) + + case (MapType(leftKey, leftValue, nullable), MapType(rightKey, rightValue, _)) => + for { + newKeyType <- mergeStructurally(leftKey, rightKey)(baseCase) + newValueType <- mergeStructurally(leftValue, rightValue)(baseCase) + } yield MapType(newKeyType, newValueType, nullable) + + case (ArrayType(elementType, nullable), right) => + mergeStructurally(elementType, right)(baseCase).map(ArrayType(_, nullable)) + + case (left, ArrayType(elementType, _)) => + mergeStructurally(left, elementType)(baseCase) + + case (StructType(leftFields), StructType(rightFields)) => + if (leftFields.length != rightFields.length) { + return None + } + val newFields = leftFields.zip(rightFields).map { + case (leftField, rightField) => + val newType = mergeStructurally(leftField.dataType, rightField.dataType)(baseCase) + if (newType.isEmpty) { + return None + } + leftField.copy(dataType = newType.get) + } + Some(StructType(newFields)) + case _ => None } } @@ -208,7 +272,7 @@ object CollationTypeCoercion { lctOpt match { case Some(lct) => - expressions.map(e => castStringType(e, lct)) + expressions.map(e => changeType(e, lct)) case _ => expressions } @@ -217,70 +281,83 @@ object CollationTypeCoercion { /** * Tries to find the least common StringType among the given expressions. 
*/ - private def findLeastCommonStringType(expressions: Seq[Expression]): Option[StringType] = { + private def findLeastCommonStringType(expressions: Seq[Expression]): Option[DataType] = { if (!expressions.exists(e => SchemaUtils.hasNonUTF8BinaryCollation(e.dataType))) { + // if there are no collated types we don't need to do anything + return None + } else if (ResolveDefaultStringTypes.needsResolution(expressions)) { + // if any of the strings types are still not resolved + // we need to wait for them to be resolved first return None } val collationContextWinner = expressions.foldLeft(findCollationContext(expressions.head)) { case (Some(left), right) => findCollationContext(right).flatMap { ctx => - collationPrecedenceWinner(left, ctx) + mergeWinner(left, ctx) } - case (None, _) => return None - } - - collationContextWinner.flatMap { cc => - extractStringType(cc.dataType) + case (None, _) => None } + collationContextWinner } /** - * Tries to find the collation context for the given expression. + * Tries to find the data type with the collation context for the given expression. * If found, it will also set the [[COLLATION_CONTEXT_TAG]] on the expression, * so that the context can be reused later. 
*/ - private def findCollationContext(expr: Expression): Option[CollationContext] = { + private def findCollationContext(expr: Expression): Option[DataType] = { val contextOpt = expr match { - case _ if hasCollationContextTag(expr) => - Some(expr.getTagValue(COLLATION_CONTEXT_TAG).get) - - // if `expr` doesn't have a string in its dataType then it doesn't - // have the collation context either - case _ if !expr.dataType.existsRecursively(_.isInstanceOf[StringType]) => - None - case collate: Collate => - Some(CollationContext(collate.dataType, Explicit)) + case _ if collationStrengthBaseCases.isDefinedAt(expr) => + collationStrengthBaseCases(expr) - case _: Alias | _: SubqueryExpression | _: AttributeReference | _: VariableReference => - Some(CollationContext(expr.dataType, Implicit)) + case getStruct: GetStructField => + val childContext = findCollationContext(getStruct.child) + childContext match { + case Some(struct: StructType) => + val field = struct.fields(getStruct.ordinal) + Some(field.dataType) + case _ => None + } - case _: Literal => - Some(CollationContext(expr.dataType, Default)) + case getMapValue: GetMapValue => + findCollationContext(getMapValue.child) match { + case Some(MapType(_, valueType, _)) => + mergeWinner(getMapValue.dataType, valueType) + case _ => + None + } - // if it does have a string type but none of its children do - // then the collation context strength is default - case _ if !expr.children.exists(_.dataType.existsRecursively(_.isInstanceOf[StringType])) => - Some(CollationContext(expr.dataType, Default)) + case struct: CreateNamedStruct => + val childrenContexts = struct.valExprs.map(findCollationContext) + if (childrenContexts.isEmpty) { + return None + } + val newFields = struct.dataType.fields.zip(childrenContexts).map { + case (field, Some(context)) => + field.copy(dataType = context) + case (field, None) => field + } + Some(StructType(newFields)) - case _ => - val contextWinnerOpt = getContextRelevantChildren(expr) - 
.flatMap(findCollationContext) - .foldLeft(Option.empty[CollationContext]) { - case (Some(left), right) => - collationPrecedenceWinner(left, right) - case (None, right) => - Some(right) - } + case map: CreateMap => + val keyContexts = map.keys.flatMap(findCollationContext) + val valueContexts = map.values.flatMap(findCollationContext) + if (keyContexts.length + valueContexts.length != map.children.length) { + return None + } - contextWinnerOpt.map { context => - if (hasStringType(expr.dataType)) { - CollationContext(expr.dataType, context.strength) - } else { - context - } + val keyContextWinner = mergeWinners(map.dataType.keyType, keyContexts) + val valueContextWinner = mergeWinners(map.dataType.valueType, valueContexts) + if (keyContextWinner.isEmpty || valueContextWinner.isEmpty) { + return None } + Some(MapType(keyContextWinner.get, valueContextWinner.get)) + + case _ => + val childContexts = expr.children.flatMap(findCollationContext) + mergeWinners(expr.dataType, childContexts) } contextOpt.foreach(expr.setTagValue(COLLATION_CONTEXT_TAG, _)) @@ -288,69 +365,100 @@ object CollationTypeCoercion { } /** - * Returns the children of the given expression that should be used for calculating the - * winning collation context. + * Base cases for determining the strength of the collation. 
*/ - private def getContextRelevantChildren(expression: Expression): Seq[Expression] = { - expression match { - // collation context for named struct should be calculated based on its values only - case createStruct: CreateNamedStruct => - createStruct.valExprs + private def collationStrengthBaseCases: PartialFunction[Expression, Option[DataType]] = { + case expr if hasCollationContextTag(expr) => + Some(expr.getTagValue(COLLATION_CONTEXT_TAG).get) + + // if `expr` doesn't have a string in its dataType then it doesn't + // have the collation context either + case expr if !expr.dataType.existsRecursively(_.isInstanceOf[StringType]) => + None + + case collate: Collate => + Some(addContextToStringType(collate.dataType, Explicit)) + + case cast: Cast => + val castStrength = if (hasStringType(cast.child.dataType)) { + Implicit + } else { + Default + } - // collation context does not depend on the key for extracting the value - case extract: ExtractValue => - Seq(extract.child) + Some(addContextToStringType(cast.dataType, castStrength)) - // we currently don't support collation precedence for maps, - // as this would involve calculating them for keys and values separately - case _: CreateMap => - Seq.empty + case expr @ (_: NamedExpression | _: SubqueryExpression | _: VariableReference) => + Some(addContextToStringType(expr.dataType, Implicit)) - case _ => - expression.children + case lit: Literal => + Some(addContextToStringType(lit.dataType, Default)) + + // if it does have a string type but none of its children do + // then the collation context strength is default + case expr if !expr.children.exists(_.dataType.existsRecursively(_.isInstanceOf[StringType])) => + Some(addContextToStringType(expr.dataType, Default)) + } + + /** + * Adds collation context to the given string type so we can know its strength. 
+ */ + private def addContextToStringType(dt: DataType, strength: CollationStrength): DataType = { + dt.transformRecursively { + case st: StringType => StringTypeWithContext(st, strength) } } /** - * Returns the collation context that wins in precedence between left and right. + * Merges multiple data types structurally according to strength of the collations into the + * data type of the `start`. + * + * If any of the data types cannot be merged, it returns None. */ - private def collationPrecedenceWinner( - left: CollationContext, - right: CollationContext): Option[CollationContext] = { - - val (leftStringType, rightStringType) = - (extractStringType(left.dataType), extractStringType(right.dataType)) match { - case (Some(l), Some(r)) => - (l, r) - case (None, None) => - return None - case (Some(_), None) => - return Some(left) - case (None, Some(_)) => - return Some(right) - } + private def mergeWinners(start: DataType, rest: Seq[DataType]): Option[DataType] = { + rest.foldLeft(Option(start)) { + case (Some(acc), childContext) => + mergeWinner(acc, childContext) + case (None, _) => + None + } + } - (left.strength, right.strength) match { - case (Explicit, Explicit) if leftStringType != rightStringType => - throw QueryCompilationErrors.explicitCollationMismatchError( - Seq(leftStringType, rightStringType)) + /** + * Merges two data types structurally according to strength of the collations. + */ + private def mergeWinner(left: DataType, right: DataType): Option[DataType] = { + mergeStructurally(left, right) { + case (left: StringTypeWithContext, right: StringTypeWithContext) => + getWinningStringType(left, right) - case (Explicit, _) => Some(left) - case (_, Explicit) => Some(right) + case (_: StringType, right: StringTypeWithContext) => + right + } + } - case (Implicit, Implicit) if leftStringType != rightStringType => + /** Determines the winning StringTypeWithContext based on the strength of the collation. 
*/ + private def getWinningStringType( + left: StringTypeWithContext, + right: StringTypeWithContext): StringTypeWithContext = { + def handleMismatch(): Nothing = { + if (left.strength == Explicit) { + throw QueryCompilationErrors.explicitCollationMismatchError( + Seq(left.stringType, right.stringType)) + } else { throw QueryCompilationErrors.implicitCollationMismatchError( - Seq(leftStringType, rightStringType)) - - case (Implicit, _) => Some(left) - case (_, Implicit) => Some(right) + Seq(left.stringType, right.stringType)) + } + } - case (Default, Default) if leftStringType != rightStringType => - throw QueryCompilationErrors.implicitCollationMismatchError( - Seq(leftStringType, rightStringType)) + (left.strength.priority, right.strength.priority) match { + case (leftPriority, rightPriority) if leftPriority == rightPriority => + if (left.sameType(right)) left + else handleMismatch() - case _ => - Some(left) + case (leftPriority, rightPriority) => + if (leftPriority < rightPriority) left + else right } } } @@ -358,18 +466,32 @@ object CollationTypeCoercion { /** * Represents the strength of collation used for determining precedence in collation resolution. */ -private sealed trait CollationStrength {} +private sealed trait CollationStrength { + val priority: Int +} private object CollationStrength { - case object Explicit extends CollationStrength {} - case object Implicit extends CollationStrength {} - case object Default extends CollationStrength {} + case object Explicit extends CollationStrength { + override val priority: Int = 0 + } + case object Implicit extends CollationStrength { + override val priority: Int = 1 + } + case object Default extends CollationStrength { + override val priority: Int = 2 + } } /** * Encapsulates the context for collation, including data type and strength. * - * @param dataType The data type associated with this collation context. + * @param stringType StringType. 
* @param strength The strength level of the collation, which determines its precedence. */ -private case class CollationContext(dataType: DataType, strength: CollationStrength) {} +private case class StringTypeWithContext(stringType: StringType, strength: CollationStrength) + extends DataType { + + override def defaultSize: Int = stringType.defaultSize + + override private[spark] def asNullable: DataType = this +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index e869cb281ce05..56b2103c555db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -53,9 +53,10 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { (exprs, plan) } else { plan match { - // For `Distinct` and `SubqueryAlias`, we can't recursively resolve and add attributes - // via its children. - case u: UnaryNode if !u.isInstanceOf[Distinct] && !u.isInstanceOf[SubqueryAlias] => + // For `Distinct` and `SubqueryAlias` and `PipeOperator`, we can't recursively resolve and + // add attributes via its children. + case u: UnaryNode if !u.isInstanceOf[Distinct] && !u.isInstanceOf[SubqueryAlias] + && !u.isInstanceOf[PipeOperator] => val (newExprs, newChild) = { // Resolving expressions against current plan. 
val maybeResolvedExprs = exprs.map(resolveExpressionByPlanOutput(_, u)) @@ -221,35 +222,35 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { val outerPlan = AnalysisContext.get.outerPlan if (outerPlan.isEmpty) return e - e.transformWithPruning(_.containsAnyPattern(UNRESOLVED_ATTRIBUTE, TEMP_RESOLVED_COLUMN)) { + def resolve(nameParts: Seq[String]): Option[Expression] = try { + outerPlan.get match { + // Subqueries in UnresolvedHaving can host grouping expressions and aggregate functions. + // We should resolve columns with `agg.output` and the rule `ResolveAggregateFunctions` will + // push them down to Aggregate later. This is similar to what we do in `resolveColumns`. + case u @ UnresolvedHaving(_, agg: Aggregate) => + agg.resolveChildren(nameParts, conf.resolver) + .orElse(u.resolveChildren(nameParts, conf.resolver)) + .map(wrapOuterReference) + case other => + other.resolveChildren(nameParts, conf.resolver).map(wrapOuterReference) + } + } catch { + case ae: AnalysisException => + logDebug(ae.getMessage) + None + } + + e.transformWithPruning( + _.containsAnyPattern(UNRESOLVED_ATTRIBUTE, TEMP_RESOLVED_COLUMN)) { case u: UnresolvedAttribute => - resolveOuterReference(u.nameParts, outerPlan.get).getOrElse(u) + resolve(u.nameParts).getOrElse(u) // Re-resolves `TempResolvedColumn` as outer references if it has tried to be resolved with // Aggregate but failed. case t: TempResolvedColumn if t.hasTried => - resolveOuterReference(t.nameParts, outerPlan.get).getOrElse(t) + resolve(t.nameParts).getOrElse(t) } } - protected def resolveOuterReference( - nameParts: Seq[String], outerPlan: LogicalPlan): Option[Expression] = try { - outerPlan match { - // Subqueries in UnresolvedHaving can host grouping expressions and aggregate functions. - // We should resolve columns with `agg.output` and the rule `ResolveAggregateFunctions` will - // push them down to Aggregate later. This is similar to what we do in `resolveColumns`. 
- case u @ UnresolvedHaving(_, agg: Aggregate) => - agg.resolveChildren(nameParts, conf.resolver) - .orElse(u.resolveChildren(nameParts, conf.resolver)) - .map(wrapOuterReference) - case other => - other.resolveChildren(nameParts, conf.resolver).map(wrapOuterReference) - } - } catch { - case ae: AnalysisException => - logDebug(ae.getMessage) - None - } - def lookupVariable(nameParts: Seq[String]): Option[VariableReference] = { // The temp variables live in `SYSTEM.SESSION`, and the name can be qualified or not. def maybeTempVariableName(nameParts: Seq[String]): Boolean = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala index c1535343d7686..8398fb8d1e830 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala @@ -132,6 +132,13 @@ object DeduplicateRelations extends Rule[LogicalPlan] { _.output.map(_.exprId.id), newFlatMap => newFlatMap.copy(output = newFlatMap.output.map(_.newInstance()))) + case f: FlatMapGroupsInArrow => + deduplicateAndRenew[FlatMapGroupsInArrow]( + existingRelations, + f, + _.output.map(_.exprId.id), + newFlatMap => newFlatMap.copy(output = newFlatMap.output.map(_.newInstance()))) + case f: FlatMapCoGroupsInPandas => deduplicateAndRenew[FlatMapCoGroupsInPandas]( existingRelations, @@ -139,6 +146,13 @@ object DeduplicateRelations extends Rule[LogicalPlan] { _.output.map(_.exprId.id), newFlatMap => newFlatMap.copy(output = newFlatMap.output.map(_.newInstance()))) + case f: FlatMapCoGroupsInArrow => + deduplicateAndRenew[FlatMapCoGroupsInArrow]( + existingRelations, + f, + _.output.map(_.exprId.id), + newFlatMap => newFlatMap.copy(output = newFlatMap.output.map(_.newInstance()))) + case m: MapInPandas => deduplicateAndRenew[MapInPandas]( 
existingRelations, @@ -378,12 +392,24 @@ object DeduplicateRelations extends Rule[LogicalPlan] { newVersion.copyTagsFrom(oldVersion) Seq((oldVersion, newVersion)) + case oldVersion @ FlatMapGroupsInArrow(_, _, output, _) + if oldVersion.outputSet.intersect(conflictingAttributes).nonEmpty => + val newVersion = oldVersion.copy(output = output.map(_.newInstance())) + newVersion.copyTagsFrom(oldVersion) + Seq((oldVersion, newVersion)) + case oldVersion @ FlatMapCoGroupsInPandas(_, _, _, output, _, _) if oldVersion.outputSet.intersect(conflictingAttributes).nonEmpty => val newVersion = oldVersion.copy(output = output.map(_.newInstance())) newVersion.copyTagsFrom(oldVersion) Seq((oldVersion, newVersion)) + case oldVersion @ FlatMapCoGroupsInArrow(_, _, _, output, _, _) + if oldVersion.outputSet.intersect(conflictingAttributes).nonEmpty => + val newVersion = oldVersion.copy(output = output.map(_.newInstance())) + newVersion.copyTagsFrom(oldVersion) + Seq((oldVersion, newVersion)) + case oldVersion @ MapInPandas(_, output, _, _, _) if oldVersion.outputSet.intersect(conflictingAttributes).nonEmpty => val newVersion = oldVersion.copy(output = output.map(_.newInstance())) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/EliminateLazyExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/EliminateLazyExpression.scala new file mode 100644 index 0000000000000..68f3f90e193b6 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/EliminateLazyExpression.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.LAZY_EXPRESSION + +/** + * `LazyExpression` is a marker node to trigger lazy analysis in DataFrames. It's useless when + * entering the analyzer and this rule removes it. + */ +object EliminateLazyExpression extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.resolveExpressionsUpWithPruning(_.containsPattern(LAZY_EXPRESSION)) { + case l: LazyExpression => l.child + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 5103f8048856a..54f6820d2091f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -506,6 +506,8 @@ object FunctionRegistry { expression[CollectList]("collect_list"), expression[CollectList]("array_agg", true, Some("3.3.0")), expression[CollectSet]("collect_set"), + expression[ListAgg]("listagg"), + expression[ListAgg]("string_agg", setAlias = true), expressionBuilder("count_min_sketch", CountMinSketchAggExpressionBuilder), expression[BoolAnd]("every", true), expression[BoolAnd]("bool_and"), @@ -882,6 +884,7 @@ object FunctionRegistry { // Avro expression[FromAvro]("from_avro"), 
expression[ToAvro]("to_avro"), + expression[SchemaOfAvro]("schema_of_avro"), // Protobuf expression[FromProtobuf]("from_protobuf"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala index 5a27a72190325..800126e0030e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionResolution.scala @@ -128,18 +128,15 @@ class FunctionResolution( numArgs: Int, u: UnresolvedFunction): Expression = { func match { - case owg: SupportsOrderingWithinGroup if u.isDistinct => - throw QueryCompilationErrors.distinctInverseDistributionFunctionUnsupportedError( - owg.prettyName - ) + case owg: SupportsOrderingWithinGroup if !owg.isDistinctSupported && u.isDistinct => + throw QueryCompilationErrors.distinctWithOrderingFunctionUnsupportedError(owg.prettyName) case owg: SupportsOrderingWithinGroup - if !owg.orderingFilled && u.orderingWithinGroup.isEmpty => - throw QueryCompilationErrors.inverseDistributionFunctionMissingWithinGroupError( - owg.prettyName - ) + if owg.isOrderingMandatory && !owg.orderingFilled && u.orderingWithinGroup.isEmpty => + throw QueryCompilationErrors.functionMissingWithinGroupError(owg.prettyName) case owg: SupportsOrderingWithinGroup if owg.orderingFilled && u.orderingWithinGroup.nonEmpty => - throw QueryCompilationErrors.wrongNumOrderingsForInverseDistributionFunctionError( + // e.g mode(expr1) within group (order by expr2) is not supported + throw QueryCompilationErrors.wrongNumOrderingsForFunctionError( owg.prettyName, 0, u.orderingWithinGroup.length @@ -198,7 +195,7 @@ class FunctionResolution( case agg: AggregateFunction => // Note: PythonUDAF does not support these advanced clauses. 
if (agg.isInstanceOf[PythonUDAF]) checkUnsupportedAggregateClause(agg, u) - // After parse, the inverse distribution functions not set the ordering within group yet. + // After parse, the functions not set the ordering within group yet. val newAgg = agg match { case owg: SupportsOrderingWithinGroup if !owg.orderingFilled && u.orderingWithinGroup.nonEmpty => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCollationName.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCollationName.scala new file mode 100644 index 0000000000000..50f36f78a4724 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCollationName.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.UNRESOLVED_COLLATION +import org.apache.spark.sql.catalyst.util.CollationFactory + +/** + * Resolves fully qualified collation name and replaces [[UnresolvedCollation]] with + * [[ResolvedCollation]]. + */ +object ResolveCollationName extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = + plan.resolveExpressionsWithPruning(_.containsPattern(UNRESOLVED_COLLATION), ruleId) { + case UnresolvedCollation(collationName) => + ResolvedCollation(CollationFactory.resolveFullyQualifiedName(collationName.toArray)) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDefaultStringTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDefaultStringTypes.scala new file mode 100644 index 0000000000000..75958ff3e1177 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDefaultStringTypes.scala @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical.{AddColumns, AlterColumn, AlterViewAs, ColumnDefinition, CreateView, LogicalPlan, QualifiedColType, ReplaceColumns, V1CreateTablePlan, V2CreateTablePlan} +import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} +import org.apache.spark.sql.types.{DataType, StringType} + +/** + * Resolves default string types in queries and commands. For queries, the default string type is + * determined by the session's default string type. For DDL, the default string type is the + * default type of the object (table -> schema -> catalog). However, this is not implemented yet. + * So, we will just use UTF8_BINARY for now. + */ +object ResolveDefaultStringTypes extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = { + val newPlan = apply0(plan) + if (plan.ne(newPlan)) { + // Due to how tree transformations work and StringType object being equal to + // StringType("UTF8_BINARY"), we need to transform the plan twice + // to ensure the correct results for occurrences of default string type. + val finalPlan = apply0(newPlan) + RuleExecutor.forceAdditionalIteration(finalPlan) + finalPlan + } else { + newPlan + } + } + + private def apply0(plan: LogicalPlan): LogicalPlan = { + if (isDDLCommand(plan)) { + transformDDL(plan) + } else { + transformPlan(plan, sessionDefaultStringType) + } + } + + /** + * Returns whether any of the given `plan` needs to have its + * default string type resolved. 
+ */ + def needsResolution(plan: LogicalPlan): Boolean = { + if (!isDDLCommand(plan) && isDefaultSessionCollationUsed) { + return false + } + + plan.exists(node => needsResolution(node.expressions)) + } + + /** + * Returns whether any of the given `expressions` needs to have its + * default string type resolved. + */ + def needsResolution(expressions: Seq[Expression]): Boolean = { + expressions.exists(needsResolution) + } + + /** + * Returns whether the given `expression` needs to have its + * default string type resolved. + */ + def needsResolution(expression: Expression): Boolean = { + expression.exists(e => transformExpression.isDefinedAt(e)) + } + + private def isDefaultSessionCollationUsed: Boolean = conf.defaultStringType == StringType + + /** + * Returns the default string type that should be used in a given DDL command (for now always + * UTF8_BINARY). + */ + private def stringTypeForDDLCommand(table: LogicalPlan): StringType = + StringType("UTF8_BINARY") + + /** Returns the session default string type */ + private def sessionDefaultStringType: StringType = + StringType(conf.defaultStringType.collationId) + + private def isDDLCommand(plan: LogicalPlan): Boolean = plan exists { + case _: AddColumns | _: ReplaceColumns | _: AlterColumn => true + case _ => isCreateOrAlterPlan(plan) + } + + private def isCreateOrAlterPlan(plan: LogicalPlan): Boolean = plan match { + case _: V1CreateTablePlan | _: V2CreateTablePlan | _: CreateView | _: AlterViewAs => true + case _ => false + } + + private def transformDDL(plan: LogicalPlan): LogicalPlan = { + val newType = stringTypeForDDLCommand(plan) + + plan resolveOperators { + case p if isCreateOrAlterPlan(p) => + transformPlan(p, newType) + + case addCols: AddColumns => + addCols.copy(columnsToAdd = replaceColumnTypes(addCols.columnsToAdd, newType)) + + case replaceCols: ReplaceColumns => + replaceCols.copy(columnsToAdd = replaceColumnTypes(replaceCols.columnsToAdd, newType)) + + case alter: AlterColumn + if 
alter.dataType.isDefined && hasDefaultStringType(alter.dataType.get) => + alter.copy(dataType = Some(replaceDefaultStringType(alter.dataType.get, newType))) + } + } + + /** + * Transforms the given plan, by transforming all expressions in its operators to use the given + * new type instead of the default string type. + */ + private def transformPlan(plan: LogicalPlan, newType: StringType): LogicalPlan = { + plan resolveExpressionsUp { expression => + transformExpression + .andThen(_.apply(newType)) + .applyOrElse(expression, identity[Expression]) + } + } + + /** + * Transforms the given expression, by changing all default string types to the given new type. + */ + private def transformExpression: PartialFunction[Expression, StringType => Expression] = { + case columnDef: ColumnDefinition if hasDefaultStringType(columnDef.dataType) => + newType => columnDef.copy(dataType = replaceDefaultStringType(columnDef.dataType, newType)) + + case cast: Cast if hasDefaultStringType(cast.dataType) => + newType => cast.copy(dataType = replaceDefaultStringType(cast.dataType, newType)) + + case Literal(value, dt) if hasDefaultStringType(dt) => + newType => Literal(value, replaceDefaultStringType(dt, newType)) + } + + private def hasDefaultStringType(dataType: DataType): Boolean = + dataType.existsRecursively(isDefaultStringType) + + private def isDefaultStringType(dataType: DataType): Boolean = { + dataType match { + case st: StringType => + // should only return true for StringType object and not StringType("UTF8_BINARY") + st.eq(StringType) || st.isInstanceOf[TemporaryStringType] + case _ => false + } + } + + private def replaceDefaultStringType(dataType: DataType, newType: StringType): DataType = { + dataType.transformRecursively { + case currentType: StringType if isDefaultStringType(currentType) => + if (currentType == newType) { + TemporaryStringType() + } else { + newType + } + } + } + + private def replaceColumnTypes( + colTypes: Seq[QualifiedColType], + newType: 
StringType): Seq[QualifiedColType] = { + colTypes.map { + case colWithDefault if hasDefaultStringType(colWithDefault.dataType) => + val replaced = replaceDefaultStringType(colWithDefault.dataType, newType) + colWithDefault.copy(dataType = replaced) + + case col => col + } + } +} + +case class TemporaryStringType() extends StringType(1) { + override def toString: String = s"TemporaryStringType($collationId)" +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala index 0e1e71a658c8b..2cf3c6390d5fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.{AliasHelper, EvalHelper, Expression} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.{CTERelationRef, LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.trees.TreePattern.{UNRESOLVED_IDENTIFIER, UNRESOLVED_IDENTIFIER_WITH_CTE} +import org.apache.spark.sql.catalyst.trees.TreePattern.UNRESOLVED_IDENTIFIER import org.apache.spark.sql.types.StringType /** @@ -30,18 +30,9 @@ import org.apache.spark.sql.types.StringType object ResolveIdentifierClause extends Rule[LogicalPlan] with AliasHelper with EvalHelper { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( - _.containsAnyPattern(UNRESOLVED_IDENTIFIER, UNRESOLVED_IDENTIFIER_WITH_CTE)) { + _.containsPattern(UNRESOLVED_IDENTIFIER)) { case p: PlanWithUnresolvedIdentifier if p.identifierExpr.resolved && p.childrenResolved => 
p.planBuilder.apply(evalIdentifierExpr(p.identifierExpr), p.children) - case u @ UnresolvedWithCTERelations(p, cteRelations) => - this.apply(p) match { - case u @ UnresolvedRelation(Seq(table), _, _) => - cteRelations.find(r => plan.conf.resolver(r._1, table)).map { case (_, d) => - // Add a `SubqueryAlias` for hint-resolving rules to match relation names. - SubqueryAlias(table, CTERelationRef(d.id, d.resolved, d.output, d.isStreaming)) - }.getOrElse(u) - case other => other - } case other => other.transformExpressionsWithPruning(_.containsAnyPattern(UNRESOLVED_IDENTIFIER)) { case e: ExpressionWithUnresolvedIdentifier if e.identifierExpr.resolved => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala index 62f3997491c07..b9e9e49a39647 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala @@ -29,8 +29,12 @@ import org.apache.spark.sql.catalyst.trees.AlwaysProcess object ResolveInlineTables extends Rule[LogicalPlan] with EvalHelper { override def apply(plan: LogicalPlan): LogicalPlan = { plan.resolveOperatorsWithPruning(AlwaysProcess.fn, ruleId) { - case table: UnresolvedInlineTable if table.expressionsResolved => + case table: UnresolvedInlineTable if canResolveTable(table) => EvaluateUnresolvedInlineTable.evaluateUnresolvedInlineTable(table) } } + + private def canResolveTable(table: UnresolvedInlineTable): Boolean = { + table.expressionsResolved && !ResolveDefaultStringTypes.needsResolution(table) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAliasReference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAliasReference.scala index da8065eab606d..cb26820a0c79d 
100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAliasReference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveLateralColumnAliasReference.scala @@ -17,6 +17,10 @@ package org.apache.spark.sql.catalyst.analysis +import java.util.LinkedHashSet + +import scala.jdk.CollectionConverters._ + import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.WindowExpression.hasWindowExpression import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression @@ -147,7 +151,7 @@ object ResolveLateralColumnAliasReference extends Rule[LogicalPlan] { && pOriginal.projectList.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) => val p @ Project(projectList, child) = pOriginal.mapChildren(apply0) var aliasMap = AttributeMap.empty[AliasEntry] - val referencedAliases = collection.mutable.Set.empty[AliasEntry] + val referencedAliases = new LinkedHashSet[AliasEntry] def unwrapLCAReference(e: NamedExpression): NamedExpression = { e.transformWithPruning(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) { case lcaRef: LateralColumnAliasReference if aliasMap.contains(lcaRef.a) => @@ -156,7 +160,7 @@ object ResolveLateralColumnAliasReference extends Rule[LogicalPlan] { // and unwrap the LateralColumnAliasReference to the NamedExpression inside // If there is chaining, don't resolve and save to future rounds if (!aliasEntry.alias.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) { - referencedAliases += aliasEntry + referencedAliases.add(aliasEntry) lcaRef.ne } else { lcaRef @@ -182,7 +186,7 @@ object ResolveLateralColumnAliasReference extends Rule[LogicalPlan] { val outerProjectList = collection.mutable.Seq(newProjectList: _*) val innerProjectList = collection.mutable.ArrayBuffer(child.output.map(_.asInstanceOf[NamedExpression]): _*) - referencedAliases.foreach { case AliasEntry(alias: Alias, idx) => + referencedAliases.forEach { case 
AliasEntry(alias: Alias, idx) => outerProjectList.update(idx, alias.toAttribute) innerProjectList += alias } @@ -222,7 +226,7 @@ object ResolveLateralColumnAliasReference extends Rule[LogicalPlan] { if (!aggregateExpressions.forall(eligibleToLiftUp)) { agg } else { - val newAggExprs = collection.mutable.Set.empty[NamedExpression] + val newAggExprs = new LinkedHashSet[NamedExpression] val expressionMap = collection.mutable.LinkedHashMap.empty[Expression, NamedExpression] // Extract the expressions to keep in the Aggregate. Return the transformed expression // fully substituted with the attribute reference to the extracted expressions. @@ -249,11 +253,11 @@ object ResolveLateralColumnAliasReference extends Rule[LogicalPlan] { } } val ne = expressionMap.getOrElseUpdate(aggExpr.canonicalized, assignAlias(aggExpr)) - newAggExprs += ne + newAggExprs.add(ne) ne.toAttribute case e if groupingExpressions.exists(_.semanticEquals(e)) => val ne = expressionMap.getOrElseUpdate(e.canonicalized, assignAlias(e)) - newAggExprs += ne + newAggExprs.add(ne) ne.toAttribute case e => e.mapChildren(extractExpressions) } @@ -262,7 +266,7 @@ object ResolveLateralColumnAliasReference extends Rule[LogicalPlan] { extractExpressions(_).asInstanceOf[NamedExpression]) Project( projectList = projectExprs, - child = agg.copy(aggregateExpressions = newAggExprs.toSeq) + child = agg.copy(aggregateExpressions = newAggExprs.asScala.toSeq) ) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableSpec.scala index cc9979ad4c5e5..05158fbee3de6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableSpec.scala @@ -92,6 +92,7 @@ object ResolveTableSpec extends Rule[LogicalPlan] { options = newOptions.toMap, location = u.location, comment = u.comment, 
+ collation = u.collation, serde = u.serde, external = u.external) withNewSpec(newTableSpec) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionExpression.scala new file mode 100644 index 0000000000000..37981f47287da --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionExpression.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.catalog.SQLFunction +import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression, Unevaluable} +import org.apache.spark.sql.catalyst.trees.TreePattern.{SQL_FUNCTION_EXPRESSION, SQL_SCALAR_FUNCTION, TreePattern} +import org.apache.spark.sql.types.DataType + +/** + * Represent a SQL function expression resolved from the catalog SQL function builder. 
+ */ +case class SQLFunctionExpression( + name: String, + function: SQLFunction, + inputs: Seq[Expression], + returnType: Option[DataType]) extends Expression with Unevaluable { + override def children: Seq[Expression] = inputs + override def dataType: DataType = returnType.get + override def nullable: Boolean = true + override def prettyName: String = name + override def toString: String = s"$name(${children.mkString(", ")})" + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): SQLFunctionExpression = copy(inputs = newChildren) + final override val nodePatterns: Seq[TreePattern] = Seq(SQL_FUNCTION_EXPRESSION) +} + +/** + * A wrapper node for a SQL scalar function expression. + */ +case class SQLScalarFunction(function: SQLFunction, inputs: Seq[Expression], child: Expression) + extends UnaryExpression with Unevaluable { + override def dataType: DataType = child.dataType + override def toString: String = s"${function.name}(${inputs.mkString(", ")})" + override def sql: String = s"${function.name}(${inputs.map(_.sql).mkString(", ")})" + override protected def withNewChildInternal(newChild: Expression): SQLScalarFunction = { + copy(child = newChild) + } + final override val nodePatterns: Seq[TreePattern] = Seq(SQL_SCALAR_FUNCTION) + // The `inputs` is for display only and does not matter in execution. + override lazy val canonicalized: Expression = copy(inputs = Nil, child = child.canonicalized) + override lazy val deterministic: Boolean = { + function.deterministic.getOrElse(true) && children.forall(_.deterministic) + } +} + +/** + * Provide a way to keep state during analysis for resolving nested SQL functions. + * + * @param nestedSQLFunctionDepth The nested depth in the SQL function resolution. A SQL function + * expression should only be expanded as a [[SQLScalarFunction]] if + * the nested depth is 0. 
+ */ +case class SQLFunctionContext(nestedSQLFunctionDepth: Int = 0) + +object SQLFunctionContext { + + private val value = new ThreadLocal[SQLFunctionContext]() { + override def initialValue: SQLFunctionContext = SQLFunctionContext() + } + + def get: SQLFunctionContext = value.get() + + def reset(): Unit = value.remove() + + private def set(context: SQLFunctionContext): Unit = value.set(context) + + def withSQLFunction[A](f: => A): A = { + val originContext = value.get() + val context = originContext.copy( + nestedSQLFunctionDepth = originContext.nestedSQLFunctionDepth + 1) + set(context) + try f finally { set(originContext) } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionNode.scala new file mode 100644 index 0000000000000..38059d9810a7b --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SQLFunctionNode.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.catalog.SQLFunction +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, UnaryNode} +import org.apache.spark.sql.catalyst.trees.TreePattern.{FUNCTION_TABLE_RELATION_ARGUMENT_EXPRESSION, SQL_TABLE_FUNCTION, TreePattern} +import org.apache.spark.sql.errors.DataTypeErrors.toSQLId +import org.apache.spark.sql.errors.QueryCompilationErrors + +/** + * A container for holding a SQL function query plan and its function identifier. + * + * @param function: the SQL function that this node represents. + * @param child: the SQL function body. + */ +case class SQLFunctionNode( + function: SQLFunction, + child: LogicalPlan) extends UnaryNode { + override def output: Seq[Attribute] = child.output + override def stringArgs: Iterator[Any] = Iterator(function.name, child) + override protected def withNewChildInternal(newChild: LogicalPlan): SQLFunctionNode = + copy(child = newChild) + + // Throw a reasonable error message when trying to call a SQL UDF with TABLE argument(s). + if (child.containsPattern(FUNCTION_TABLE_RELATION_ARGUMENT_EXPRESSION)) { + throw QueryCompilationErrors + .tableValuedArgumentsNotYetImplementedForSqlFunctions("call", toSQLId(function.name.funcName)) + } +} + +/** + * Represent a SQL table function plan resolved from the catalog SQL table function builder. + */ +case class SQLTableFunction( + name: String, + function: SQLFunction, + inputs: Seq[Expression], + override val output: Seq[Attribute]) extends LeafNode { + final override val nodePatterns: Seq[TreePattern] = Seq(SQL_TABLE_FUNCTION) + + // Throw a reasonable error message when trying to call a SQL UDF with TABLE argument(s) because + // this functionality is not implemented yet. 
+ if (inputs.exists(_.containsPattern(FUNCTION_TABLE_RELATION_ARGUMENT_EXPRESSION))) { + throw QueryCompilationErrors + .tableValuedArgumentsNotYetImplementedForSqlFunctions("call", toSQLId(name)) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index c30aa9bf91a1d..4769970b51421 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -77,6 +77,8 @@ object TypeCoercion extends TypeCoercionBase { case (NullType, t1) => Some(t1) case (t1, NullType) => Some(t1) + case(s1: StringType, s2: StringType) => StringHelper.tightestCommonString(s1, s2) + case (t1: IntegralType, t2: DecimalType) if t2.isWiderThan(t1) => Some(t2) case (t1: DecimalType, t2: IntegralType) if t1.isWiderThan(t2) => @@ -149,6 +151,7 @@ object TypeCoercion extends TypeCoercionBase { case (DecimalType.Fixed(_, s), _: StringType) if s > 0 => Some(DoubleType) case (_: StringType, DecimalType.Fixed(_, s)) if s > 0 => Some(DoubleType) + case (s1: StringType, s2: StringType) => StringHelper.tightestCommonString(s1, s2) case (l: StringType, r: AtomicType) if canPromoteAsInBinaryComparison(r) => Some(r) case (l: AtomicType, r: StringType) if canPromoteAsInBinaryComparison(l) => Some(l) case (l, r) => None @@ -190,6 +193,12 @@ object TypeCoercion extends TypeCoercionBase { // Cast null type (usually from null literals) into target types case (NullType, target) => target.defaultConcreteType + case (s1: StringType, s2: StringType) => + if (s1.collationId == s2.collationId && StringHelper.isMoreConstrained(s1, s2)) { + s2 + } else { + null + } // If the function accepts any numeric type and the input is a string, we follow the hive // convention and cast that input into a double case (_: StringType, NumericType) => 
NumericType.defaultConcreteType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala index 5b4d76a2a73ed..3b3cf748014b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala @@ -44,6 +44,7 @@ import org.apache.spark.sql.catalyst.expressions.{ MapConcat, MapZipWith, NaNvl, + RandStr, RangeFrame, ScalaUDF, Sequence, @@ -318,7 +319,8 @@ abstract class TypeCoercionHelper { } case aj @ ArrayJoin(arr, d, nr) - if !AbstractArrayType(StringTypeWithCollation).acceptsType(arr.dataType) && + if !AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true)). + acceptsType(arr.dataType) && ArrayType.acceptsType(arr.dataType) => val containsNull = arr.dataType.asInstanceOf[ArrayType].containsNull implicitCast(arr, ArrayType(StringType, containsNull)) match { @@ -399,6 +401,11 @@ abstract class TypeCoercionHelper { NaNvl(Cast(l, DoubleType), r) case NaNvl(l, r) if r.dataType == NullType => NaNvl(l, Cast(r, l.dataType)) + case r: RandStr if r.length.dataType != IntegerType => + implicitCast(r.length, IntegerType).map { casted => + r.copy(length = casted) + }.getOrElse(r) + case other => other } } @@ -415,7 +422,7 @@ abstract class TypeCoercionHelper { if conf.concatBinaryAsString || !children.map(_.dataType).forall(_ == BinaryType) => val newChildren = c.children.map { e => - implicitCast(e, SQLConf.get.defaultStringType).getOrElse(e) + implicitCast(e, StringType).getOrElse(e) } c.copy(children = newChildren) case other => other @@ -465,7 +472,7 @@ abstract class TypeCoercionHelper { if (conf.eltOutputAsString || !children.tail.map(_.dataType).forall(_ == BinaryType)) { children.tail.map { e => - implicitCast(e, SQLConf.get.defaultStringType).getOrElse(e) + implicitCast(e, 
StringType).getOrElse(e) } } else { children.tail diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 4f33c26d5c3c3..f7ab41bd6f96c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.analysis +import java.util.Locale + import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.internal.LogKeys.{ANALYSIS_ERROR, QUERY_PLAN} import org.apache.spark.sql.AnalysisException @@ -103,6 +105,7 @@ object UnsupportedOperationChecker extends Logging { case d: Deduplicate if d.isStreaming && d.keys.exists(hasEventTimeCol) => true case d: DeduplicateWithinWatermark if d.isStreaming => true case t: TransformWithState if t.isStreaming => true + case t: TransformWithStateInPandas if t.isStreaming => true case _ => false } @@ -139,6 +142,38 @@ object UnsupportedOperationChecker extends Logging { } } + private def checkAvroSupportForStatefulOperator(p: LogicalPlan): Option[String] = p match { + // TODO: remove operators from this list as support for avro encoding is added + case s: Aggregate if s.isStreaming => Some("aggregation") + // Since the Distinct node will be replaced to Aggregate in the optimizer rule + // [[ReplaceDistinctWithAggregate]], here we also need to check all Distinct node by + // assuming it as Aggregate. 
+ case d @ Distinct(_: LogicalPlan) if d.isStreaming => Some("distinct") + case _ @ Join(left, right, _, _, _) if left.isStreaming && right.isStreaming => Some("join") + case f: FlatMapGroupsWithState if f.isStreaming => Some("flatMapGroupsWithState") + case f: FlatMapGroupsInPandasWithState if f.isStreaming => + Some("applyInPandasWithState") + case d: Deduplicate if d.isStreaming => Some("dropDuplicates") + case d: DeduplicateWithinWatermark if d.isStreaming => Some("dropDuplicatesWithinWatermark") + case _ => None + } + + // Rule to check that avro encoding format is not supported in case any + // non-transformWithState stateful streaming operators are present in the query. + def checkSupportedStoreEncodingFormats(plan: LogicalPlan): Unit = { + val storeEncodingFormat = SQLConf.get.stateStoreEncodingFormat + if (storeEncodingFormat.toLowerCase(Locale.ROOT) == "avro") { + plan.foreach { subPlan => + val operatorOpt = checkAvroSupportForStatefulOperator(subPlan) + if (operatorOpt.isDefined) { + val errorMsg = "State store encoding format as avro is not supported for " + + s"operator=${operatorOpt.get} used within the query" + throwError(errorMsg)(plan) + } + } + } + } + def checkForStreaming(plan: LogicalPlan, outputMode: OutputMode): Unit = { if (!plan.isStreaming) { throwError( @@ -198,6 +233,11 @@ object UnsupportedOperationChecker extends Logging { "DataFrames/Datasets")(plan) } + // check to see that if store encoding format is set to true, then we have no stateful + // operators in the query or only variants of operators that support avro encoding such as + // transformWithState. 
+ checkSupportedStoreEncodingFormats(plan) + val aggregates = collectStreamingAggregates(plan) // Disallow some output mode outputMode match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala new file mode 100644 index 0000000000000..89ef29ddaaf1c --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ViewResolution.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, View} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.internal.SQLConf + +object ViewResolution { + def resolve( + view: View, + resolveChild: LogicalPlan => LogicalPlan, + checkAnalysis: LogicalPlan => Unit): View = { + // The view's child should be a logical plan parsed from the `desc.viewText`, the variable + // `viewText` should be defined, or else we throw an error on the generation of the View + // operator. + + // Resolve all the UnresolvedRelations and Views in the child. 
+ val newChild = AnalysisContext.withAnalysisContext(view.desc) { + val nestedViewDepth = AnalysisContext.get.nestedViewDepth + val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth + if (nestedViewDepth > maxNestedViewDepth) { + throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError( + view.desc.identifier, + maxNestedViewDepth, + view + ) + } + SQLConf.withExistingConf(View.effectiveSQLConf(view.desc.viewSQLConfigs, view.isTempView)) { + resolveChild(view.child) + } + } + + // Fail the analysis eagerly because outside AnalysisContext, the unresolved operators + // inside a view maybe resolved incorrectly. + checkAnalysis(newChild) + + view.copy(child = newChild) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/executeImmediate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/executeImmediate.scala index c92171ec5c750..b452ca15bed58 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/executeImmediate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/executeImmediate.scala @@ -54,15 +54,18 @@ class SubstituteExecuteImmediate(val catalogManager: CatalogManager) def resolveVariable(e: Expression): Expression = { /** - * We know that the expression is either UnresolvedAttribute or Alias, as passed from the - * parser. If it is an UnresolvedAttribute, we look it up in the catalog and return it. If it - * is an Alias, we resolve the child and return an Alias with the same name. + * We know that the expression is either UnresolvedAttribute, Alias or Parameter, as passed from + * the parser. If it is an UnresolvedAttribute, we look it up in the catalog and return it. If + * it is an Alias, we resolve the child and return an Alias with the same name. If it is + * a Parameter, we leave it as is because the parameter belongs to another parameterized + * query and should be resolved later. 
*/ e match { case u: UnresolvedAttribute => getVariableReference(u, u.nameParts) case a: Alias => Alias(resolveVariable(a.child), a.name)() + case p: Parameter => p case other => throw QueryCompilationErrors.unsupportedParameterExpression(other) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala index f24227abbb651..2cfc2a8c90dc5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala @@ -21,7 +21,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.expressions.{Alias, CreateArray, CreateMap, CreateNamedStruct, Expression, LeafExpression, Literal, MapFromArrays, MapFromEntries, SubqueryExpression, Unevaluable, VariableReference} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SupervisingCommand} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.trees.TreePattern.{COMMAND, PARAMETER, PARAMETERIZED_QUERY, TreePattern, UNRESOLVED_IDENTIFIER_WITH_CTE, UNRESOLVED_WITH} +import org.apache.spark.sql.catalyst.trees.TreePattern.{COMMAND, PARAMETER, PARAMETERIZED_QUERY, TreePattern, UNRESOLVED_WITH} import org.apache.spark.sql.errors.QueryErrorsBase import org.apache.spark.sql.types.DataType @@ -104,18 +104,6 @@ case class PosParameterizedQuery(child: LogicalPlan, args: Seq[Expression]) copy(child = newChild) } -/** - * Base class for rules that process parameterized queries. 
- */ -abstract class ParameterizedQueryProcessor extends Rule[LogicalPlan] { - def assertUnresolvedPlanHasSingleParameterizedQuery(plan: LogicalPlan): Unit = { - if (plan.containsPattern(PARAMETERIZED_QUERY)) { - val parameterizedQueries = plan.collect { case p: ParameterizedQuery => p } - assert(parameterizedQueries.length == 1) - } - } -} - /** * Moves `ParameterizedQuery` inside `SupervisingCommand` for their supervised plans to be * resolved later by the analyzer. @@ -127,10 +115,8 @@ abstract class ParameterizedQueryProcessor extends Rule[LogicalPlan] { * `PosParameterizedQuery(ExplainCommand(ExplainCommand(SomeQuery(...))))` => * `ExplainCommand(ExplainCommand(PosParameterizedQuery(SomeQuery(...))))` */ -object MoveParameterizedQueriesDown extends ParameterizedQueryProcessor { +object MoveParameterizedQueriesDown extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { - assertUnresolvedPlanHasSingleParameterizedQuery(plan) - plan.resolveOperatorsWithPruning(_.containsPattern(PARAMETERIZED_QUERY)) { case pq: ParameterizedQuery if pq.exists(isSupervisingCommand) => moveParameterizedQueryIntoSupervisingCommand(pq) @@ -161,7 +147,7 @@ object MoveParameterizedQueriesDown extends ParameterizedQueryProcessor { * by collection constructor functions such as `map()`, `array()`, `struct()` * from the user-specified arguments. 
*/ -object BindParameters extends ParameterizedQueryProcessor with QueryErrorsBase { +object BindParameters extends Rule[LogicalPlan] with QueryErrorsBase { private def checkArgs(args: Iterable[(String, Expression)]): Unit = { def isNotAllowed(expr: Expression): Boolean = expr.exists { case _: Literal | _: CreateArray | _: CreateNamedStruct | @@ -176,20 +162,23 @@ object BindParameters extends ParameterizedQueryProcessor with QueryErrorsBase { } } - private def bind(p: LogicalPlan)(f: PartialFunction[Expression, Expression]): LogicalPlan = { - p.resolveExpressionsWithPruning(_.containsPattern(PARAMETER)) (f orElse { - case sub: SubqueryExpression => sub.withNewPlan(bind(sub.plan)(f)) - }) + private def bind(p0: LogicalPlan)(f: PartialFunction[Expression, Expression]): LogicalPlan = { + var stop = false + p0.resolveOperatorsDownWithPruning(_.containsPattern(PARAMETER) && !stop) { + case p1 => + stop = p1.isInstanceOf[ParameterizedQuery] + p1.transformExpressionsWithPruning(_.containsPattern(PARAMETER)) (f orElse { + case sub: SubqueryExpression => sub.withNewPlan(bind(sub.plan)(f)) + }) + } } override def apply(plan: LogicalPlan): LogicalPlan = { - assertUnresolvedPlanHasSingleParameterizedQuery(plan) - plan.resolveOperatorsWithPruning(_.containsPattern(PARAMETERIZED_QUERY)) { // We should wait for `CTESubstitution` to resolve CTE before binding parameters, as CTE // relations are not children of `UnresolvedWith`. 
case NameParameterizedQuery(child, argNames, argValues) - if !child.containsAnyPattern(UNRESOLVED_WITH, UNRESOLVED_IDENTIFIER_WITH_CTE) && + if !child.containsPattern(UNRESOLVED_WITH) && argValues.forall(_.resolved) => if (argNames.length != argValues.length) { throw SparkException.internalError(s"The number of argument names ${argNames.length} " + @@ -200,7 +189,7 @@ object BindParameters extends ParameterizedQueryProcessor with QueryErrorsBase { bind(child) { case NamedParameter(name) if args.contains(name) => args(name) } case PosParameterizedQuery(child, args) - if !child.containsAnyPattern(UNRESOLVED_WITH, UNRESOLVED_IDENTIFIER_WITH_CTE) && + if !child.containsPattern(UNRESOLVED_WITH) && args.forall(_.resolved) => val indexedArgs = args.zipWithIndex checkArgs(indexedArgs.map(arg => (s"_${arg._2}", arg._1))) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AliasResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AliasResolver.scala new file mode 100644 index 0000000000000..7b652437dbd8b --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AliasResolver.scala @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.analysis.{AliasResolution, UnresolvedAlias} +import org.apache.spark.sql.catalyst.expressions.{ + Alias, + Cast, + CreateNamedStruct, + Expression, + NamedExpression +} + +/** + * Resolver class that resolves unresolved aliases and handles user-specified aliases. + */ +class AliasResolver(expressionResolver: ExpressionResolver, scopes: NameScopeStack) + extends TreeNodeResolver[UnresolvedAlias, Expression] + with ResolvesExpressionChildren { + + /** + * Resolves [[UnresolvedAlias]] by handling two specific cases: + * - Alias(CreateNamedStruct(...)) - instead of calling [[CreateNamedStructResolver]] which will + * clean up its inner aliases, we manually resolve [[CreateNamedStruct]]'s children, because we + * need to preserve inner aliases until after the alias name is computed. This is a hack because + * fixed-point analyzer computes [[Alias]] name before removing inner aliases. + * - Alias(...) - recursively call [[ExpressionResolver]] to resolve the child expression. + * + * After the children are resolved, call [[AliasResolution]] to compute the alias name. Finally, + * clean up inner aliases from [[CreateNamedStruct]]. + */ + override def resolve(unresolvedAlias: UnresolvedAlias): NamedExpression = { + val aliasWithResolvedChildren = withResolvedChildren( + unresolvedAlias, { + case createNamedStruct: CreateNamedStruct => + withResolvedChildren(createNamedStruct, expressionResolver.resolve) + case other => expressionResolver.resolve(other) + } + ) + + val resolvedAlias = + AliasResolution.resolve(aliasWithResolvedChildren).asInstanceOf[NamedExpression] + + scopes.top.addAlias(resolvedAlias.name) + AliasResolver.cleanupAliases(resolvedAlias) + } + + /** + * Handle already resolved [[Alias]] nodes, i.e. user-specified aliases. 
We disallow stacking + * of [[Alias]] nodes by collapsing them so that only the top node remains. + * + * For an example query like: + * + * {{{ SELECT 1 AS a }}} + * + * parsed plan will be: + * + * Project [Alias(1, a)] + * +- OneRowRelation + * + */ + def handleResolvedAlias(alias: Alias): Alias = { + val aliasWithResolvedChildren = withResolvedChildren(alias, expressionResolver.resolve) + scopes.top.addAlias(aliasWithResolvedChildren.name) + AliasResolver.collapseAlias(aliasWithResolvedChildren) + } +} + +object AliasResolver { + + /** + * For a query like: + * + * {{{ SELECT STRUCT(1 AS a, 2 AS b) AS st }}} + * + * After resolving [[CreateNamedStruct]] the plan will be: + * CreateNamedStruct(Seq("a", Alias(1, "a"), "b", Alias(2, "b"))) + * + * For a query like: + * + * {{{ df.select($"col1".cast("int").cast("double")) }}} + * + * After resolving top-most [[Alias]] the plan will be: + * Alias(Cast(Alias(Cast(col1, int), col1)), double), col1) + * + * Both examples contain inner aliases that are not expected in the analyzed logical plan, + * therefore need to be removed. However, in both examples inner aliases are necessary in order + * for the outer alias to compute its name. To achieve this, we delay removal of inner aliases + * until after the outer alias name is computed. + * + * For cases where there are no dependencies on inner alias, inner alias should be removed by the + * resolver that produces it. + */ + private def cleanupAliases(namedExpression: NamedExpression): NamedExpression = + namedExpression + .withNewChildren(namedExpression.children.map { + case cast @ Cast(alias: Alias, _, _, _) => + cast.copy(child = alias.child) + case createNamedStruct: CreateNamedStruct => + CreateNamedStructResolver.cleanupAliases(createNamedStruct) + case other => other + }) + .asInstanceOf[NamedExpression] + + /** + * If an [[Alias]] node appears on top of another [[Alias]], remove the bottom one. 
Here we don't + * handle a case where a node of different type appears between two [[Alias]] nodes: in this + * case, removal of inner alias (if it is unnecessary) should be handled by respective node's + * resolver, in order to preserve the bottom-up contract. + */ + private def collapseAlias(alias: Alias): Alias = + alias.child match { + case innerAlias: Alias => + val metadata = if (alias.metadata.isEmpty) { + None + } else { + Some(alias.metadata) + } + alias.copy(child = innerAlias.child)( + exprId = alias.exprId, + qualifier = alias.qualifier, + explicitMetadata = metadata, + nonInheritableMetadataKeys = alias.nonInheritableMetadataKeys + ) + case _ => alias + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AnalyzerBridgeState.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AnalyzerBridgeState.scala new file mode 100644 index 0000000000000..d3e93c82dfa21 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/AnalyzerBridgeState.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * The [[AnalyzerBridgeState]] is a state passed from the legacy fixed-point [[Analyzer]] to the
 * single-pass [[Resolver]].
 *
 * @param relationsWithResolvedMetadata A map from [[UnresolvedRelation]] to the relations with
 *   resolved metadata. It allows us to reuse the relation metadata and avoid duplicate
 *   catalog/table lookups in dual-run mode (when
 *   [[ANALYZER_SINGLE_PASS_RESOLVER_RELATION_BRIDGING_ENABLED]] is true).
 */
case class AnalyzerBridgeState(
    relationsWithResolvedMetadata: AnalyzerBridgeState.RelationsWithResolvedMetadata =
      new AnalyzerBridgeState.RelationsWithResolvedMetadata)

object AnalyzerBridgeState {
  // Mutable java.util.HashMap keyed by the original unresolved relation; values are the
  // corresponding relations whose metadata was resolved by the fixed-point run.
  type RelationsWithResolvedMetadata = HashMap[UnresolvedRelation, LogicalPlan]
}
/**
 * The [[AttributeScopeStack]] is used to validate that the attribute which was encountered by the
 * [[ExpressionResolutionValidator]] is in the current operator's visibility scope. We use
 * [[AttributeSet]] as the scope implementation here to check the equality of attributes based on
 * their expression IDs.
 *
 * E.g. for the following SQL query:
 * {{{
 * SELECT a, a, a + col2 FROM (SELECT col1 as a, col2 FROM VALUES (1, 2));
 * }}}
 *
 * Having the following logical plan:
 * {{{
 * Project [a#2, a#2, (a#2 + col2#1) AS (a + col2)#3]
 * +- SubqueryAlias __auto_generated_subquery_name
 *    +- Project [col1#0 AS a#2, col2#1]
 *       +- LocalRelation [col1#0, col2#1]
 * }}}
 *
 * The [[LocalRelation]] outputs attributes with IDs #0 and #1, which can be referenced by the
 * lower [[Project]]. This [[Project]] produces a new attribute ID #2 for an alias and retains the
 * old ID #1 for col2. The upper [[Project]] references `a` twice using the same ID #2 and
 * produces a new ID #3 for an alias of `a + col2`.
 */
class AttributeScopeStack {
  // Stack of attribute scopes; the top entry is the scope relevant to the operator currently
  // being validated. A fresh (empty) scope is pushed for the root on construction.
  private val stack = new ArrayDeque[AttributeSet]
  push()

  /**
   * Get the relevant attribute scope in the context of the current operator.
   */
  def top: AttributeSet = {
    stack.peek()
  }

  /**
   * Overwrite the current relevant scope with a sequence of attributes which is the output of
   * some operator. `attributes` can have duplicate IDs if the output of the operator contains
   * multiple occurrences of the same attribute.
   */
  def overwriteTop(attributes: Seq[Attribute]): Unit = {
    stack.pop()
    stack.push(AttributeSet(attributes))
  }

  /**
   * Execute `body` in the context of a fresh attribute scope and return its result. Used by
   * [[Project]] and [[Aggregate]] validation code since those operators introduce a new scope
   * with fresh expression IDs.
   *
   * Previously the type parameter `R` was declared but the result of `body` was discarded
   * (return type `Unit`); we now propagate the result. Callers that use this method in
   * statement position are unaffected. The scope is popped even if `body` throws.
   */
  def withNewScope[R](body: => R): R = {
    push()
    try {
      body
    } finally {
      pop()
    }
  }

  // Push an empty scope onto the stack.
  private def push(): Unit = {
    stack.push(AttributeSet(Seq.empty))
  }

  // Discard the current top scope.
  private def pop(): Unit = {
    stack.pop()
  }
}
/**
 * [[BinaryArithmeticResolver]] is invoked by [[ExpressionResolver]] in order to resolve
 * [[BinaryArithmetic]] nodes. During resolution, calling
 * [[BinaryArithmeticWithDatetimeResolver]] and applying type coercion can result in
 * [[BinaryArithmetic]] producing some other type of node or a subtree of nodes. In such cases a
 * downwards traversal is necessary, but not going deeper than the original expression's
 * children, since all nodes below that point are guaranteed to be already resolved.
 *
 * For example, given a query:
 *
 *   SELECT '4 11:11' - INTERVAL '4 22:12' DAY TO MINUTE
 *
 * [[BinaryArithmeticResolver]] is called for the following expression:
 *
 *   Subtract(
 *     Literal('4 11:11', StringType),
 *     Literal(Interval('4 22:12' DAY TO MINUTE), DayTimeIntervalType(0,2))
 *   )
 *
 * After calling [[BinaryArithmeticWithDatetimeResolver]] and applying type coercion,
 * the expression is transformed into:
 *
 *   Cast(
 *     DatetimeSub(
 *       TimeAdd(
 *         Literal('4 11:11', StringType),
 *         UnaryMinus(
 *           Literal(Interval('4 22:12' DAY TO MINUTE), DayTimeIntervalType(0,2))
 *         )
 *       )
 *     )
 *   )
 *
 * A single [[Subtract]] node is replaced with a subtree of nodes. In order to resolve this
 * subtree we need to invoke [[ExpressionResolver]] recursively on the top-most node's children.
 * The top-most node itself is not resolved recursively in order to avoid recursive calls to
 * [[BinaryArithmeticResolver]] and other sub-resolvers. To prevent a case where we resolve the
 * same node twice, we need to mark nodes that will act as a limit for the downwards traversal by
 * applying a [[ExpressionResolver.SINGLE_PASS_SUBTREE_BOUNDARY]] tag to them. These children
 * along with all the nodes below them are guaranteed to be resolved at this point. When
 * [[ExpressionResolver]] reaches one of the tagged nodes, it returns identity rather than
 * resolving it. Finally, after resolving the subtree, we need to resolve the top-most node
 * itself, which in this case means applying a timezone, if necessary.
 */
class BinaryArithmeticResolver(
    expressionResolver: ExpressionResolver,
    timezoneAwareExpressionResolver: TimezoneAwareExpressionResolver)
  extends TreeNodeResolver[BinaryArithmetic, Expression]
  with ProducesUnresolvedSubtree {

  // Captured once at construction; controls whether resolved nodes are reallocated so that the
  // resolved-nodes tracker sees unique objects.
  private val shouldTrackResolvedNodes =
    conf.getConf(SQLConf.ANALYZER_SINGLE_PASS_TRACK_RESOLVED_NODES_ENABLED)

  // The rule list (and its order) differs between ANSI and non-ANSI modes; see the companion
  // object. Order is significant and must stay in sync with [[TypeCoercion]]/[[AnsiTypeCoercion]].
  private val typeCoercionRules: Seq[Expression => Expression] =
    if (conf.ansiEnabled) {
      BinaryArithmeticResolver.ANSI_TYPE_COERCION_RULES
    } else {
      BinaryArithmeticResolver.TYPE_COERCION_RULES
    }
  private val typeCoercionResolver: TypeCoercionResolver =
    new TypeCoercionResolver(timezoneAwareExpressionResolver, typeCoercionRules)

  /**
   * Resolve a [[BinaryArithmetic]] node: resolve its children, transform the node (possibly into
   * a subtree — see class doc), resolve that subtree up to the tagged boundary, apply the session
   * timezone to the top-most node, and optionally reallocate for resolved-node tracking.
   */
  override def resolve(unresolvedBinaryArithmetic: BinaryArithmetic): Expression = {
    val binaryArithmeticWithResolvedChildren: BinaryArithmetic =
      withResolvedChildren(unresolvedBinaryArithmetic, expressionResolver.resolve)
    val binaryArithmeticWithResolvedSubtree: Expression =
      withResolvedSubtree(binaryArithmeticWithResolvedChildren, expressionResolver.resolve) {
        transformBinaryArithmeticNode(binaryArithmeticWithResolvedChildren)
      }
    val binaryArithmeticWithResolvedTimezone = timezoneAwareExpressionResolver.withResolvedTimezone(
      binaryArithmeticWithResolvedSubtree,
      conf.sessionLocalTimeZone
    )
    reallocateKnownNodesForTracking(binaryArithmeticWithResolvedTimezone)
  }

  /**
   * Transform a [[BinaryArithmetic]] node by calling
   * [[BinaryArithmeticWithDatetimeResolver]] and applying type coercion. The initial node can be
   * replaced with some other type of node or a subtree of nodes.
   */
  private def transformBinaryArithmeticNode(binaryArithmetic: BinaryArithmetic): Expression = {
    val binaryArithmeticWithDateTypeReplaced: Expression =
      replaceDateType(binaryArithmetic)
    val binaryArithmeticWithTypeCoercion: Expression =
      typeCoercionResolver.resolve(binaryArithmeticWithDateTypeReplaced)
    // In case that the original expression's children types are DateType and StringType,
    // fixed-point fails to resolve the expression with a single application of
    // [[BinaryArithmeticWithDatetimeResolver]]. Therefore, the single-pass resolver needs to
    // invoke [[BinaryArithmeticWithDatetimeResolver.resolve]], type coerce and only after that
    // fix the date/string case. Instead of invoking [[BinaryArithmeticWithDatetimeResolver]]
    // again, we handle the case directly.
    (
      binaryArithmetic.left.dataType,
      binaryArithmetic.right.dataType
    ) match {
      case (_: DateType, _: StringType) =>
        binaryArithmeticWithTypeCoercion match {
          case add: Add => DateAdd(add.left, add.right)
          case subtract: Subtract => SubtractDates(subtract.left, subtract.right)
          case other => other
        }
      case _ => binaryArithmeticWithTypeCoercion
    }
  }

  /**
   * When a DateType-like operand is given to [[BinaryArithmetic]], apply
   * [[BinaryArithmeticWithDatetimeResolver]] in order to replace the [[BinaryArithmetic]] with
   * the appropriate equivalent for DateTime types. Other expressions pass through unchanged.
   */
  private def replaceDateType(expression: Expression) = expression match {
    case arithmetic @ (_: Add | _: Subtract | _: Multiply | _: Divide) =>
      BinaryArithmeticWithDatetimeResolver.resolve(arithmetic)
    case other => other
  }

  /**
   * Since [[TracksResolvedNodes]] requires all the expressions in the tree to be unique objects,
   * we reallocate the known nodes in [[ANALYZER_SINGLE_PASS_TRACK_RESOLVED_NODES_ENABLED]] mode,
   * otherwise we preserve the old object to avoid unnecessary memory allocations.
   */
  private def reallocateKnownNodesForTracking(expression: Expression): Expression = {
    if (shouldTrackResolvedNodes) {
      expression match {
        case add: Add => add.copy()
        case subtract: Subtract => subtract.copy()
        case multiply: Multiply => multiply.copy()
        case divide: Divide => divide.copy()
        case _ => expression
      }
    } else {
      expression
    }
  }
}

object BinaryArithmeticResolver {
  // Ordering in the list of type coercions should be in sync with the list in [[TypeCoercion]].
  private val TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq(
    StringPromotionTypeCoercion.apply,
    DecimalPrecisionTypeCoercion.apply,
    DivisionTypeCoercion.apply,
    IntegralDivisionTypeCoercion.apply,
    TypeCoercion.ImplicitTypeCoercion.apply,
    TypeCoercion.DateTimeOperationsTypeCoercion.apply
  )

  // Ordering in the list of type coercions should be in sync with the list in
  // [[AnsiTypeCoercion]].
  private val ANSI_TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq(
    AnsiStringPromotionTypeCoercion.apply,
    DecimalPrecisionTypeCoercion.apply,
    DivisionTypeCoercion.apply,
    IntegralDivisionTypeCoercion.apply,
    AnsiTypeCoercion.ImplicitTypeCoercion.apply,
    AnsiTypeCoercion.AnsiDateTimeOperationsTypeCoercion.apply
  )
}
This is used in the single-pass [[Resolver]]
 * to avoid duplicate catalog/table lookups in dual-run mode: relation metadata produced by the
 * fixed-point [[Analyzer]] run is simply reused. We strictly rely on the [[AnalyzerBridgeState]]
 * to avoid any blocking calls here.
 */
class BridgedRelationMetadataProvider(
    override val catalogManager: CatalogManager,
    override val relationResolution: RelationResolution,
    analyzerBridgeState: AnalyzerBridgeState
) extends RelationMetadataProvider {
  override val relationsWithResolvedMetadata = reKeyBridgedRelations(analyzerBridgeState)

  /**
   * Re-key the bridge state's map: entries keyed by [[UnresolvedRelation]] become entries keyed
   * by the relation ID computed via `relationIdFromUnresolvedRelation`, while the resolved
   * relation values are carried over untouched.
   */
  private def reKeyBridgedRelations(
      bridgeState: AnalyzerBridgeState): RelationsWithResolvedMetadata = {
    val reKeyed = new RelationsWithResolvedMetadata
    bridgeState.relationsWithResolvedMetadata.forEach { (unresolvedRelation, resolvedRelation) =>
      reKeyed.put(relationIdFromUnresolvedRelation(unresolvedRelation), resolvedRelation)
    }
    reKeyed
  }
}
/**
 * Resolver for [[If]], [[CaseWhen]] and [[Coalesce]] expressions: resolves the children first,
 * then applies the mode-appropriate (ANSI or legacy) type coercion rules.
 */
class ConditionalExpressionResolver(
    expressionResolver: ExpressionResolver,
    timezoneAwareExpressionResolver: TimezoneAwareExpressionResolver)
  extends TreeNodeResolver[ConditionalExpression, Expression]
  with ResolvesExpressionChildren
  with SQLConfHelper {

  // The rule list is chosen once, at construction time, based on the session's ANSI mode.
  private val typeCoercionResolver: TypeCoercionResolver = {
    val rules: Seq[Expression => Expression] =
      if (conf.ansiEnabled) {
        ConditionalExpressionResolver.ANSI_TYPE_COERCION_RULES
      } else {
        ConditionalExpressionResolver.TYPE_COERCION_RULES
      }
    new TypeCoercionResolver(timezoneAwareExpressionResolver, rules)
  }

  /**
   * Resolve the conditional expression's children, then run type coercion over the result.
   */
  override def resolve(unresolvedConditionalExpression: ConditionalExpression): Expression = {
    val withChildrenResolved =
      withResolvedChildren(unresolvedConditionalExpression, expressionResolver.resolve)
    typeCoercionResolver.resolve(withChildrenResolved)
  }
}

object ConditionalExpressionResolver {
  // Ordering in the list of type coercions should be in sync with the list in [[TypeCoercion]].
  private val TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq(
    TypeCoercion.CaseWhenTypeCoercion.apply,
    TypeCoercion.FunctionArgumentTypeCoercion.apply,
    TypeCoercion.IfTypeCoercion.apply
  )

  // Ordering in the list of type coercions should be in sync with the list in
  // [[AnsiTypeCoercion]].
  private val ANSI_TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq(
    AnsiTypeCoercion.CaseWhenTypeCoercion.apply,
    AnsiTypeCoercion.FunctionArgumentTypeCoercion.apply,
    AnsiTypeCoercion.IfTypeCoercion.apply
  )
}
/**
 * Resolves [[CreateNamedStruct]] nodes by recursively resolving their children. If the
 * [[CreateNamedStruct]] is not directly under an [[Alias]], aliases are stripped from the struct
 * fields here; otherwise [[AliasResolver]] performs the removal after computing the outer name.
 */
class CreateNamedStructResolver(expressionResolver: ExpressionResolver)
  extends TreeNodeResolver[CreateNamedStruct, Expression]
  with ResolvesExpressionChildren {

  /**
   * Resolve all children of the struct, then strip the inner field aliases.
   */
  override def resolve(createNamedStruct: CreateNamedStruct): Expression = {
    val withChildrenResolved =
      withResolvedChildren(createNamedStruct, expressionResolver.resolve)
    CreateNamedStructResolver.cleanupAliases(withChildrenResolved)
  }
}

object CreateNamedStructResolver {

  /**
   * For a query like:
   *
   * {{{ SELECT STRUCT(1 AS a, 2 AS b) }}}
   *
   * [[CreateNamedStruct]] will be: CreateNamedStruct(Seq("a", Alias(1, "a"), "b", Alias(2, "b")))
   *
   * Because inner aliases are not expected in the analyzed logical plan, they are removed here —
   * but only aliases carrying no metadata, and only when the [[CreateNamedStruct]] is not
   * directly under an [[Alias]]. In that latter case the removal is deferred to
   * [[AliasResolver]]: in single-pass resolution the [[Alias]] is resolved after the
   * [[CreateNamedStruct]] and needs the complete child structure to compute its output name.
   */
  def cleanupAliases(createNamedStruct: CreateNamedStruct): CreateNamedStruct = {
    val cleanedChildren = createNamedStruct.children.map {
      case alias: Alias if alias.metadata.isEmpty => alias.child
      case other => other
    }
    createNamedStruct.withNewChildren(cleanedChildren).asInstanceOf[CreateNamedStruct]
  }
}
/**
 * [[DelegatesResolutionToExtensions]] provides a method to delegate the resolution of
 * unresolved operators to a list of [[ResolverExtension]]s.
 */
trait DelegatesResolutionToExtensions {

  protected val extensions: Seq[ResolverExtension]

  /**
   * Find the suitable extension for `unresolvedOperator` resolution and resolve the operator
   * with it. Usually extensions return resolved relation nodes, so we generically update the
   * name scope without matching for specific relations, for simplicity.
   *
   * Each extension is matched at most once to reduce the number of
   * [[ResolverExtension.resolveOperator.isDefinedAt]] calls, because those can be expensive.
   *
   * @return `Some(resolutionResult)` if an extension was found and `unresolvedOperator` was
   *   resolved, `None` otherwise.
   *
   * @throws `AMBIGUOUS_RESOLVER_EXTENSION` if several extensions matched this operator.
   */
  def tryDelegateResolutionToExtension(unresolvedOperator: LogicalPlan): Option[LogicalPlan] = {
    var resolved: Option[LogicalPlan] = None
    var firstMatch: Option[ResolverExtension] = None
    for (extension <- extensions) {
      firstMatch match {
        case None =>
          // No extension matched yet: attempt resolution directly (a single partial-function
          // application instead of isDefinedAt + apply).
          resolved = extension.resolveOperator.lift(unresolvedOperator)
          if (resolved.isDefined) {
            firstMatch = Some(extension)
          }
        case Some(winner) =>
          // Already resolved once: any further matching extension makes resolution ambiguous.
          if (extension.resolveOperator.isDefinedAt(unresolvedOperator)) {
            throw QueryCompilationErrors
              .ambiguousResolverExtension(
                unresolvedOperator,
                Seq(winner, extension).map(_.getClass.getSimpleName)
              )
              .withPosition(unresolvedOperator.origin)
          }
      }
    }

    resolved
  }
}
/**
 * An addon to the [[ResolverGuard]] functionality for features that cannot be determined by only
 * looking at the unresolved plan. The [[Resolver]] throws this control-flow exception when it
 * encounters an explicitly unsupported feature. What happens next depends on
 * [[HybridAnalyzer.checkSupportedSinglePassFeatures]]:
 *  - If it is true: the exception is caught by [[HybridAnalyzer]] to abort single-pass analysis
 *    without comparing single-pass and fixed-point results. The motivation is the same as for
 *    the [[ResolverGuard]] — an explicit allowlist of unimplemented features we are aware of,
 *    with `UNSUPPORTED_SINGLE_PASS_ANALYZER_FEATURE` signaling the remaining gaps.
 *  - If it is false: the exception is thrown by the [[HybridAnalyzer]] to get a better sense of
 *    coverage.
 *
 * For example, [[UnresolvedRelation]] can be intermediately resolved by [[ResolveRelations]] as
 * an [[UnresolvedCatalogRelation]] or a [[View]] (among others). Say views are not implemented
 * yet and we are aware of that: [[ExplicitlyUnsupportedResolverFeature]] is thrown mid-analysis
 * to abort it.
 */
class ExplicitlyUnsupportedResolverFeature(reason: String)
  extends Exception(
    "The single-pass analyzer cannot process this query or command because it does not yet " +
    s"support $reason."
  ) {
  // This exception is pure control flow: suppress stack-trace capture to keep throwing cheap.
  override def getStackTrace(): Array[StackTraceElement] = Array.empty[StackTraceElement]
  override def fillInStackTrace(): Throwable = this
}

/**
 * Metadata on explicitly unsupported resolver features.
 */
object ExplicitlyUnsupportedResolverFeature {
  // Fully-qualified class names of operators the single-pass resolver knowingly does not handle.
  val OPERATORS = Set(
    "org.apache.spark.sql.catalyst.plans.logical.View",
    "org.apache.spark.sql.catalyst.streaming.StreamingRelationV2",
    "org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation",
    "org.apache.spark.sql.execution.streaming.StreamingRelation"
  )
}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.expressions.{ + Alias, + ArrayDistinct, + ArrayInsert, + ArrayJoin, + ArrayMax, + ArrayMin, + ArraysZip, + AttributeReference, + BinaryExpression, + ConditionalExpression, + CreateArray, + CreateMap, + CreateNamedStruct, + Expression, + ExtractANSIIntervalDays, + GetArrayStructFields, + GetMapValue, + GetStructField, + Literal, + MapConcat, + MapContainsKey, + MapEntries, + MapFromEntries, + MapKeys, + MapValues, + NamedExpression, + Predicate, + RuntimeReplaceable, + StringRPad, + StringToMap, + TimeZoneAwareExpression, + UnaryMinus +} +import org.apache.spark.sql.types.BooleanType + +/** + * The [[ExpressionResolutionValidator]] performs the validation work on the expression tree for the + * [[ResolutionValidator]]. These two components work together recursively validating the + * logical plan. You can find more info in the [[ResolutionValidator]] scaladoc. + */ +class ExpressionResolutionValidator(resolutionValidator: ResolutionValidator) { + + /** + * Validate resolved expression tree. The principle is the same as + * [[ResolutionValidator.validate]]. 
+ */ + def validate(expression: Expression): Unit = { + expression match { + case attributeReference: AttributeReference => + validateAttributeReference(attributeReference) + case alias: Alias => + validateAlias(alias) + case getMapValue: GetMapValue => + validateGetMapValue(getMapValue) + case binaryExpression: BinaryExpression => + validateBinaryExpression(binaryExpression) + case extractANSIIntervalDay: ExtractANSIIntervalDays => + validateExtractANSIIntervalDays(extractANSIIntervalDay) + case literal: Literal => + validateLiteral(literal) + case predicate: Predicate => + validatePredicate(predicate) + case stringRPad: StringRPad => + validateStringRPad(stringRPad) + case unaryMinus: UnaryMinus => + validateUnaryMinus(unaryMinus) + case getStructField: GetStructField => + validateGetStructField(getStructField) + case createNamedStruct: CreateNamedStruct => + validateCreateNamedStruct(createNamedStruct) + case getArrayStructFields: GetArrayStructFields => + validateGetArrayStructFields(getArrayStructFields) + case createMap: CreateMap => + validateCreateMap(createMap) + case stringToMap: StringToMap => + validateStringToMap(stringToMap) + case mapContainsKey: MapContainsKey => + validateMapContainsKey(mapContainsKey) + case mapConcat: MapConcat => + validateMapConcat(mapConcat) + case mapKeys: MapKeys => + validateMapKeys(mapKeys) + case mapValues: MapValues => + validateMapValues(mapValues) + case mapEntries: MapEntries => + validateMapEntries(mapEntries) + case mapFromEntries: MapFromEntries => + validateMapFromEntries(mapFromEntries) + case createArray: CreateArray => + validateCreateArray(createArray) + case arrayDistinct: ArrayDistinct => + validateArrayDistinct(arrayDistinct) + case arrayInsert: ArrayInsert => + validateArrayInsert(arrayInsert) + case arrayJoin: ArrayJoin => + validateArrayJoin(arrayJoin) + case arrayMax: ArrayMax => + validateArrayMax(arrayMax) + case arrayMin: ArrayMin => + validateArrayMin(arrayMin) + case arraysZip: ArraysZip => + 
validateArraysZip(arraysZip) + case conditionalExpression: ConditionalExpression => + validateConditionalExpression(conditionalExpression) + case runtimeReplaceable: RuntimeReplaceable => + validateRuntimeReplaceable(runtimeReplaceable) + case timezoneExpression: TimeZoneAwareExpression => + validateTimezoneExpression(timezoneExpression) + } + } + + def validateProjectList(projectList: Seq[NamedExpression]): Unit = { + projectList.foreach { + case attributeReference: AttributeReference => + validateAttributeReference(attributeReference) + case alias: Alias => + validateAlias(alias) + } + } + + private def validatePredicate(predicate: Predicate) = { + predicate.children.foreach(validate) + assert( + predicate.dataType == BooleanType, + s"Output type of a predicate must be a boolean, but got: ${predicate.dataType.typeName}" + ) + assert( + predicate.checkInputDataTypes().isSuccess, + "Input types of a predicate must be valid, but got: " + + predicate.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateStringRPad(stringRPad: StringRPad) = { + validate(stringRPad.first) + validate(stringRPad.second) + validate(stringRPad.third) + assert( + stringRPad.checkInputDataTypes().isSuccess, + "Input types of rpad must be valid, but got: " + + stringRPad.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateAttributeReference(attributeReference: AttributeReference): Unit = { + assert( + resolutionValidator.attributeScopeStack.top.contains(attributeReference), + s"Attribute $attributeReference is missing from attribute scope: " + + s"${resolutionValidator.attributeScopeStack.top}" + ) + } + + private def validateAlias(alias: Alias): Unit = { + validate(alias.child) + } + + private def validateBinaryExpression(binaryExpression: BinaryExpression): Unit = { + validate(binaryExpression.left) + validate(binaryExpression.right) + assert( + binaryExpression.checkInputDataTypes().isSuccess, + "Input types of a binary expression 
must be valid, but got: " + + binaryExpression.children.map(_.dataType.typeName).mkString(", ") + ) + + binaryExpression match { + case timezoneExpression: TimeZoneAwareExpression => + assert(timezoneExpression.timeZoneId.nonEmpty, "Timezone expression must have a timezone") + case _ => + } + } + + private def validateConditionalExpression(conditionalExpression: ConditionalExpression): Unit = + conditionalExpression.children.foreach(validate) + + private def validateExtractANSIIntervalDays( + extractANSIIntervalDays: ExtractANSIIntervalDays): Unit = { + validate(extractANSIIntervalDays.child) + } + + private def validateLiteral(literal: Literal): Unit = {} + + private def validateUnaryMinus(unaryMinus: UnaryMinus): Unit = { + validate(unaryMinus.child) + assert( + unaryMinus.checkInputDataTypes().isSuccess, + "Input types of a unary minus must be valid, but got: " + + unaryMinus.child.dataType.typeName.mkString(", ") + ) + } + + private def validateGetStructField(getStructField: GetStructField): Unit = { + validate(getStructField.child) + } + + private def validateCreateNamedStruct(createNamedStruct: CreateNamedStruct): Unit = { + createNamedStruct.children.foreach(validate) + assert( + createNamedStruct.checkInputDataTypes().isSuccess, + "Input types of CreateNamedStruct must be valid, but got: " + + createNamedStruct.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateGetArrayStructFields(getArrayStructFields: GetArrayStructFields): Unit = { + validate(getArrayStructFields.child) + } + + private def validateGetMapValue(getMapValue: GetMapValue): Unit = { + validate(getMapValue.child) + validate(getMapValue.key) + assert( + getMapValue.checkInputDataTypes().isSuccess, + "Input types of GetMapValue must be valid, but got: " + + getMapValue.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateCreateMap(createMap: CreateMap): Unit = { + createMap.children.foreach(validate) + assert( + 
createMap.checkInputDataTypes().isSuccess, + "Input types of CreateMap must be valid, but got: " + + createMap.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateStringToMap(stringToMap: StringToMap): Unit = { + validate(stringToMap.text) + validate(stringToMap.pairDelim) + validate(stringToMap.keyValueDelim) + } + + private def validateMapContainsKey(mapContainsKey: MapContainsKey): Unit = { + validate(mapContainsKey.left) + validate(mapContainsKey.right) + assert( + mapContainsKey.checkInputDataTypes().isSuccess, + "Input types of MapContainsKey must be valid, but got: " + + mapContainsKey.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateMapConcat(mapConcat: MapConcat): Unit = { + mapConcat.children.foreach(validate) + assert( + mapConcat.checkInputDataTypes().isSuccess, + "Input types of MapConcat must be valid, but got: " + + mapConcat.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateMapKeys(mapKeys: MapKeys): Unit = { + validate(mapKeys.child) + } + + private def validateMapValues(mapValues: MapValues): Unit = { + validate(mapValues.child) + } + + private def validateMapEntries(mapEntries: MapEntries): Unit = { + validate(mapEntries.child) + } + + private def validateMapFromEntries(mapFromEntries: MapFromEntries): Unit = { + mapFromEntries.children.foreach(validate) + assert( + mapFromEntries.checkInputDataTypes().isSuccess, + "Input types of MapFromEntries must be valid, but got: " + + mapFromEntries.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateCreateArray(createArray: CreateArray): Unit = { + createArray.children.foreach(validate) + assert( + createArray.checkInputDataTypes().isSuccess, + "Input types of CreateArray must be valid, but got: " + + createArray.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateArrayDistinct(arrayDistinct: ArrayDistinct): Unit = { + validate(arrayDistinct.child) + assert( 
+ arrayDistinct.checkInputDataTypes().isSuccess, + "Input types of ArrayDistinct must be valid, but got: " + + arrayDistinct.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateArrayInsert(arrayInsert: ArrayInsert): Unit = { + validate(arrayInsert.srcArrayExpr) + validate(arrayInsert.posExpr) + validate(arrayInsert.itemExpr) + assert( + arrayInsert.checkInputDataTypes().isSuccess, + "Input types of ArrayInsert must be valid, but got: " + + arrayInsert.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateArrayJoin(arrayJoin: ArrayJoin): Unit = { + validate(arrayJoin.array) + validate(arrayJoin.delimiter) + if (arrayJoin.nullReplacement.isDefined) { + validate(arrayJoin.nullReplacement.get) + } + } + + private def validateArrayMax(arrayMax: ArrayMax): Unit = { + validate(arrayMax.child) + assert( + arrayMax.checkInputDataTypes().isSuccess, + "Input types of ArrayMax must be valid, but got: " + + arrayMax.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateArrayMin(arrayMin: ArrayMin): Unit = { + validate(arrayMin.child) + assert( + arrayMin.checkInputDataTypes().isSuccess, + "Input types of ArrayMin must be valid, but got: " + + arrayMin.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateArraysZip(arraysZip: ArraysZip): Unit = { + arraysZip.children.foreach(validate) + arraysZip.names.foreach(validate) + assert( + arraysZip.checkInputDataTypes().isSuccess, + "Input types of ArraysZip must be valid, but got: " + + arraysZip.children.map(_.dataType.typeName).mkString(", ") + ) + } + + private def validateRuntimeReplaceable(runtimeReplaceable: RuntimeReplaceable): Unit = { + runtimeReplaceable.children.foreach(validate) + } + + private def validateTimezoneExpression(timezoneExpression: TimeZoneAwareExpression): Unit = { + timezoneExpression.children.foreach(validate) + assert(timezoneExpression.timeZoneId.nonEmpty, "Timezone expression must have a 
timezone") + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ExpressionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ExpressionResolver.scala new file mode 100644 index 0000000000000..1d072509626b7 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ExpressionResolver.scala @@ -0,0 +1,347 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.analysis.{ + withPosition, + FunctionResolution, + UnresolvedAlias, + UnresolvedAttribute, + UnresolvedFunction, + UnresolvedStar +} +import org.apache.spark.sql.catalyst.expressions.{ + Alias, + AttributeReference, + BinaryArithmetic, + ConditionalExpression, + CreateNamedStruct, + Expression, + ExtractANSIIntervalDays, + InheritAnalysisRules, + Literal, + NamedExpression, + Predicate, + RuntimeReplaceable, + TimeAdd, + TimeZoneAwareExpression, + UnaryMinus +} +import org.apache.spark.sql.catalyst.trees.TreeNodeTag +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.MetadataBuilder + +/** + * The [[ExpressionResolver]] is used by the [[Resolver]] during the analysis to resolve + * expressions. + * + * The functions here generally traverse unresolved [[Expression]] nodes recursively, + * constructing and returning the resolved [[Expression]] nodes bottom-up. + * This is the primary entry point for implementing expression analysis, + * wherein the [[resolve]] method accepts a fully unresolved [[Expression]] and returns + * a fully resolved [[Expression]] in response with all data types and attribute + * reference ID assigned for valid requests. This resolver also takes responsibility + * to detect any errors in the initial SQL query or DataFrame and return appropriate + * error messages including precise parse locations wherever possible. + * + * @param resolver [[Resolver]] is passed from the parent to resolve other + * operators which are nested in expressions. + * @param scopes [[NameScopeStack]] to resolve the expression tree in the correct scope. + * @param functionResolution [[FunctionResolution]] to resolve function expressions. + * @param planLogger [[PlanLogger]] to log expression tree resolution events. 
+ */ +class ExpressionResolver( + resolver: Resolver, + scopes: NameScopeStack, + functionResolution: FunctionResolution, + planLogger: PlanLogger) + extends TreeNodeResolver[Expression, Expression] + with ProducesUnresolvedSubtree + with ResolvesExpressionChildren + with TracksResolvedNodes[Expression] { + private val shouldTrackResolvedNodes = + conf.getConf(SQLConf.ANALYZER_SINGLE_PASS_TRACK_RESOLVED_NODES_ENABLED) + private val aliasResolver = new AliasResolver(this, scopes) + private val createNamedStructResolver = new CreateNamedStructResolver(this) + private val timezoneAwareExpressionResolver = new TimezoneAwareExpressionResolver(this) + private val conditionalExpressionResolver = + new ConditionalExpressionResolver(this, timezoneAwareExpressionResolver) + private val predicateResolver = + new PredicateResolver(this, timezoneAwareExpressionResolver) + private val binaryArithmeticResolver = { + new BinaryArithmeticResolver( + this, + timezoneAwareExpressionResolver + ) + } + private val functionResolver = new FunctionResolver( + this, + timezoneAwareExpressionResolver, + functionResolution + ) + private val timeAddResolver = new TimeAddResolver(this, timezoneAwareExpressionResolver) + private val unaryMinusResolver = new UnaryMinusResolver(this, timezoneAwareExpressionResolver) + + /** + * This method is an expression analysis entry point. The method first checks if the expression + * has already been resolved (necessary because of partially-unresolved subtrees, see + * [[ProducesUnresolvedSubtree]]). If not already resolved, method takes an unresolved + * [[Expression]] and chooses the right `resolve*` method using pattern matching on the + * `unresolvedExpression` type. This pattern matching enumerates all the expression node types + * that are supported by the single-pass analysis. 
+ * When developers introduce a new [[Expression]] type to the Catalyst, they should implement + * a corresponding `resolve*` method in the [[ExpressionResolver]] and add it to this pattern + * match list. + * + * [[resolve]] will be called recursively during the expression tree traversal eventually + * producing a fully resolved expression subtree or a descriptive error message. + * + * [[resolve]] can recursively call `resolver` to resolve nested operators (e.g. scalar + * subqueries): + * + * {{{ SELECT * FROM VALUES (1), (2) WHERE col1 IN (SELECT 1); }}} + * + * In this case `IN` is an expression and `SELECT 1` is a nested operator tree for which + * the [[ExpressionResolver]] would invoke the [[Resolver]]. + */ + override def resolve(unresolvedExpression: Expression): Expression = { + planLogger.logExpressionTreeResolutionEvent(unresolvedExpression, "Unresolved expression tree") + + if (unresolvedExpression + .getTagValue(ExpressionResolver.SINGLE_PASS_SUBTREE_BOUNDARY) + .nonEmpty) { + unresolvedExpression + } else { + throwIfNodeWasResolvedEarlier(unresolvedExpression) + + val resolvedExpression = unresolvedExpression match { + case unresolvedBinaryArithmetic: BinaryArithmetic => + binaryArithmeticResolver.resolve(unresolvedBinaryArithmetic) + case unresolvedExtractANSIIntervalDays: ExtractANSIIntervalDays => + resolveExtractANSIIntervalDays(unresolvedExtractANSIIntervalDays) + case unresolvedNamedExpression: NamedExpression => + resolveNamedExpression(unresolvedNamedExpression) + case unresolvedFunction: UnresolvedFunction => + functionResolver.resolve(unresolvedFunction) + case unresolvedLiteral: Literal => + resolveLiteral(unresolvedLiteral) + case unresolvedPredicate: Predicate => + predicateResolver.resolve(unresolvedPredicate) + case unresolvedTimeAdd: TimeAdd => + timeAddResolver.resolve(unresolvedTimeAdd) + case unresolvedUnaryMinus: UnaryMinus => + unaryMinusResolver.resolve(unresolvedUnaryMinus) + case createNamedStruct: CreateNamedStruct => + 
createNamedStructResolver.resolve(createNamedStruct) + case unresolvedConditionalExpression: ConditionalExpression => + conditionalExpressionResolver.resolve(unresolvedConditionalExpression) + case unresolvedRuntimeReplaceable: RuntimeReplaceable => + resolveRuntimeReplaceable(unresolvedRuntimeReplaceable) + case unresolvedTimezoneExpression: TimeZoneAwareExpression => + timezoneAwareExpressionResolver.resolve(unresolvedTimezoneExpression) + case _ => + withPosition(unresolvedExpression) { + throwUnsupportedSinglePassAnalyzerFeature(unresolvedExpression) + } + } + + markNodeAsResolved(resolvedExpression) + + planLogger.logExpressionTreeResolution(unresolvedExpression, resolvedExpression) + + resolvedExpression + } + } + + private def resolveNamedExpression( + unresolvedNamedExpression: Expression, + isTopOfProjectList: Boolean = false): Expression = + unresolvedNamedExpression match { + case alias: Alias => + aliasResolver.handleResolvedAlias(alias) + case unresolvedAlias: UnresolvedAlias => + aliasResolver.resolve(unresolvedAlias) + case unresolvedAttribute: UnresolvedAttribute => + resolveAttribute(unresolvedAttribute, isTopOfProjectList) + case unresolvedStar: UnresolvedStar => + withPosition(unresolvedStar) { + throwInvalidStarUsageError(unresolvedStar) + } + case attributeReference: AttributeReference => + handleResolvedAttributeReference(attributeReference) + case _ => + withPosition(unresolvedNamedExpression) { + throwUnsupportedSinglePassAnalyzerFeature(unresolvedNamedExpression) + } + } + + /** + * The [[Project]] list can contain different unresolved expressions before the resolution, which + * will be resolved using generic [[resolve]]. However, [[UnresolvedStar]] is a special case, + * because it is expanded into a sequence of [[NamedExpression]]s. Because of that this method + * returns a sequence and doesn't conform to generic [[resolve]] interface - it's called directly + * from the [[Resolver]] during [[Project]] resolution. 
+ * + * The output sequence can be larger than the input sequence due to [[UnresolvedStar]] expansion. + */ + def resolveProjectList(unresolvedProjectList: Seq[NamedExpression]): Seq[NamedExpression] = { + unresolvedProjectList.flatMap { + case unresolvedStar: UnresolvedStar => + resolveStar(unresolvedStar) + case other => + Seq(resolveNamedExpression(other, isTopOfProjectList = true).asInstanceOf[NamedExpression]) + } + } + + /** + * [[UnresolvedAttribute]] resolution relies on [[NameScope]] to lookup the attribute by its + * multipart name. The resolution can result in three different outcomes which are handled in the + * [[NameTarget.pickCandidate]]: + * + * - No results from the [[NameScope]] mean that the attribute lookup failed as in: + * {{{ SELECT col1 FROM (SELECT 1 as col2); }}} + * + * - Several results from the [[NameScope]] mean that the reference is ambiguous as in: + * {{{ SELECT col1 FROM (SELECT 1 as col1), (SELECT 2 as col1); }}} + * + * - Single result from the [[NameScope]] means that the attribute was found as in: + * {{{ SELECT col1 FROM VALUES (1); }}} + * + * If the attribute is at the top of the project list (which is indicated by + * [[isTopOfProjectList]]), we preserve the [[Alias]] or remove it otherwise. + */ + private def resolveAttribute( + unresolvedAttribute: UnresolvedAttribute, + isTopOfProjectList: Boolean): Expression = + withPosition(unresolvedAttribute) { + if (scopes.top.isExistingAlias(unresolvedAttribute.nameParts.head)) { + // Temporarily disable referencing aliases until we support LCA resolution. 
+ throw new ExplicitlyUnsupportedResolverFeature("unsupported expression: LateralColumnAlias") + } + + val nameTarget: NameTarget = scopes.top.matchMultipartName(unresolvedAttribute.nameParts) + + val candidate = nameTarget.pickCandidate(unresolvedAttribute) + if (isTopOfProjectList && nameTarget.aliasName.isDefined) { + Alias(candidate, nameTarget.aliasName.get)() + } else { + candidate + } + } + + /** + * [[AttributeReference]] is already resolved if it's passed to us from DataFrame `col(...)` + * function, for example. + */ + private def handleResolvedAttributeReference(attributeReference: AttributeReference) = + tryStripAmbiguousSelfJoinMetadata(attributeReference) + + /** + * [[ExtractANSIIntervalDays]] resolution doesn't require any specific resolution logic apart + * from resolving its children. + */ + private def resolveExtractANSIIntervalDays( + unresolvedExtractANSIIntervalDays: ExtractANSIIntervalDays) = + withResolvedChildren(unresolvedExtractANSIIntervalDays, resolve) + + /** + * [[UnresolvedStar]] resolution relies on the [[NameScope]]'s ability to get the attributes by a + * multipart name ([[UnresolvedStar]]'s `target` field): + * + * - Star target is defined: + * + * {{{ + * SELECT t.* FROM VALUES (1) AS t; + * -> + * Project [col1#19] + * }}} + * + * + * - Star target is not defined: + * + * {{{ + * SELECT * FROM (SELECT 1 as col1), (SELECT 2 as col2); + * -> + * Project [col1#19, col2#20] + * }}} + */ + def resolveStar(unresolvedStar: UnresolvedStar): Seq[NamedExpression] = + withPosition(unresolvedStar) { + scopes.top.expandStar(unresolvedStar) + } + + /** + * [[Literal]] resolution doesn't require any specific resolution logic at this point. + * + * Since [[TracksResolvedNodes]] requires all the expressions in the tree to be unique objects, + * we reallocate the literal in [[ANALYZER_SINGLE_PASS_TRACK_RESOLVED_NODES_ENABLED]] mode, + * otherwise we preserve the old object to avoid unnecessary memory allocations. 
+ */ + private def resolveLiteral(literal: Literal): Expression = { + if (shouldTrackResolvedNodes) { + literal.copy() + } else { + literal + } + } + + /** + * When [[RuntimeReplaceable]] is mixed in with [[InheritAnalysisRules]], child expression will + * be runtime replacement. In that case we need to resolve the children of the expression. + * otherwise, no resolution is necessary because replacement is already resolved. + */ + private def resolveRuntimeReplaceable(unresolvedRuntimeReplaceable: RuntimeReplaceable) = + unresolvedRuntimeReplaceable match { + case inheritAnalysisRules: InheritAnalysisRules => + withResolvedChildren(inheritAnalysisRules, resolve) + case other => other + } + + /** + * [[DetectAmbiguousSelfJoin]] rule in the fixed-point Analyzer detects ambiguous references in + * self-joins based on special metadata added by [[Dataset]] code (see SPARK-27547). Just strip + * this for now since we don't support joins yet. + */ + private def tryStripAmbiguousSelfJoinMetadata(attributeReference: AttributeReference) = { + val metadata = attributeReference.metadata + if (ExpressionResolver.AMBIGUOUS_SELF_JOIN_METADATA.exists(metadata.contains(_))) { + val metadataBuilder = new MetadataBuilder().withMetadata(metadata) + for (metadataKey <- ExpressionResolver.AMBIGUOUS_SELF_JOIN_METADATA) { + metadataBuilder.remove(metadataKey) + } + attributeReference.withMetadata(metadataBuilder.build()) + } else { + attributeReference + } + } + + private def throwUnsupportedSinglePassAnalyzerFeature(unresolvedExpression: Expression): Nothing = + throw QueryCompilationErrors.unsupportedSinglePassAnalyzerFeature( + s"${unresolvedExpression.getClass} expression resolution" + ) + + private def throwInvalidStarUsageError(unresolvedStar: UnresolvedStar): Nothing = + // TODO(vladimirg-db): Use parent operator name instead of "query" + throw QueryCompilationErrors.invalidStarUsageError("query", Seq(unresolvedStar)) +} + +object ExpressionResolver { + private val 
AMBIGUOUS_SELF_JOIN_METADATA = Seq("__dataset_id", "__col_position") + val SINGLE_PASS_SUBTREE_BOUNDARY = TreeNodeTag[Unit]("single_pass_subtree_boundary") +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala new file mode 100644 index 0000000000000..b7311b83e872e --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/FunctionResolver.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.analysis.{ + AnsiTypeCoercion, + CollationTypeCoercion, + FunctionResolution, + TypeCoercion, + UnresolvedFunction, + UnresolvedStar +} +import org.apache.spark.sql.catalyst.expressions.Expression + +/** + * A resolver for [[UnresolvedFunction]]s that resolves functions to concrete [[Expression]]s. + * It resolves the children of the function first by calling [[ExpressionResolver.resolve]] on them + * if they are not [[UnresolvedStar]]s. 
If the children are [[UnresolvedStar]]s, it resolves them + * using [[ExpressionResolver.resolveStar]]. Examples are following: + * + * - Function doesn't contain any [[UnresolvedStar]]: + * {{{ SELECT ARRAY(col1) FROM VALUES (1); }}} + * it is resolved only using [[ExpressionResolver.resolve]]. + * - Function contains [[UnresolvedStar]]: + * {{{ SELECT ARRAY(*) FROM VALUES (1); }}} + * it is resolved using [[ExpressionResolver.resolveStar]]. + * + * It applies appropriate [[TypeCoercion]] (or [[AnsiTypeCoercion]]) rules after resolving the + * function using the [[FunctionResolution]] code. + */ +class FunctionResolver( + expressionResolver: ExpressionResolver, + timezoneAwareExpressionResolver: TimezoneAwareExpressionResolver, + functionResolution: FunctionResolution) + extends TreeNodeResolver[UnresolvedFunction, Expression] + with ProducesUnresolvedSubtree { + + private val typeCoercionRules: Seq[Expression => Expression] = + if (conf.ansiEnabled) { + FunctionResolver.ANSI_TYPE_COERCION_RULES + } else { + FunctionResolver.TYPE_COERCION_RULES + } + private val typeCoercionResolver: TypeCoercionResolver = + new TypeCoercionResolver(timezoneAwareExpressionResolver, typeCoercionRules) + + override def resolve(unresolvedFunction: UnresolvedFunction): Expression = { + val functionWithResolvedChildren = + unresolvedFunction.copy(arguments = unresolvedFunction.arguments.flatMap { + case s: UnresolvedStar => expressionResolver.resolveStar(s) + case other => Seq(expressionResolver.resolve(other)) + }) + val resolvedFunction = functionResolution.resolveFunction(functionWithResolvedChildren) + typeCoercionResolver.resolve(resolvedFunction) + } +} + +object FunctionResolver { + // Ordering in the list of type coercions should be in sync with the list in [[TypeCoercion]]. 
+ private val TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq( + CollationTypeCoercion.apply, + TypeCoercion.InTypeCoercion.apply, + TypeCoercion.FunctionArgumentTypeCoercion.apply, + TypeCoercion.IfTypeCoercion.apply, + TypeCoercion.ImplicitTypeCoercion.apply + ) + + // Ordering in the list of type coercions should be in sync with the list in [[AnsiTypeCoercion]]. + private val ANSI_TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq( + CollationTypeCoercion.apply, + AnsiTypeCoercion.InTypeCoercion.apply, + AnsiTypeCoercion.FunctionArgumentTypeCoercion.apply, + AnsiTypeCoercion.IfTypeCoercion.apply, + AnsiTypeCoercion.ImplicitTypeCoercion.apply + ) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HybridAnalyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HybridAnalyzer.scala new file mode 100644 index 0000000000000..039c07f5edbc2 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/HybridAnalyzer.scala @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import scala.util.control.NonFatal + +import org.apache.spark.sql.catalyst.{QueryPlanningTracker, SQLConfHelper} +import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, Analyzer} +import org.apache.spark.sql.catalyst.plans.NormalizePlan +import org.apache.spark.sql.catalyst.plans.logical.{AnalysisHelper, LogicalPlan} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.internal.SQLConf + +/** + * The HybridAnalyzer routes the unresolved logical plan between the legacy Analyzer and + * a single-pass Analyzer when the query that we are processing is being run from unit tests + * depending on the testing flags set and the structure of this unresolved logical plan: + * - If the "spark.sql.analyzer.singlePassResolver.soloRunEnabled" is "true", the + * [[HybridAnalyzer]] will unconditionally run the single-pass Analyzer, which would + * usually result in some unexpected behavior and failures. This flag is used only for + * development. + * - If the "spark.sql.analyzer.singlePassResolver.dualRunEnabled" is "true", the + * [[HybridAnalyzer]] will invoke the legacy analyzer and optionally _also_ the single-pass + * one depending on the structure of the unresolved plan. This decision is based on which + * features are supported by the single-pass Analyzer, and the checking is implemented in + * the [[ResolverGuard]]. After that we validate the results using the following + * logic: + * - If the fixed-point Analyzer fails and the single-pass one succeeds, we throw an + * appropriate exception (please check the + * [[QueryCompilationErrors.fixedPointFailedSinglePassSucceeded]] method) + * - If both the fixed-point and the single-pass Analyzers failed, we throw the exception + * from the fixed-point Analyzer. + * - If the single-pass Analyzer failed, we throw an exception from its failure. 
+ * - If both the fixed-point and the single-pass Analyzers succeeded, we compare the logical + * plans and output schemas, and return the resolved plan from the fixed-point Analyzer. + * - Otherwise we run the legacy analyzer. + * */ +class HybridAnalyzer( + legacyAnalyzer: Analyzer, + resolverGuard: ResolverGuard, + resolver: Resolver, + checkSupportedSinglePassFeatures: Boolean = true) + extends SQLConfHelper { + private var singlePassResolutionDuration: Option[Long] = None + private var fixedPointResolutionDuration: Option[Long] = None + + def apply(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { + val dualRun = + conf.getConf(SQLConf.ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER) && + checkResolverGuard(plan) + + withTrackedAnalyzerBridgeState(dualRun) { + if (dualRun) { + resolveInDualRun(plan, tracker) + } else if (conf.getConf(SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED)) { + resolveInSinglePass(plan) + } else { + resolveInFixedPoint(plan, tracker) + } + } + } + + def getSinglePassResolutionDuration: Option[Long] = singlePassResolutionDuration + + def getFixedPointResolutionDuration: Option[Long] = fixedPointResolutionDuration + + /** + * Call `body` in the context of tracked [[AnalyzerBridgeState]]. Set the new bridge state + * depending on whether we are in dual-run mode or not: + * - If [[dualRun]] and [[ANALYZER_SINGLE_PASS_RESOLVER_RELATION_BRIDGING_ENABLED]] are true, + * create and set a new [[AnalyzerBridgeState]]. + * - Otherwise, reset [[AnalyzerBridgeState]]. + * + * Finally, set the bridge state back to the previous one after the `body` is executed to avoid + * disrupting the possible upper-level [[Analyzer]] invocation in case it's recursive + * [[Analyzer]] call. 
+ * */ + private def withTrackedAnalyzerBridgeState(dualRun: Boolean)( + body: => LogicalPlan): LogicalPlan = { + val bridgeRelations = dualRun && conf.getConf( + SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_RELATION_BRIDGING_ENABLED + ) + + val prevSinglePassResolverBridgeState = AnalysisContext.get.getSinglePassResolverBridgeState + + AnalysisContext.get.setSinglePassResolverBridgeState(if (bridgeRelations) { + Some(new AnalyzerBridgeState) + } else { + None + }) + + try { + body + } finally { + AnalysisContext.get.setSinglePassResolverBridgeState(prevSinglePassResolverBridgeState) + } + } + + /** + * This method is used to run both the legacy Analyzer and single-pass Analyzer, + * and then compare the results or check the errors. For more context please check the + * [[HybridAnalyzer]] scaladoc. + * */ + private def resolveInDualRun(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { + var fixedPointException: Option[Throwable] = None + val fixedPointResult = try { + val (resolutionDuration, result) = recordDuration { + Some(resolveInFixedPoint(plan, tracker)) + } + fixedPointResolutionDuration = Some(resolutionDuration) + result + } catch { + case NonFatal(e) => + fixedPointException = Some(e) + None + } + + var singlePassException: Option[Throwable] = None + val singlePassResult = try { + val (resolutionDuration, result) = recordDuration { + Some(resolveInSinglePass(plan)) + } + singlePassResolutionDuration = Some(resolutionDuration) + result + } catch { + case NonFatal(e) => + singlePassException = Some(e) + None + } + + fixedPointException match { + case Some(fixedPointEx) => + singlePassException match { + case Some(_) => + throw fixedPointEx + case None => + throw QueryCompilationErrors.fixedPointFailedSinglePassSucceeded( + singlePassResult.get, + fixedPointEx + ) + } + case None => + singlePassException match { + case Some(singlePassEx: ExplicitlyUnsupportedResolverFeature) + if checkSupportedSinglePassFeatures => + fixedPointResult.get + case 
Some(singlePassEx) => + throw singlePassEx + case None => + validateLogicalPlans(fixedPointResult.get, singlePassResult.get) + fixedPointResult.get + } + } + } + + /** + * This method is used to run the single-pass Analyzer which will return the resolved plan + * or throw an exception if the resolution fails. Both cases are handled in the caller method. + * */ + private def resolveInSinglePass(plan: LogicalPlan): LogicalPlan = { + val resolvedPlan = resolver.lookupMetadataAndResolve( + plan, + analyzerBridgeState = AnalysisContext.get.getSinglePassResolverBridgeState + ) + if (conf.getConf(SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_VALIDATION_ENABLED)) { + val validator = new ResolutionValidator + validator.validatePlan(resolvedPlan) + } + resolvedPlan + } + + /** + * This method is used to run the legacy Analyzer which will return the resolved plan + * or throw an exception if the resolution fails. Both cases are handled in the caller method. + * */ + private def resolveInFixedPoint(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { + val resolvedPlan = legacyAnalyzer.executeAndTrack(plan, tracker) + QueryPlanningTracker.withTracker(tracker) { + legacyAnalyzer.checkAnalysis(resolvedPlan) + } + resolvedPlan + } + + private def validateLogicalPlans(fixedPointResult: LogicalPlan, singlePassResult: LogicalPlan) = { + if (fixedPointResult.schema != singlePassResult.schema) { + throw QueryCompilationErrors.hybridAnalyzerOutputSchemaComparisonMismatch( + fixedPointResult.schema, + singlePassResult.schema + ) + } + if (normalizePlan(fixedPointResult) != normalizePlan(singlePassResult)) { + throw QueryCompilationErrors.hybridAnalyzerLogicalPlanComparisonMismatch( + fixedPointResult, + singlePassResult + ) + } + } + + private def normalizePlan(plan: LogicalPlan) = AnalysisHelper.allowInvokingTransformsInAnalyzer { + NormalizePlan(plan) + } + + private def checkResolverGuard(plan: LogicalPlan): Boolean = + !checkSupportedSinglePassFeatures || 
resolverGuard.apply(plan) + + private def recordDuration[T](thunk: => T): (Long, T) = { + val start = System.nanoTime() + val res = thunk + (System.nanoTime() - start, res) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/IdentifierMap.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/IdentifierMap.scala new file mode 100644 index 0000000000000..899eb7d71e813 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/IdentifierMap.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import java.util.Locale + +/** + * The [[IdentifierMap]] is an implementation of a [[KeyTransformingMap]] that uses SQL/DataFrame + * identifiers as keys. The implementation is case-insensitive for keys. + */ +private class IdentifierMap[V] extends KeyTransformingMap[String, V] { + override def mapKey(key: String): String = key.toLowerCase(Locale.ROOT) +} + +/** + * The [[OptionalIdentifierMap]] is an implementation of a [[KeyTransformingMap]] that uses optional + * SQL/DataFrame identifiers as keys. 
The implementation is case-insensitive for non-empty keys. + */ +private class OptionalIdentifierMap[V] extends KeyTransformingMap[Option[String], V] { + override def mapKey(key: Option[String]): Option[String] = + key.map(_.toLowerCase(Locale.ROOT)) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/KeyTransformingMap.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/KeyTransformingMap.scala new file mode 100644 index 0000000000000..ff6e118fcc3c9 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/KeyTransformingMap.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import scala.collection.mutable + +/** + * The [[KeyTransformingMap]] is a partial implementation of [[mutable.Map]] that transforms input + * keys with a custom [[mapKey]] method. 
+ */ +private abstract class KeyTransformingMap[K, V] { + private val impl = new mutable.HashMap[K, V] + + def get(key: K): Option[V] = impl.get(mapKey(key)) + + def contains(key: K): Boolean = impl.contains(mapKey(key)) + + def iterator: Iterator[(K, V)] = impl.iterator + + def +=(kv: (K, V)): this.type = { + impl += (mapKey(kv._1) -> kv._2) + this + } + + def -=(key: K): this.type = { + impl -= mapKey(key) + this + } + + def mapKey(key: K): K +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/LimitExpressionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/LimitExpressionResolver.scala new file mode 100644 index 0000000000000..a25616ba50b6a --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/LimitExpressionResolver.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.errors.QueryErrorsBase +import org.apache.spark.sql.types.IntegerType + +/** + * The [[LimitExpressionResolver]] is a resolver that resolves a [[LocalLimit]] or [[GlobalLimit]] + * expression and performs all the necessary validation. + */ +class LimitExpressionResolver(expressionResolver: TreeNodeResolver[Expression, Expression]) + extends TreeNodeResolver[Expression, Expression] + with QueryErrorsBase { + + /** + * Resolve a limit expression of [[GlobalLimit]] or [[LocalLimit]] and perform validation. + */ + override def resolve(unresolvedLimitExpression: Expression): Expression = { + val resolvedLimitExpression = expressionResolver.resolve(unresolvedLimitExpression) + validateLimitExpression(resolvedLimitExpression, expressionName = "limit") + resolvedLimitExpression + } + + /** + * Validate a resolved limit expression of [[GlobalLimit]] or [[LocalLimit]]: + * - The expression has to be foldable + * - The result data type has to be [[IntegerType]] + * - The evaluated expression has to be non-null + * - The evaluated expression has to be positive + * + * The `foldable` check is implemented in some expressions + * as a recursive expression tree traversal. 
+ * It is not an ideal approach for the single-pass [[ExpressionResolver]], + * but __is__ practical, since: + * - We have to call `eval` here anyway, and it's recursive + * - In practice `LIMIT` expression trees are very small + */ + private def validateLimitExpression(expression: Expression, expressionName: String): Unit = { + if (!expression.foldable) { + throwInvalidLimitLikeExpressionIsUnfoldable(expressionName, expression) + } + if (expression.dataType != IntegerType) { + throwInvalidLimitLikeExpressionDataType(expressionName, expression) + } + expression.eval() match { + case null => + throwInvalidLimitLikeExpressionIsNull(expressionName, expression) + case value: Int if value < 0 => + throwInvalidLimitLikeExpressionIsNegative(expressionName, expression, value) + case _ => + } + } + + private def throwInvalidLimitLikeExpressionIsUnfoldable( + name: String, + expression: Expression): Nothing = + throw new AnalysisException( + errorClass = "INVALID_LIMIT_LIKE_EXPRESSION.IS_UNFOLDABLE", + messageParameters = Map( + "name" -> name, + "expr" -> toSQLExpr(expression) + ), + origin = expression.origin + ) + + private def throwInvalidLimitLikeExpressionDataType( + name: String, + expression: Expression): Nothing = + throw new AnalysisException( + errorClass = "INVALID_LIMIT_LIKE_EXPRESSION.DATA_TYPE", + messageParameters = Map( + "name" -> name, + "expr" -> toSQLExpr(expression), + "dataType" -> toSQLType(expression.dataType) + ), + origin = expression.origin + ) + + private def throwInvalidLimitLikeExpressionIsNull(name: String, expression: Expression): Nothing = + throw new AnalysisException( + errorClass = "INVALID_LIMIT_LIKE_EXPRESSION.IS_NULL", + messageParameters = Map("name" -> name, "expr" -> toSQLExpr(expression)), + origin = expression.origin + ) + + private def throwInvalidLimitLikeExpressionIsNegative( + name: String, + expression: Expression, + value: Int): Nothing = + throw new AnalysisException( + errorClass = 
"INVALID_LIMIT_LIKE_EXPRESSION.IS_NEGATIVE", + messageParameters = + Map("name" -> name, "expr" -> toSQLExpr(expression), "v" -> toSQLValue(value, IntegerType)), + origin = expression.origin + ) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/MetadataResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/MetadataResolver.scala new file mode 100644 index 0000000000000..e1334fc56575e --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/MetadataResolver.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import java.util.ArrayDeque + +import org.apache.spark.sql.catalyst.analysis.{withPosition, RelationResolution, UnresolvedRelation} +import org.apache.spark.sql.catalyst.expressions.{Expression, PlanExpression} +import org.apache.spark.sql.catalyst.plans.logical.{AnalysisHelper, LogicalPlan} +import org.apache.spark.sql.connector.catalog.CatalogManager + +/** + * The [[MetadataResolver]] performs relation metadata resolution based on the unresolved plan + * at the start of the analysis phase. 
Usually it does RPC calls to some table catalog and to table + * metadata itself. + * + * [[RelationsWithResolvedMetadata]] is a map from relation ID to the relations with resolved + * metadata. It's produced by [[resolve]] and is used later in [[Resolver]] to replace + * [[UnresolvedRelation]]s. + * + * This object is one-shot per SQL query or DataFrame program resolution. + */ +class MetadataResolver( + override val catalogManager: CatalogManager, + override val relationResolution: RelationResolution, + override val extensions: Seq[ResolverExtension] = Seq.empty) + extends RelationMetadataProvider + with DelegatesResolutionToExtensions { + override val relationsWithResolvedMetadata = new RelationsWithResolvedMetadata + + /** + * Resolves the relation metadata for `unresolvedPlan`. Usually this involves several blocking + * calls for the [[UnresolvedRelation]]s present in that tree. During the `unresolvedPlan` + * traversal we fill [[relationsWithResolvedMetadata]] with resolved metadata by relation id. + * This map will be used to resolve the plan in single-pass by the [[Resolver]] using + * [[getRelationWithResolvedMetadata]]. If the generic metadata resolution using + * [[RelationResolution]] wasn't successful, we resort to using [[extensions]]. + * Otherwise, we fail with an exception. + */ + def resolve(unresolvedPlan: LogicalPlan): Unit = { + traverseLogicalPlanTree(unresolvedPlan) { unresolvedOperator => + unresolvedOperator match { + case unresolvedRelation: UnresolvedRelation => + val relationId = relationIdFromUnresolvedRelation(unresolvedRelation) + + if (!relationsWithResolvedMetadata.containsKey(relationId)) { + val relationWithResolvedMetadata = resolveRelation(unresolvedRelation).orElse { + // In case the generic metadata resolution returned `None`, we try to check if any + // of the [[extensions]] matches this `unresolvedRelation`, and resolve it using + // that extension. 
+ tryDelegateResolutionToExtension(unresolvedRelation) + } + + relationWithResolvedMetadata match { + case Some(relationWithResolvedMetadata) => + relationsWithResolvedMetadata.put( + relationId, + relationWithResolvedMetadata + ) + case None => + withPosition(unresolvedRelation) { + unresolvedRelation.tableNotFound(unresolvedRelation.multipartIdentifier) + } + } + } + case _ => + } + } + } + + /** + * Resolves the metadata for the given unresolved relation and returns a relation with the + * resolved metadata. This method is blocking. + */ + private def resolveRelation(unresolvedRelation: UnresolvedRelation): Option[LogicalPlan] = + AnalysisHelper.allowInvokingTransformsInAnalyzer { + relationResolution.resolveRelation( + u = unresolvedRelation + ) + } + + /** + * Traverse the logical plan tree from `root` in a pre-order DFS manner and apply `visitor` to + * each node. + */ + private def traverseLogicalPlanTree(root: LogicalPlan)(visitor: LogicalPlan => Unit) = { + val stack = new ArrayDeque[Either[LogicalPlan, Expression]] + stack.push(Left(root)) + + while (!stack.isEmpty) { + stack.pop() match { + case Left(logicalPlan) => + visitor(logicalPlan) + + for (child <- logicalPlan.children) { + stack.push(Left(child)) + } + for (expression <- logicalPlan.expressions) { + stack.push(Right(expression)) + } + case Right(expression) => + for (child <- expression.children) { + stack.push(Right(child)) + } + + expression match { + case planExpression: PlanExpression[_] => + planExpression.plan match { + case plan: LogicalPlan => + stack.push(Left(plan)) + case _ => + } + case _ => + } + } + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameScope.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameScope.scala new file mode 100644 index 0000000000000..8abf4e04b8836 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameScope.scala @@ -0,0 
+1,393 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import java.util.{ArrayDeque, ArrayList, HashSet} + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.analysis.{Resolver => NameComparator, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{ + Alias, + Attribute, + AttributeSeq, + Expression, + NamedExpression +} +import org.apache.spark.sql.errors.QueryCompilationErrors + +/** + * The [[NameScope]] is used during the analysis to control the visibility of names: plan names + * and output attributes. New [[NameScope]] can be created both in the [[Resolver]] and in + * the [[ExpressionResolver]] using the [[NameScopeStack]] api. The name resolution for identifiers + * is case-insensitive. 
+ * + * In this example: + * + * {{{ + * WITH table_1_cte AS ( + * SELECT + * col1, + * col2, + * col2 + * FROM + * table_1 + * ) + * SELECT + * table_1_cte.col1, + * table_2.col1 + * FROM + * table_1_cte + * INNER JOIN + * table_2 + * ON + * table_1_cte.col2 = table_2.col3 + * ; + * }}} + * + * there are two named subplans in the scope: table_1_cte -> [col1, col2, col2] and + * table_2 -> [col1, col3]. + * + * State breakout: + * - `planOutputs`: list of named plan outputs. Order matters here (e.g. to correctly expand `*`). + * Can contain duplicate names, since it's possible to select same column twice, or to select + * columns with the same name from different relations. [[OptionalIdentifierMap]] is used here, + * since some plans don't have an explicit name, so output attributes from those plans will reside + * under the `None` key. + * In our example it will be {{{ [(table_1_cte, [col1, col2, col2]), (table_2, [col1, col3])] }}} + * + * - `planNameToOffset`: mapping from plan output names to their offsets in the `planOutputs` array. + * It's used to lookup attributes by plan output names (multipart names are not supported yet). + * In our example it will be {{{ [table_1_cte -> 0, table_2 -> 1] }}} + */ +class NameScope extends SQLConfHelper { + private val planOutputs = new ArrayList[PlanOutput]() + private val planNameToOffset = new OptionalIdentifierMap[Int] + private val nameComparator: NameComparator = conf.resolver + private val existingAliases = new HashSet[String] + + /** + * Register the named plan output in this [[NameScope]]. The named plan is usually a + * [[NamedRelation]]. `attributes` sequence can contain duplicate names both for this named plan + * and for the scope in general, despite the fact that their further resolution _may_ throw an + * error in case of ambiguous reference. After calling this method, the code can lookup the + * attributes using `get*` methods of this [[NameScope]]. 
+ * + * Duplicate plan names are merged into the same [[PlanOutput]]. For example, this query: + * + * {{{ SELECT t.* FROM (SELECT * FROM VALUES (1)) as t, (SELECT * FROM VALUES (2)) as t; }}} + * + * will have the following output schema: + * + * {{{ [col1, col1] }}} + * + * Same logic applies for the unnamed plan outputs. This query: + * + * {{{ SELECT * FROM (SELECT * FROM VALUES (1)), (SELECT * FROM VALUES (2)); }}} + * + * will have the same output schema: + * + * {{{ [col1, col1] }}} + * + * @param name The name of this named plan. + * @param attributes The output of this named plan. Can contain duplicate names. + */ + def update(name: String, attributes: Seq[Attribute]): Unit = { + update(attributes, Some(name)) + } + + /** + * Register the unnamed plan output in this [[NameScope]]. Some examples of the unnamed plan are + * [[Project]] and [[Aggregate]]. + * + * See the [[update]] method for more details. + * + * @param attributes The output of the unnamed plan. Can contain duplicate names. + */ + def +=(attributes: Seq[Attribute]): Unit = { + update(attributes) + } + + /** + * Get all the attributes from all the plans registered in this [[NameScope]]. The output can + * contain duplicate names. This is used for star (`*`) resolution. + */ + def getAllAttributes: Seq[Attribute] = { + val attributes = new mutable.ArrayBuffer[Attribute] + + planOutputs.forEach(planOutput => { + attributes.appendAll(planOutput.attributes) + }) + + attributes.toSeq + } + + /** + * Expand the [[UnresolvedStar]] using `planOutputs`. The expected use case for this method is + * star expansion inside [[Project]]. Since [[Project]] has only one child, we assert that the + * size of `planOutputs` is 1, otherwise the query is malformed. 
+ * + * Some examples of queries with a star: + * + * - Star without a target: + * {{{ SELECT * FROM VALUES (1, 2, 3) AS t(a, b, c); }}} + * - Star with a multipart name target: + * {{{ SELECT catalog1.database1.table1.* FROM catalog1.database1.table1; }}} + * - Star with a struct target: + * {{{ SELECT d.* FROM VALUES (named_struct('a', 1, 'b', 2)) AS t(d); }}} + * - Star as an argument to a function: + * {{{ SELECT concat_ws('', *) AS result FROM VALUES (1, 2, 3) AS t(a, b, c); }}} + * + * It is resolved by correctly resolving the star qualifier. + * Please check [[UnresolvedStarBase.expandStar]] for more details. + * + * @param unresolvedStar [[UnresolvedStar]] to expand. + * @return The output of a plan expanded from the star. + */ + def expandStar(unresolvedStar: UnresolvedStar): Seq[NamedExpression] = { + if (planOutputs.size != 1) { + throw QueryCompilationErrors.invalidStarUsageError("query", Seq(unresolvedStar)) + } + + planOutputs.get(0).expandStar(unresolvedStar) + } + + /** + * Get all matched attributes by a multipart name. It returns [[Attribute]]s when we resolve a + * simple column or an alias name from a lower operator. However this function can also return + * [[Alias]]es in case we access a struct field or a map value using some key. + * + * Example that contains those major use-cases: + * + * {{{ + * SELECT col1, a, col2.field, col3.struct.field, col4.key + * FROM (SELECT *, col5 AS a FROM t); + * }}} + * + * has a Project list that looks like this: + * + * {{{ + * AttributeReference(col1), + * AttributeReference(a), + * Alias(col2.field, field), + * Alias(col3.struct.field, field), + * Alias(col4[CAST(key AS INT)], key) + * }}} + * + * Also, see [[AttributeSeq.resolve]] for more details. 
+ * + * Since there can be several identical attribute names for several named plans, this function + * can return multiple values: + * - 0 values: No matched attributes + * - 1 value: Unique attribute matched + * - 1+ values: Ambiguity, several attributes matched + * + * One example of a query with an attribute that has a multipart name: + * + * {{{ SELECT catalog1.database1.table1.col1 FROM catalog1.database1.table1; }}} + * + * @param multipartName Multipart attribute name. Can be of several forms: + * - `catalog.database.table.column` + * - `database.table.column` + * - `table.column` + * - `column` + * @return All the attributes matched by the `multipartName`, encapsulated in a [[NameTarget]]. + */ + def matchMultipartName(multipartName: Seq[String]): NameTarget = { + val candidates = new mutable.ArrayBuffer[Expression] + val allAttributes = new mutable.ArrayBuffer[Attribute] + var aliasName: Option[String] = None + + planOutputs.forEach(planOutput => { + allAttributes.appendAll(planOutput.attributes) + val nameTarget = planOutput.matchMultipartName(multipartName) + if (nameTarget.aliasName.isDefined) { + aliasName = nameTarget.aliasName + } + candidates.appendAll(nameTarget.candidates) + }) + + NameTarget(candidates.toSeq, aliasName, allAttributes.toSeq) + } + + /** + * Add an alias, by name, to the list of existing aliases. + */ + def addAlias(aliasName: String): Unit = existingAliases.add(aliasName.toLowerCase()) + + /** + * Returns whether an alias exists in the current scope. 
+ */ + def isExistingAlias(aliasName: String): Boolean = + existingAliases.contains(aliasName.toLowerCase()) + + private def update(attributes: Seq[Attribute], name: Option[String] = None): Unit = { + planNameToOffset.get(name) match { + case Some(index) => + val prevPlanOutput = planOutputs.get(index) + planOutputs.set( + index, + new PlanOutput(prevPlanOutput.attributes ++ attributes, name, nameComparator) + ) + case None => + val index = planOutputs.size + planOutputs.add(new PlanOutput(attributes, name, nameComparator)) + planNameToOffset += (name -> index) + } + } +} + +/** + * The [[NameScopeStack]] is a stack of [[NameScope]]s managed by the [[Resolver]] and the + * [[ExpressionResolver]]. Usually a top scope is used for name resolution, but in case of + * correlated subqueries we can lookup names in the parent scopes. Low-level scope creation is + * managed internally, and only high-level api like [[withNewScope]] is available to the resolvers. + * Freshly-created [[NameScopeStack]] contains an empty root [[NameScope]]. + */ +class NameScopeStack extends SQLConfHelper { + private val stack = new ArrayDeque[NameScope] + push() + + /** + * Get the top scope, which is a default choice for name resolution. + */ + def top: NameScope = { + stack.peek() + } + + /** + * Completely overwrite the top scope state with a named plan output. + * + * See [[NameScope.update]] for more details. + */ + def overwriteTop(name: String, attributes: Seq[Attribute]): Unit = { + val newScope = new NameScope + newScope.update(name, attributes) + + stack.pop() + stack.push(newScope) + } + + /** + * Completely overwrite the top scope state with an unnamed plan output. + * + * See [[NameScope.+=]] for more details. + */ + def overwriteTop(attributes: Seq[Attribute]): Unit = { + val newScope = new NameScope + newScope += attributes + + stack.pop() + stack.push(newScope) + } + + /** + * Execute `body` in a context of a fresh scope. 
It's used during the [[Project]] or the + * [[Aggregate]] resolution to avoid calling [[push]] and [[pop]] explicitly. + */ + def withNewScope[R](body: => R): R = { + push() + try { + body + } finally { + pop() + } + } + + /** + * Push a new scope to the stack. Introduced by the [[Project]] or the [[Aggregate]]. + */ + private def push(): Unit = { + stack.push(new NameScope) + } + + /** + * Pop a scope from the stack. Called when the resolution process for the pushed scope is done. + */ + private def pop(): Unit = { + stack.pop() + } +} + +/** + * [[PlanOutput]] represents a sequence of attributes from a plan ([[NamedRelation]], [[Project]], + * [[Aggregate]], etc). + * + * It is created from `attributes`, which is an output of a named plan, optional plan `name` and a + * resolver provided by the [[NameScopeStack]]. + * + * @param attributes Plan output. Can contain duplicate names. + * @param name Plan name. Non-empty for named plans like [[NamedRelation]] or [[SubqueryAlias]], + * `None` otherwise. + */ +class PlanOutput( + val attributes: Seq[Attribute], + val name: Option[String], + val nameComparator: NameComparator) { + + /** + * attributesForResolution is an [[AttributeSeq]] that is used for resolution of + * multipart attribute names. It's created from the `attributes` when [[NameScope]] is updated. + */ + private val attributesForResolution: AttributeSeq = + AttributeSeq.fromNormalOutput(attributes) + + /** + * Find attributes by the multipart name. + * + * See [[NameScope.matchMultipartName]] for more details. + * + * @param multipartName Multipart attribute name. + * @return Matched attributes or [[Seq.empty]] otherwise. 
+ */ + def matchMultipartName(multipartName: Seq[String]): NameTarget = { + val (candidates, nestedFields) = + attributesForResolution.getCandidatesForResolution(multipartName, nameComparator) + val resolvedCandidates = attributesForResolution.resolveCandidates( + multipartName, + nameComparator, + candidates, + nestedFields + ) + resolvedCandidates match { + case Seq(Alias(child, aliasName)) => + NameTarget(Seq(child), Some(aliasName)) + case other => + NameTarget(other, None) + } + } + + /** + * Method to expand an unresolved star. See [[NameScope.expandStar]] for more details. + * + * @param unresolvedStar Star to resolve. + * @return Attributes expanded from the star. + */ + def expandStar(unresolvedStar: UnresolvedStar): Seq[NamedExpression] = { + unresolvedStar.expandStar( + childOperatorOutput = attributes, + childOperatorMetadataOutput = Seq.empty, + resolve = + (nameParts, nameComparator) => attributesForResolution.resolve(nameParts, nameComparator), + suggestedAttributes = attributes, + resolver = nameComparator, + cleanupNestedAliasesDuringStructExpansion = true + ) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameTarget.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameTarget.scala new file mode 100644 index 0000000000000..3b31c9b1a9110 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/NameTarget.scala @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.util.StringUtils.orderSuggestedIdentifiersBySimilarity +import org.apache.spark.sql.errors.QueryCompilationErrors + +/** + * Class that represents results of name resolution or star expansion. It encapsulates: + * - `candidates` - A list of candidates that are possible matches for a given name. + * - `aliasName` - If the candidates size is 1 and its type is `ExtractValue` (which means that + * it's a recursive type), then the `aliasName` should be the name with which the candidate is + * aliased. Otherwise, `aliasName` should be `None`. + * - `allAttributes` - A list of all attributes which is used to generate suggestions for + * unresolved column error. + * + * Example: + * + * - Attribute resolution: + * {{{ SELECT col1 FROM VALUES (1); }}} will have a [[NameTarget]] with a single candidate `col1`. + * `aliasName` would be `None` in this case because the column is not of recursive type. + * + * - Recursive attribute resolution: + * {{{ SELECT col1.col1 FROM VALUES(STRUCT(1,2), 3) }}} will have a [[NameTarget]] with a + * single candidate `col1` and an `aliasName` of `Some("col1")`. 
+ */ +case class NameTarget( + candidates: Seq[Expression], + aliasName: Option[String] = None, + allAttributes: Seq[Attribute] = Seq.empty) { + + /** + * Picks a candidate from the list of candidates based on the given unresolved attribute. + * Its behavior is as follows (based on the number of candidates): + * + * - If there is only one candidate, it will be returned. + * + * - If there are multiple candidates, an ambiguous reference error will be thrown. + * + * - If there are no candidates, an unresolved column error will be thrown. + */ + def pickCandidate(unresolvedAttribute: UnresolvedAttribute): Expression = { + candidates match { + case Seq() => + throwUnresolvedColumnError(unresolvedAttribute) + case Seq(candidate) => + candidate + case _ => + throw QueryCompilationErrors.ambiguousReferenceError( + unresolvedAttribute.name, + candidates.collect { case attribute: AttributeReference => attribute } + ) + } + } + + private def throwUnresolvedColumnError(unresolvedAttribute: UnresolvedAttribute): Nothing = + throw QueryCompilationErrors.unresolvedColumnError( + unresolvedAttribute.name, + proposal = orderSuggestedIdentifiersBySimilarity( + unresolvedAttribute.name, + candidates = allAttributes.map(attribute => attribute.qualifier :+ attribute.name) + ) + ) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/PlanLogger.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/PlanLogger.scala new file mode 100644 index 0000000000000..f778915008dbb --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/PlanLogger.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.internal.{Logging, MDC, MessageWithContext} +import org.apache.spark.internal.LogKeys.{MESSAGE, QUERY_PLAN} +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.sideBySide +import org.apache.spark.sql.internal.SQLConf + +/** + * [[PlanLogger]] is used by the [[Resolver]] to log intermediate resolution results. 
 */
class PlanLogger extends Logging {
  // Log levels are read once from the active SQL conf at construction time.
  private val planChangeLogLevel = SQLConf.get.planChangeLogLevel
  private val expressionTreeChangeLogLevel = SQLConf.get.expressionTreeChangeLogLevel

  /** Logs a single plan-resolution `event` together with the current state of `plan`. */
  def logPlanResolutionEvent(plan: LogicalPlan, event: String): Unit = {
    log(() => log"""
       |=== Plan resolution: ${MDC(MESSAGE, event)} ===
       |${MDC(QUERY_PLAN, plan.treeString)}
       """.stripMargin, planChangeLogLevel)
  }

  /** Logs a side-by-side diff of the `unresolvedPlan` and the `resolvedPlan` tree strings. */
  def logPlanResolution(unresolvedPlan: LogicalPlan, resolvedPlan: LogicalPlan): Unit = {
    log(
      () =>
        log"""
           |=== Unresolved plan -> Resolved plan ===
           |${MDC(
             QUERY_PLAN,
             sideBySide(
               unresolvedPlan.treeString,
               resolvedPlan.treeString
             ).mkString("\n")
           )}
           """.stripMargin,
      planChangeLogLevel
    )
  }

  /** Logs a single expression-resolution `event` together with `expressionTree`. */
  def logExpressionTreeResolutionEvent(expressionTree: Expression, event: String): Unit = {
    log(
      () => log"""
        |=== Expression tree resolution: ${MDC(MESSAGE, event)} ===
        |${MDC(QUERY_PLAN, expressionTree.treeString)}
        """.stripMargin,
      expressionTreeChangeLogLevel
    )
  }

  /**
   * Logs a side-by-side diff of the unresolved and resolved expression trees.
   *
   * NOTE(review): the unresolved root is re-parented onto the resolved children before diffing,
   * presumably so the diff highlights only the root's own change — confirm intent.
   */
  def logExpressionTreeResolution(
      unresolvedExpressionTree: Expression,
      resolvedExpressionTree: Expression): Unit = {
    log(
      () =>
        log"""
           |=== Unresolved expression tree -> Resolved expression tree ===
           |${MDC(
             QUERY_PLAN,
             sideBySide(
               unresolvedExpressionTree
                 .withNewChildren(resolvedExpressionTree.children)
                 .treeString,
               resolvedExpressionTree.treeString
             ).mkString("\n")
           )}
           """.stripMargin,
      expressionTreeChangeLogLevel
    )
  }

  // Dispatches the message to the logger method matching `logLevel`. The message is built lazily
  // through the thunk. WARN/ERROR/INFO forward the full [[MessageWithContext]]; TRACE/DEBUG log
  // only the rendered message text; any unrecognized level falls back to TRACE.
  private def log(createMessage: () => MessageWithContext, logLevel: String): Unit =
    logLevel match {
      case "TRACE" => logTrace(createMessage().message)
      case "DEBUG" => logDebug(createMessage().message)
      case "INFO" => logInfo(createMessage())
      case "WARN" => logWarning(createMessage())
      case "ERROR" => logError(createMessage())
      case _ => logTrace(createMessage().message)
    }
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/PredicateResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/PredicateResolver.scala new file mode 100644 index 0000000000000..d94559496d04e --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/PredicateResolver.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.analysis.{ + AnsiStringPromotionTypeCoercion, + AnsiTypeCoercion, + ApplyCharTypePaddingHelper, + BooleanEqualityTypeCoercion, + CollationTypeCoercion, + DecimalPrecisionTypeCoercion, + DivisionTypeCoercion, + IntegralDivisionTypeCoercion, + StringPromotionTypeCoercion, + TypeCoercion +} +import org.apache.spark.sql.catalyst.expressions.{Expression, Predicate, StringRPad} +import org.apache.spark.sql.internal.SQLConf + +/** + * Resolver class for resolving all [[Predicate]] expressions. Recursively resolves all children + * and applies selected type coercions to the expression. 
 */
class PredicateResolver(
    expressionResolver: ExpressionResolver,
    timezoneAwareExpressionResolver: TimezoneAwareExpressionResolver)
  extends TreeNodeResolver[Predicate, Expression]
  with ResolvesExpressionChildren {

  // The coercion chain is chosen once per resolver instance based on the ANSI mode flag.
  private val typeCoercionRules = if (conf.ansiEnabled) {
    PredicateResolver.ANSI_TYPE_COERCION_RULES
  } else {
    PredicateResolver.TYPE_COERCION_RULES
  }
  private val typeCoercionResolver =
    new TypeCoercionResolver(timezoneAwareExpressionResolver, typeCoercionRules)

  /**
   * Resolves `unresolvedPredicate` bottom-up:
   *  1. Resolve all children via the generic expression resolver.
   *  2. Apply the configured (ANSI or legacy) type-coercion chain.
   *  3. Apply single-node CHAR-type padding for string comparisons.
   * Throws [[ExplicitlyUnsupportedResolverFeature]] if padding introduced a [[StringRPad]] child,
   * because aliasing of padded predicates differs from the fixed-point Analyzer (see the comment
   * below).
   */
  override def resolve(unresolvedPredicate: Predicate): Expression = {
    val predicateWithResolvedChildren =
      withResolvedChildren(unresolvedPredicate, expressionResolver.resolve)
    val predicateWithTypeCoercion = typeCoercionResolver.resolve(predicateWithResolvedChildren)
    val predicateWithCharTypePadding = {
      ApplyCharTypePaddingHelper.singleNodePaddingForStringComparison(
        predicateWithTypeCoercion,
        !conf.getConf(SQLConf.LEGACY_NO_CHAR_PADDING_IN_PREDICATE)
      )
    }
    predicateWithCharTypePadding.children.collectFirst {
      case rpad: StringRPad => rpad
    } match {
      // In the fixed-point Analyzer [[ApplyCharTypePadding]] is called after [[ResolveAliases]]
      // and therefore padding doesn't affect the alias. In single-pass resolver we need to call
      // this code before we resolve the alias, which will cause the alias to include the pad in
      // its name:
      //
      // fixed-point:
      //   expression: rpad('12', 3, ' ') = '12 '
      //   alias: '12' = '12 '
      //
      // single-pass:
      //   expression: rpad('12', 3, ' ') = '12 '
      //   alias: rpad('12', 3, ' ') = '12 '
      //
      // Disabling this case until the aliasing is fixed.
      case Some(_) => throw new ExplicitlyUnsupportedResolverFeature("CharTypePaddingAliasing")

      case _ => predicateWithCharTypePadding
    }
  }
}

object PredicateResolver {
  // Ordering in the list of type coercions should be in sync with the list in [[TypeCoercion]].
  private val TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq(
    CollationTypeCoercion.apply,
    TypeCoercion.InTypeCoercion.apply,
    StringPromotionTypeCoercion.apply,
    DecimalPrecisionTypeCoercion.apply,
    BooleanEqualityTypeCoercion.apply,
    DivisionTypeCoercion.apply,
    IntegralDivisionTypeCoercion.apply,
    TypeCoercion.ImplicitTypeCoercion.apply,
    TypeCoercion.DateTimeOperationsTypeCoercion.apply
  )

  // Ordering in the list of type coercions should be in sync with the list in
  // [[AnsiTypeCoercion]].
  private val ANSI_TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq(
    CollationTypeCoercion.apply,
    AnsiTypeCoercion.InTypeCoercion.apply,
    AnsiStringPromotionTypeCoercion.apply,
    DecimalPrecisionTypeCoercion.apply,
    DivisionTypeCoercion.apply,
    IntegralDivisionTypeCoercion.apply,
    AnsiTypeCoercion.ImplicitTypeCoercion.apply,
    AnsiTypeCoercion.AnsiDateTimeOperationsTypeCoercion.apply
  )
}

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.
You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.analysis.resolver

import org.apache.spark.sql.catalyst.expressions.Expression

/**
 * A mixin trait for expression resolvers that replace a single node with a whole subtree of nodes
 * during resolution. The underlying legacy code being invoked produces partially-unresolved
 * subtrees, so a callback resolver is applied recursively to finish them. To keep the single-pass
 * invariant (no node resolved twice), the limits of that recursive traversal are marked with the
 * [[ExpressionResolver.SINGLE_PASS_SUBTREE_BOUNDARY]] tag on the original expression's children,
 * which are guaranteed to already be resolved. A callback resolver that encounters a tagged node
 * must return it unchanged instead of resolving it again.
 */
trait ProducesUnresolvedSubtree extends ResolvesExpressionChildren {

  /**
   * Resolves a subtree generated while resolving `expression`. First marks each of `expression`'s
   * (already resolved) children as traversal boundaries, then evaluates `body` to produce the new
   * subtree, and finally runs `expressionResolver` over that subtree's children. The root of the
   * produced subtree is deliberately NOT resolved here.
   */
  protected def withResolvedSubtree(
      expression: Expression,
      expressionResolver: Expression => Expression)(body: => Expression): Expression = {
    // Tag the boundary before generating the subtree so the recursive resolution stops at the
    // previously resolved children.
    for (child <- expression.children) {
      child.setTagValue(ExpressionResolver.SINGLE_PASS_SUBTREE_BOUNDARY, ())
    }

    val generatedRoot = body

    withResolvedChildren(generatedRoot, expressionResolver)
  }
}

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.analysis.resolver

import org.apache.spark.sql.util.CaseInsensitiveStringMap

/**
 * The [[RelationId]] is a unique identifier for a relation. It is used to lookup the relations
 * which were processed by the [[MetadataResolver]] to substitute the unresolved relations in single
 * pass during the analysis phase.
 */
case class RelationId(
    // Fully expanded identifier: catalog name, namespace parts, then the relation name.
    multipartIdentifier: Seq[String],
    // Read options take part in identity (case-class equality), so the same table read with
    // different options maps to a distinct [[RelationId]].
    options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty,
    // Distinguishes streaming reads from batch reads of the same identifier.
    isStreaming: Boolean = false
)

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.catalyst.analysis.resolver

import java.util.HashMap

import org.apache.spark.sql.catalyst.analysis.{withPosition, RelationResolution, UnresolvedRelation}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.connector.catalog.LookupCatalog
import org.apache.spark.util.ArrayImplicits._

/**
 * [[RelationMetadataProvider]] provides relations with resolved metadata based on the
It is used by [[Resolver]] to replace + * [[UnresolvedRelation]] with a specific [[LogicalPlan]] with resolved metadata, e.g. with + * [[UnresolvedCatalogRelation]] or [[View]]. + */ +trait RelationMetadataProvider extends LookupCatalog { + type RelationsWithResolvedMetadata = HashMap[RelationId, LogicalPlan] + + /** + * [[relationResolution]] is used by the [[RelationMetadataProvider]] to expand relation + * identifiers in [[relationIdFromUnresolvedRelation]]. + */ + protected val relationResolution: RelationResolution + + /** + * [[relationsWithResolvedMetadata]] is a map from relation ID to the specific [[LogicalPlan]] + * with resolved metadata, like [[UnresolvedCatalogRelation]] or [[View]]. It's filled by the + * specific [[RelationMetadataProvider]] implementation and is queried in + * [[getRelationWithResolvedMetadata]]. + */ + protected val relationsWithResolvedMetadata: RelationsWithResolvedMetadata + + /** + * Get the [[LogicalPlan]] with resolved metadata for the given [[UnresolvedRelation]]. + * + * [[java.util.HashMap]] returns `null` if the key is not found, so we wrap it in an [[Option]]. + */ + def getRelationWithResolvedMetadata( + unresolvedRelation: UnresolvedRelation): Option[LogicalPlan] = { + Option( + relationsWithResolvedMetadata.get( + relationIdFromUnresolvedRelation(unresolvedRelation) + ) + ) + } + + /** + * Returns the [[RelationId]] for the given [[UnresolvedRelation]]. Here we use + * [[relationResolution]] to expand the [[UnresolvedRelation]] identifier fully, so that our + * [[RelationId]] uniquely identifies the [[unresolvedRelation]]. + * + * This method is public, because it's used in [[MetadataResolverSuite]]. 
+ */ + def relationIdFromUnresolvedRelation(unresolvedRelation: UnresolvedRelation): RelationId = { + relationResolution.expandIdentifier(unresolvedRelation.multipartIdentifier) match { + case CatalogAndIdentifier(catalog, ident) => + RelationId( + multipartIdentifier = + Seq(catalog.name()) ++ ident.namespace().toImmutableArraySeq ++ Seq(ident.name()), + options = unresolvedRelation.options, + isStreaming = unresolvedRelation.isStreaming + ) + case _ => + withPosition(unresolvedRelation) { + unresolvedRelation.tableNotFound(unresolvedRelation.multipartIdentifier) + } + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolutionValidator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolutionValidator.scala new file mode 100644 index 0000000000000..6c4de2e6e58d7 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolutionValidator.scala @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 */

package org.apache.spark.sql.catalyst.analysis.resolver

import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, ResolvedInlineTable}
import org.apache.spark.sql.catalyst.expressions.{AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical.{
  Filter,
  GlobalLimit,
  LocalLimit,
  LocalRelation,
  LogicalPlan,
  OneRowRelation,
  Project,
  SubqueryAlias
}
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.types.BooleanType

/**
 * The [[ResolutionValidator]] performs the validation work after the logical plan tree is
 * resolved by the [[Resolver]]. Each `resolve*` method in the [[Resolver]] must
 * have its `validate*` counterpart in the [[ResolutionValidator]]. The validation code asserts the
 * conditions that must never be false no matter which SQL query or DataFrame program was provided.
 * The validation approach is single-pass, post-order, complementary to the resolution process.
 */
class ResolutionValidator {
  private val expressionResolutionValidator = new ExpressionResolutionValidator(this)

  // NOTE(review): declared `var`, but never reassigned in this class — presumably swapped by a
  // collaborating validator or tests; confirm, otherwise this could be a `val`.
  private[resolver] var attributeScopeStack = new AttributeScopeStack

  /**
   * Validate the resolved logical `plan` - assert invariants that should never be false no
   * matter which SQL query or DataFrame program was provided. New operators must be added here as
   * soon as [[Resolver]] supports them. We check this by throwing an exception for
   * unknown operators.
   */
  def validatePlan(plan: LogicalPlan): Unit = wrapErrors(plan) {
    validate(plan)
  }

  // Intentionally non-exhaustive: an operator type missing from this match throws a MatchError,
  // which surfaces (wrapped by `wrapErrors`) as "resolver does not support this operator yet".
  private def validate(operator: LogicalPlan): Unit = {
    operator match {
      case project: Project =>
        validateProject(project)
      case filter: Filter =>
        validateFilter(filter)
      case subqueryAlias: SubqueryAlias =>
        validateSubqueryAlias(subqueryAlias)
      case globalLimit: GlobalLimit =>
        validateGlobalLimit(globalLimit)
      case localLimit: LocalLimit =>
        validateLocalLimit(localLimit)
      case inlineTable: ResolvedInlineTable =>
        validateInlineTable(inlineTable)
      case localRelation: LocalRelation =>
        validateRelation(localRelation)
      case oneRowRelation: OneRowRelation =>
        validateRelation(oneRowRelation)
      // [[LogicalRelation]], [[HiveTableRelation]] and other specific relations can't be imported
      // because of a potential circular dependency, so we match a generic Catalyst
      // [[MultiInstanceRelation]] instead.
      case multiInstanceRelation: MultiInstanceRelation =>
        validateRelation(multiInstanceRelation)
    }
  }

  // The child and the project list are validated inside a fresh scope; afterwards the project's
  // own output overwrites the top scope for the parent operator.
  private def validateProject(project: Project): Unit = {
    attributeScopeStack.withNewScope {
      validate(project.child)
      expressionResolutionValidator.validateProjectList(project.projectList)
    }

    handleOperatorOutput(project)
  }

  // Child is validated first (post-order), then the condition must have a BooleanType output.
  private def validateFilter(filter: Filter): Unit = {
    validate(filter.child)

    assert(
      filter.condition.dataType == BooleanType,
      s"Output type of a filter must be a boolean, but got: ${filter.condition.dataType.typeName}"
    )
    expressionResolutionValidator.validate(filter.condition)
  }

  private def validateSubqueryAlias(subqueryAlias: SubqueryAlias): Unit = {
    validate(subqueryAlias.child)

    handleOperatorOutput(subqueryAlias)
  }

  private def validateGlobalLimit(globalLimit: GlobalLimit): Unit = {
    validate(globalLimit.child)
    expressionResolutionValidator.validate(globalLimit.limitExpr)
  }

  private def validateLocalLimit(localLimit: LocalLimit): Unit = {
    validate(localLimit.child)

    expressionResolutionValidator.validate(localLimit.limitExpr)
  }

  // Every expression in every row of the inline table must be valid.
  private def validateInlineTable(inlineTable: ResolvedInlineTable): Unit = {
    inlineTable.rows.foreach(row => {
      row.foreach(expression => {
        expressionResolutionValidator.validate(expression)
      })
    })

    handleOperatorOutput(inlineTable)
  }

  // Leaf relations carry no children to validate; only their output is checked.
  private def validateRelation(relation: LogicalPlan): Unit = {
    handleOperatorOutput(relation)
  }

  // Replaces the top attribute scope with this operator's output and asserts that every output
  // attribute is a plain [[AttributeReference]].
  private def handleOperatorOutput(operator: LogicalPlan): Unit = {
    attributeScopeStack.overwriteTop(operator.output)

    operator.output.foreach(attribute => {
      assert(
        attribute.isInstanceOf[AttributeReference],
        s"Output of an operator must be a reference to an attribute, but got: " +
        s"${attribute.getClass.getSimpleName}"
      )
      expressionResolutionValidator.validate(attribute)
    })
  }

  // NOTE(review): catches Throwable, so fatal JVM errors (e.g. OutOfMemoryError) are also wrapped
  // into an analysis error — confirm this is intended; scala.util.control.NonFatal is the usual
  // filter.
  private def wrapErrors[R](plan: LogicalPlan)(body: => R): Unit = {
    try {
      body
    } catch {
      case ex: Throwable =>
        throw QueryCompilationErrors.resolutionValidationError(ex, plan)
    }
  }
}

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.EvaluateUnresolvedInlineTable +import org.apache.spark.sql.catalyst.analysis.{ + withPosition, + FunctionResolution, + NamedRelation, + RelationResolution, + ResolvedInlineTable, + UnresolvedInlineTable, + UnresolvedRelation +} +import org.apache.spark.sql.catalyst.plans.logical.{ + Filter, + GlobalLimit, + LocalLimit, + LocalRelation, + LogicalPlan, + OneRowRelation, + Project, + SubqueryAlias +} +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase} +import org.apache.spark.sql.types.BooleanType + +/** + * The Resolver implements a single-pass bottom-up analysis algorithm in the Catalyst. + * + * The functions here generally traverse the [[LogicalPlan]] nodes recursively, + * constructing and returning the resolved [[LogicalPlan]] nodes bottom-up. + * This is the primary entry point for implementing SQL and DataFrame plan analysis, + * wherein the [[resolve]] method accepts a fully unresolved [[LogicalPlan]] and returns + * a fully resolved [[LogicalPlan]] in response with all data types and attribute + * reference ID assigned for valid requests. This resolver also takes responsibility + * to detect any errors in the initial SQL query or DataFrame and return appropriate + * error messages including precise parse locations wherever possible. 
+ * + * The Resolver is a one-shot object per each SQL/DataFrame logical plan, the calling code must + * re-create it for every new analysis run. + * + * @param catalogManager [[CatalogManager]] for relation and identifier resolution. + * @param extensions A list of [[ResolverExtension]] that can resolve external operators. + */ +class Resolver( + catalogManager: CatalogManager, + override val extensions: Seq[ResolverExtension] = Seq.empty, + metadataResolverExtensions: Seq[ResolverExtension] = Seq.empty) + extends TreeNodeResolver[LogicalPlan, LogicalPlan] + with QueryErrorsBase + with ResolvesOperatorChildren + with TracksResolvedNodes[LogicalPlan] + with DelegatesResolutionToExtensions { + private val scopes = new NameScopeStack + private val planLogger = new PlanLogger + private val relationResolution = Resolver.createRelationResolution(catalogManager) + private val functionResolution = new FunctionResolution(catalogManager, relationResolution) + private val expressionResolver = + new ExpressionResolver(this, scopes, functionResolution, planLogger) + private val limitExpressionResolver = new LimitExpressionResolver(expressionResolver) + + /** + * [[relationMetadataProvider]] is used to resolve metadata for relations. It's initialized with + * the default implementation [[MetadataResolver]] here and is called in + * [[lookupMetadataAndResolve]] on the unresolved logical plan to visit it (both operators and + * expressions) to resolve the metadata and populate its internal state. It's later queried by + * [[resolveRelation]] to get the plan with resolved metadata (for example, a [[View]] or an + * [[UnresolvedCatalogRelation]]) based on the [[UnresolvedRelation]]. + * + * If the [[AnalyzerBridgeState]] is provided, we reset this provider to the + * [[BridgedRelationMetadataProvider]] and later stick to it forever without resorting to the + * actual blocking metadata resolution. 
+ */ + private var relationMetadataProvider: RelationMetadataProvider = new MetadataResolver( + catalogManager, + relationResolution, + metadataResolverExtensions + ) + + /** + * This method is an analysis entry point. It resolves the metadata and invokes [[resolve]], + * which does most of the analysis work. + */ + def lookupMetadataAndResolve( + unresolvedPlan: LogicalPlan, + analyzerBridgeState: Option[AnalyzerBridgeState] = None): LogicalPlan = { + planLogger.logPlanResolutionEvent(unresolvedPlan, "Lookup metadata and resolve") + + relationMetadataProvider = analyzerBridgeState match { + case Some(analyzerBridgeState) => + new BridgedRelationMetadataProvider( + catalogManager, + relationResolution, + analyzerBridgeState + ) + case None => + relationMetadataProvider + } + + relationMetadataProvider match { + case metadataResolver: MetadataResolver => + metadataResolver.resolve(unresolvedPlan) + case _ => + } + + resolve(unresolvedPlan) + } + + /** + * This method takes an unresolved [[LogicalPlan]] and chooses the right `resolve*` method using + * pattern matching on the `unresolvedPlan` type. This pattern matching enumerates all the + * operator node types that are supported by the single-pass analysis. + * + * When developers introduce a new unresolved node type to the Catalyst, they should implement + * a corresponding `resolve*` method in the [[Resolver]] and add it to this pattern match + * list. + * + * [[resolve]] will be called recursively during the unresolved plan traversal eventually + * producing a fully resolved plan or a descriptive error message. 
+ */ + override def resolve(unresolvedPlan: LogicalPlan): LogicalPlan = { + planLogger.logPlanResolutionEvent(unresolvedPlan, "Unresolved plan") + + throwIfNodeWasResolvedEarlier(unresolvedPlan) + + val resolvedPlan = + unresolvedPlan match { + case unresolvedProject: Project => + resolveProject(unresolvedProject) + case unresolvedFilter: Filter => + resolveFilter(unresolvedFilter) + case unresolvedSubqueryAlias: SubqueryAlias => + resolveSubqueryAlias(unresolvedSubqueryAlias) + case unresolvedGlobalLimit: GlobalLimit => + resolveGlobalLimit(unresolvedGlobalLimit) + case unresolvedLocalLimit: LocalLimit => + resolveLocalLimit(unresolvedLocalLimit) + case unresolvedRelation: UnresolvedRelation => + resolveRelation(unresolvedRelation) + case unresolvedInlineTable: UnresolvedInlineTable => + resolveInlineTable(unresolvedInlineTable) + // See the reason why we have to match both [[LocalRelation]] and [[ResolvedInlineTable]] + // in the [[resolveInlineTable]] scaladoc + case resolvedInlineTable: ResolvedInlineTable => + updateNameScopeWithPlanOutput(resolvedInlineTable) + case localRelation: LocalRelation => + updateNameScopeWithPlanOutput(localRelation) + case unresolvedOneRowRelation: OneRowRelation => + updateNameScopeWithPlanOutput(unresolvedOneRowRelation) + case _ => + tryDelegateResolutionToExtension(unresolvedPlan).getOrElse { + handleUnmatchedOperator(unresolvedPlan) + } + } + + markNodeAsResolved(resolvedPlan) + + planLogger.logPlanResolution(unresolvedPlan, resolvedPlan) + + resolvedPlan + } + + /** + * [[Project]] introduces a new scope to resolve its subtree and project list expressions. After + * those are resolved in the child scope we overwrite current scope with resolved [[Project]]'s + * output to expose new names to the parent operators. 
+ */ + private def resolveProject(unresolvedProject: Project): LogicalPlan = { + val resolvedProject = scopes.withNewScope { + val resolvedChild = resolve(unresolvedProject.child) + val resolvedProjectList = + expressionResolver.resolveProjectList(unresolvedProject.projectList) + Project(resolvedProjectList, resolvedChild) + } + + withPosition(unresolvedProject) { + scopes.overwriteTop(resolvedProject.output) + } + + resolvedProject + } + + /** + * [[Filter]] has a single child and a single condition and we resolve them in this respective + * order. + */ + private def resolveFilter(unresolvedFilter: Filter): LogicalPlan = { + val resolvedChild = resolve(unresolvedFilter.child) + val resolvedCondition = expressionResolver.resolve(unresolvedFilter.condition) + + val resolvedFilter = Filter(resolvedCondition, resolvedChild) + if (resolvedFilter.condition.dataType != BooleanType) { + withPosition(unresolvedFilter) { + throwDatatypeMismatchFilterNotBoolean(resolvedFilter) + } + } + + resolvedFilter + } + + /** + * [[SubqueryAlias]] has a single child and an identifier. We need to resolve the child and update + * the scope with the output, since upper expressions can reference [[SubqueryAlias]]es output by + * its identifier. + */ + private def resolveSubqueryAlias(unresolvedSubqueryAlias: SubqueryAlias): LogicalPlan = { + val resolvedSubqueryAlias = + SubqueryAlias(unresolvedSubqueryAlias.identifier, resolve(unresolvedSubqueryAlias.child)) + withPosition(unresolvedSubqueryAlias) { + scopes.overwriteTop(unresolvedSubqueryAlias.alias, resolvedSubqueryAlias.output) + } + resolvedSubqueryAlias + } + + /** + * Resolve [[GlobalLimit]]. We have to resolve its child and resolve and validate its limit + * expression. 
+ */ + private def resolveGlobalLimit(unresolvedGlobalLimit: GlobalLimit): LogicalPlan = { + val resolvedChild = resolve(unresolvedGlobalLimit.child) + + val resolvedLimitExpr = withPosition(unresolvedGlobalLimit) { + limitExpressionResolver.resolve(unresolvedGlobalLimit.limitExpr) + } + + GlobalLimit(resolvedLimitExpr, resolvedChild) + } + + /** + * Resolve [[LocalLimit]]. We have to resolve its child and resolve and validate its limit + * expression. + */ + private def resolveLocalLimit(unresolvedLocalLimit: LocalLimit): LogicalPlan = { + val resolvedChild = resolve(unresolvedLocalLimit.child) + + val resolvedLimitExpr = withPosition(unresolvedLocalLimit) { + limitExpressionResolver.resolve(unresolvedLocalLimit.limitExpr) + } + + LocalLimit(resolvedLimitExpr, resolvedChild) + } + + /** + * [[UnresolvedRelation]] was previously looked up by the [[MetadataResolver]] and now we need to: + * - Get the specific relation with metadata from `relationsWithResolvedMetadata`, like + * [[UnresolvedCatalogRelation]], or throw an error if it wasn't found + * - Resolve it further, usually using extensions, like [[DataSourceResolver]] + */ + private def resolveRelation(unresolvedRelation: UnresolvedRelation): LogicalPlan = { + relationMetadataProvider.getRelationWithResolvedMetadata(unresolvedRelation) match { + case Some(relationWithResolvedMetadata) => + planLogger.logPlanResolutionEvent( + relationWithResolvedMetadata, + "Relation metadata retrieved" + ) + + withPosition(unresolvedRelation) { + resolve(relationWithResolvedMetadata) + } + case None => + withPosition(unresolvedRelation) { + unresolvedRelation.tableNotFound(unresolvedRelation.multipartIdentifier) + } + } + } + + /** + * [[UnresolvedInlineTable]] resolution requires all the rows to be resolved first. After that we + * use [[EvaluateUnresolvedInlineTable]] and try to evaluate the row expressions if possible to + * get [[LocalRelation]] right away. 
Sometimes it's not possible because of expressions like + * `current_date()` which are evaluated in the optimizer (SPARK-46380). + * + * Note: By default if all the inline table expressions can be evaluated eagerly, the parser + * would produce a [[LocalRelation]] and the analysis would just skip this step and go straight + * to `resolveLocalRelation` (SPARK-48967, SPARK-49269). + */ + private def resolveInlineTable(unresolvedInlineTable: UnresolvedInlineTable): LogicalPlan = { + val withResolvedExpressions = UnresolvedInlineTable( + unresolvedInlineTable.names, + unresolvedInlineTable.rows.map(row => { + row.map(expressionResolver.resolve(_)) + }) + ) + + val resolvedRelation = EvaluateUnresolvedInlineTable + .evaluateUnresolvedInlineTable(withResolvedExpressions) + + withPosition(unresolvedInlineTable) { + resolve(resolvedRelation) + } + } + + /** + * To finish the operator resolution we add its output to the current scope. This is usually + * done for relations. [[NamedRelation]]'s output should be added to the scope under its name. + */ + private def updateNameScopeWithPlanOutput(relation: LogicalPlan): LogicalPlan = { + withPosition(relation) { + relation match { + case namedRelation: NamedRelation => + scopes.top.update(namedRelation.name, namedRelation.output) + case _ => + scopes.top += relation.output + } + } + relation + } + + override def tryDelegateResolutionToExtension( + unresolvedOperator: LogicalPlan): Option[LogicalPlan] = { + val resolutionResult = super.tryDelegateResolutionToExtension(unresolvedOperator) + resolutionResult.map { resolvedOperator => + updateNameScopeWithPlanOutput(resolvedOperator) + } + } + + /** + * Check if the unresolved operator is explicitly unsupported and throw + * [[ExplicitlyUnsupportedResolverFeature]] in that case. Otherwise, throw + * [[QueryCompilationErrors.unsupportedSinglePassAnalyzerFeature]]. 
+ */ + private def handleUnmatchedOperator(unresolvedOperator: LogicalPlan): Nothing = { + if (ExplicitlyUnsupportedResolverFeature.OPERATORS.contains( + unresolvedOperator.getClass.getName + )) { + throw new ExplicitlyUnsupportedResolverFeature( + s"unsupported operator: ${unresolvedOperator.getClass.getName}" + ) + } + throw QueryCompilationErrors + .unsupportedSinglePassAnalyzerFeature( + s"${unresolvedOperator.getClass} operator resolution" + ) + .withPosition(unresolvedOperator.origin) + } + + private def throwDatatypeMismatchFilterNotBoolean(filter: Filter): Nothing = + throw new AnalysisException( + errorClass = "DATATYPE_MISMATCH.FILTER_NOT_BOOLEAN", + messageParameters = Map( + "sqlExpr" -> filter.expressions.map(toSQLExpr).mkString(","), + "filter" -> toSQLExpr(filter.condition), + "type" -> toSQLType(filter.condition.dataType) + ) + ) +} + +object Resolver { + + /** + * Create a new instance of the [[RelationResolution]]. + */ + def createRelationResolution(catalogManager: CatalogManager): RelationResolution = { + new RelationResolution(catalogManager) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverExtension.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverExtension.scala new file mode 100644 index 0000000000000..8bed881ec97a1 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverExtension.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan + +/** + * The [[ResolverExtension]] is a main interface for single-pass analysis extensions in Catalyst. + * External code that needs specific node types to be resolved has to implement this trait and + * inject the implementation into the [[Analyzer.singlePassResolverExtensions]]. + * + * Note that resolver extensions are responsible for creating attribute references with IDs that + * are unique from any other subplans. This should be straightforward in most cases because + * creating new attribute references will assign [[NamedExpression.newExprId]] by default. + */ +trait ResolverExtension { + + /** + * Resolve the operator if it's supported by this extension. This method is called by the + * single-pass [[Resolver]] on all the configured extensions when it exhausted its match list + * for the known node types. 
+ * + * Guarantees: + * - The implementation can rely on children being resolved + * - We commit to performing the partial function check only at most once per unresolved operator + */ + def resolveOperator: PartialFunction[LogicalPlan, LogicalPlan] +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverGuard.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverGuard.scala new file mode 100644 index 0000000000000..b3b3d4def602d --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolverGuard.scala @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.analysis.{ + ResolvedInlineTable, + UnresolvedAlias, + UnresolvedAttribute, + UnresolvedFunction, + UnresolvedInlineTable, + UnresolvedRelation, + UnresolvedStar +} +import org.apache.spark.sql.catalyst.expressions.{ + Alias, + AttributeReference, + BinaryArithmetic, + Cast, + ConditionalExpression, + CreateNamedStruct, + Expression, + Literal, + Predicate, + SubqueryExpression +} +import org.apache.spark.sql.catalyst.plans.logical.{ + Filter, + GlobalLimit, + LocalLimit, + LocalRelation, + LogicalPlan, + OneRowRelation, + Project, + SubqueryAlias +} +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.internal.SQLConf.HiveCaseSensitiveInferenceMode + +/** + * [[ResolverGuard]] is a class that checks if the operator that is yet to be analyzed + * only consists of operators and expressions that are currently supported by the + * single-pass analyzer. + * + * This is a one-shot object and should not be reused after [[apply]] call. + */ +class ResolverGuard(catalogManager: CatalogManager) extends SQLConfHelper { + + /** + * Check the top level operator of the parsed operator. + */ + def apply(operator: LogicalPlan): Boolean = + checkConfValues() && checkVariables() && checkOperator(operator) + + /** + * Check if all the operators are supported. For implemented ones, recursively check + * their children. For unimplemented ones, return false. 
+ */ + private def checkOperator(operator: LogicalPlan): Boolean = operator match { + case project: Project => + checkProject(project) + case filter: Filter => + checkFilter(filter) + case subqueryAlias: SubqueryAlias => + checkSubqueryAlias(subqueryAlias) + case globalLimit: GlobalLimit => + checkGlobalLimit(globalLimit) + case localLimit: LocalLimit => + checkLocalLimit(localLimit) + case unresolvedRelation: UnresolvedRelation => + checkUnresolvedRelation(unresolvedRelation) + case unresolvedInlineTable: UnresolvedInlineTable => + checkUnresolvedInlineTable(unresolvedInlineTable) + case resolvedInlineTable: ResolvedInlineTable => + checkResolvedInlineTable(resolvedInlineTable) + case localRelation: LocalRelation => + checkLocalRelation(localRelation) + case oneRowRelation: OneRowRelation => + checkOneRowRelation(oneRowRelation) + case _ => + false + } + + /** + * Method used to check if expressions are supported by the new analyzer. + * For LeafNode types, we return true or false. For other ones, check their children. 
+ */ + private def checkExpression(expression: Expression): Boolean = { + expression match { + case alias: Alias => + checkAlias(alias) + case unresolvedBinaryArithmetic: BinaryArithmetic => + checkUnresolvedBinaryArithmetic(unresolvedBinaryArithmetic) + case unresolvedConditionalExpression: ConditionalExpression => + checkUnresolvedConditionalExpression(unresolvedConditionalExpression) + case unresolvedCast: Cast => + checkUnresolvedCast(unresolvedCast) + case unresolvedStar: UnresolvedStar => + checkUnresolvedStar(unresolvedStar) + case unresolvedAlias: UnresolvedAlias => + checkUnresolvedAlias(unresolvedAlias) + case unresolvedAttribute: UnresolvedAttribute => + checkUnresolvedAttribute(unresolvedAttribute) + case unresolvedPredicate: Predicate => + checkUnresolvedPredicate(unresolvedPredicate) + case literal: Literal => + checkLiteral(literal) + case attributeReference: AttributeReference => + checkAttributeReference(attributeReference) + case createNamedStruct: CreateNamedStruct => + checkCreateNamedStruct(createNamedStruct) + case unresolvedFunction: UnresolvedFunction => + checkUnresolvedFunction(unresolvedFunction) + case _ => + false + } + } + + private def checkProject(project: Project) = { + checkOperator(project.child) && project.projectList.forall(checkExpression) + } + + private def checkFilter(unresolvedFilter: Filter) = + checkOperator(unresolvedFilter.child) && checkExpression(unresolvedFilter.condition) + + private def checkSubqueryAlias(subqueryAlias: SubqueryAlias) = + subqueryAlias.identifier.qualifier.isEmpty && checkOperator(subqueryAlias.child) + + private def checkGlobalLimit(globalLimit: GlobalLimit) = + checkOperator(globalLimit.child) && checkExpression(globalLimit.limitExpr) + + private def checkLocalLimit(localLimit: LocalLimit) = + checkOperator(localLimit.child) && checkExpression(localLimit.limitExpr) + + private def checkUnresolvedInlineTable(unresolvedInlineTable: UnresolvedInlineTable) = + 
unresolvedInlineTable.rows.forall(_.forall(checkExpression)) + + private def checkUnresolvedRelation(unresolvedRelation: UnresolvedRelation) = true + + private def checkResolvedInlineTable(resolvedInlineTable: ResolvedInlineTable) = + resolvedInlineTable.rows.forall(_.forall(checkExpression)) + + // Usually we don't check outputs of operators in unresolved plans, but in this case + // [[LocalRelation]] is resolved in the parser. + private def checkLocalRelation(localRelation: LocalRelation) = + localRelation.output.forall(checkExpression) + + private def checkOneRowRelation(oneRowRelation: OneRowRelation) = true + + private def checkAlias(alias: Alias) = checkExpression(alias.child) + + private def checkUnresolvedBinaryArithmetic(unresolvedBinaryArithmetic: BinaryArithmetic) = + checkExpression(unresolvedBinaryArithmetic.left) && + checkExpression(unresolvedBinaryArithmetic.right) + + private def checkUnresolvedConditionalExpression( + unresolvedConditionalExpression: ConditionalExpression) = + unresolvedConditionalExpression.children.forall(checkExpression) + + private def checkUnresolvedCast(cast: Cast) = checkExpression(cast.child) + + private def checkUnresolvedStar(unresolvedStar: UnresolvedStar) = true + + private def checkUnresolvedAlias(unresolvedAlias: UnresolvedAlias) = + checkExpression(unresolvedAlias.child) + + private def checkUnresolvedAttribute(unresolvedAttribute: UnresolvedAttribute) = + !ResolverGuard.UNSUPPORTED_ATTRIBUTE_NAMES.contains(unresolvedAttribute.nameParts.head) + + private def checkUnresolvedPredicate(unresolvedPredicate: Predicate) = { + unresolvedPredicate match { + case _: SubqueryExpression => false + case other => + other.children.forall(checkExpression) + } + } + + private def checkAttributeReference(attributeReference: AttributeReference) = true + + private def checkCreateNamedStruct(createNamedStruct: CreateNamedStruct) = { + createNamedStruct.children.forall(checkExpression) + } + + private def 
checkUnresolvedFunction(unresolvedFunction: UnresolvedFunction) = + ResolverGuard.SUPPORTED_FUNCTION_NAMES.contains( + unresolvedFunction.nameParts.head + ) && unresolvedFunction.children.forall(checkExpression) + + private def checkLiteral(literal: Literal) = true + + private def checkConfValues() = + // Case sensitive analysis is not supported. + !conf.caseSensitiveAnalysis && + // Case-sensitive inference is not supported for Hive table schema. + conf.caseSensitiveInferenceMode == HiveCaseSensitiveInferenceMode.NEVER_INFER + + private def checkVariables() = catalogManager.tempVariableManager.isEmpty +} + +object ResolverGuard { + + private val UNSUPPORTED_ATTRIBUTE_NAMES = { + val map = new IdentifierMap[Unit]() + + /** + * Some SQL functions can be called without the braces and thus they are found in the + * parsed operator as UnresolvedAttributes. This list contains the names of those functions + * so we can reject them. Find more information in [[ColumnResolutionHelper.literalFunctions]]. + */ + map += ("current_date", ()) + map += ("current_timestamp", ()) + map += ("current_user", ()) + map += ("user", ()) + map += ("session_user", ()) + map += ("grouping__id", ()) + + /** + * Metadata column resolution is not supported for now + */ + map += ("_metadata", ()) + + map + } + + /** + * Most of the functions are not supported, but we allow some explicitly supported ones. 
+ */ + private val SUPPORTED_FUNCTION_NAMES = { + val map = new IdentifierMap[Unit]() + map += ("array", ()) + // map += ("array_agg", ()) - until aggregate expressions are supported + map += ("array_append", ()) + map += ("array_compact", ()) + map += ("array_contains", ()) + map += ("array_distinct", ()) + map += ("array_except", ()) + map += ("array_insert", ()) + map += ("array_intersect", ()) + map += ("array_join", ()) + map += ("array_max", ()) + map += ("array_min", ()) + map += ("array_position", ()) + map += ("array_prepend", ()) + map += ("array_remove", ()) + map += ("array_repeat", ()) + map += ("array_size", ()) + // map += ("array_sort", ()) - until lambda functions are supported + map += ("array_union", ()) + map += ("arrays_overlap", ()) + map += ("arrays_zip", ()) + map += ("coalesce", ()) + map += ("if", ()) + map += ("map", ()) + map += ("map_concat", ()) + map += ("map_contains_key", ()) + map += ("map_entries", ()) + // map += ("map_filter", ()) - until lambda functions are supported + map += ("map_from_arrays", ()) + map += ("map_from_entries", ()) + map += ("map_keys", ()) + map += ("map_values", ()) + // map += ("map_zip_with", ()) - until lambda functions are supported + map += ("named_struct", ()) + map += ("sort_array", ()) + map += ("str_to_map", ()) + map + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolvesExpressionChildren.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolvesExpressionChildren.scala new file mode 100644 index 0000000000000..c170941ce5348 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolvesExpressionChildren.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.expressions.Expression + +trait ResolvesExpressionChildren { + + /** + * Resolves generic [[Expression]] children and returns its copy with children resolved. + */ + protected def withResolvedChildren[ExpressionType <: Expression]( + unresolvedExpression: ExpressionType, + resolveChild: Expression => Expression): ExpressionType = { + val newChildren = unresolvedExpression.children.map(resolveChild(_)) + unresolvedExpression.withNewChildren(newChildren).asInstanceOf[ExpressionType] + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolvesOperatorChildren.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolvesOperatorChildren.scala new file mode 100644 index 0000000000000..0f548c3c55858 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolvesOperatorChildren.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan + +/** + * A mixin trait for all operator resolvers that need to resolve their children. + */ +trait ResolvesOperatorChildren { + + /** + * Resolves generic [[LogicalPlan]] children and returns its copy with children resolved. + */ + protected def withResolvedChildren[OperatorType <: LogicalPlan]( + unresolvedOperator: OperatorType, + resolve: LogicalPlan => LogicalPlan): OperatorType = { + val newChildren = unresolvedOperator.children.map(resolve(_)) + unresolvedOperator.withNewChildren(newChildren).asInstanceOf[OperatorType] + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimeAddResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimeAddResolver.scala new file mode 100644 index 0000000000000..bf27f64598723 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimeAddResolver.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.analysis.{ + AnsiStringPromotionTypeCoercion, + AnsiTypeCoercion, + StringPromotionTypeCoercion, + TypeCoercion +} +import org.apache.spark.sql.catalyst.expressions.{Expression, TimeAdd} + +/** + * Helper resolver for [[TimeAdd]] which is produced by resolving [[BinaryArithmetic]] nodes. + */ +class TimeAddResolver( + expressionResolver: ExpressionResolver, + timezoneAwareExpressionResolver: TimezoneAwareExpressionResolver) + extends TreeNodeResolver[TimeAdd, Expression] + with ResolvesExpressionChildren { + + private val typeCoercionRules: Seq[Expression => Expression] = + if (conf.ansiEnabled) { + TimeAddResolver.ANSI_TYPE_COERCION_RULES + } else { + TimeAddResolver.TYPE_COERCION_RULES + } + private val typeCoercionResolver: TypeCoercionResolver = + new TypeCoercionResolver(timezoneAwareExpressionResolver, typeCoercionRules) + + override def resolve(unresolvedTimeAdd: TimeAdd): Expression = { + val timeAddWithResolvedChildren: TimeAdd = + withResolvedChildren(unresolvedTimeAdd, expressionResolver.resolve) + val timeAddWithTypeCoercion: Expression = typeCoercionResolver + .resolve(timeAddWithResolvedChildren) + timezoneAwareExpressionResolver.withResolvedTimezone( + timeAddWithTypeCoercion, + conf.sessionLocalTimeZone + ) + } +} + +object TimeAddResolver { + // Ordering in the list of type coercions should be in sync with the list in [[TypeCoercion]]. 
+ private val TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq( + StringPromotionTypeCoercion.apply, + TypeCoercion.ImplicitTypeCoercion.apply, + TypeCoercion.DateTimeOperationsTypeCoercion.apply + ) + + // Ordering in the list of type coercions should be in sync with the list in [[AnsiTypeCoercion]]. + private val ANSI_TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq( + AnsiStringPromotionTypeCoercion.apply, + AnsiTypeCoercion.ImplicitTypeCoercion.apply, + AnsiTypeCoercion.AnsiDateTimeOperationsTypeCoercion.apply + ) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolver.scala new file mode 100644 index 0000000000000..a45e9e41cbfb1 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolver.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.expressions.{Expression, TimeZoneAwareExpression} + +/** + * Resolves [[TimeZoneAwareExpressions]] by applying the session's local timezone. + * + * This class is responsible for resolving [[TimeZoneAwareExpression]]s by first resolving their + * children and then applying the session's local timezone. Additionally, ensures that any tags from + * the original expression are preserved during the resolution process. + * + * @constructor Creates a new TimezoneAwareExpressionResolver with the given expression resolver. + * @param expressionResolver The [[ExpressionResolver]] used to resolve child expressions. + */ +class TimezoneAwareExpressionResolver(expressionResolver: TreeNodeResolver[Expression, Expression]) + extends TreeNodeResolver[TimeZoneAwareExpression, Expression] + with ResolvesExpressionChildren { + + /** + * Resolves a [[TimeZoneAwareExpression]] by resolving its children and applying a timezone. + * + * @param unresolvedTimezoneExpression The [[TimeZoneAwareExpression]] to resolve. + * @return A resolved [[Expression]] with the session's local timezone applied. + */ + override def resolve(unresolvedTimezoneExpression: TimeZoneAwareExpression): Expression = { + val expressionWithResolvedChildren = + withResolvedChildren(unresolvedTimezoneExpression, expressionResolver.resolve) + withResolvedTimezoneCopyTags(expressionWithResolvedChildren, conf.sessionLocalTimeZone) + } + + /** + * Applies a timezone to a [[TimeZoneAwareExpression]] while preserving original tags. + * + * This method is particularly useful for cases like resolving [[Cast]] expressions where tags + * such as [[USER_SPECIFIED_CAST]] need to be preserved. + * + * @param expression The [[TimeZoneAwareExpression]] to apply the timezone to. + * @param timeZoneId The timezone ID to apply. + * @return A new [[TimeZoneAwareExpression]] with the specified timezone and original tags. 
+ */ + def withResolvedTimezoneCopyTags(expression: Expression, timeZoneId: String): Expression = { + val withTimeZone = withResolvedTimezone(expression, timeZoneId) + withTimeZone.copyTagsFrom(expression) + withTimeZone + } + + /** + * Apply timezone to [[TimeZoneAwareExpression]] expressions. + */ + def withResolvedTimezone(expression: Expression, timeZoneId: String): Expression = + expression match { + case timezoneExpression: TimeZoneAwareExpression if timezoneExpression.timeZoneId.isEmpty => + timezoneExpression.withTimeZone(timeZoneId) + case other => other + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TracksResolvedNodes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TracksResolvedNodes.scala new file mode 100644 index 0000000000000..dd86bf843b4ec --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TracksResolvedNodes.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import java.util.IdentityHashMap + +import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.trees.TreeNode +import org.apache.spark.sql.internal.SQLConf + +/** + * Trait for top-level resolvers that is used to keep track of resolved nodes and throw an error if + * a node is resolved more than once. This is only used in tests because of the memory overhead of + * using a set to track resolved nodes. + */ +trait TracksResolvedNodes[TreeNodeType <: TreeNode[TreeNodeType]] extends SQLConfHelper { + // Using Map because IdentityHashSet is not available in Scala + private val seenResolvedNodes = new IdentityHashMap[TreeNodeType, Unit] + + private val shouldTrackResolvedNodes = + conf.getConf(SQLConf.ANALYZER_SINGLE_PASS_TRACK_RESOLVED_NODES_ENABLED) + + protected def throwIfNodeWasResolvedEarlier(node: TreeNodeType): Unit = + if (shouldTrackResolvedNodes && seenResolvedNodes.containsKey(node)) { + throw SparkException.internalError( + s"Single-pass resolver attempted to resolve the same node more than once: $node" + ) + } + + protected def markNodeAsResolved(node: TreeNodeType): Unit = { + if (shouldTrackResolvedNodes) { + seenResolvedNodes.put(node, ()) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TreeNodeResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TreeNodeResolver.scala new file mode 100644 index 0000000000000..5991585995cad --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TreeNodeResolver.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.trees.TreeNode + +/** + * Base class for [[TreeNode]] resolvers. All resolvers should extend this class with + * specific [[UnresolvedNode]] and [[ResolvedNode]] types. + */ +trait TreeNodeResolver[UnresolvedNode <: TreeNode[_], ResolvedNode <: TreeNode[_]] + extends SQLConfHelper { + def resolve(unresolvedNode: UnresolvedNode): ResolvedNode +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TypeCoercionResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TypeCoercionResolver.scala new file mode 100644 index 0000000000000..cf4c2ef0d7504 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/TypeCoercionResolver.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} + +/** + * [[TypeCoercionResolver]] is used by other resolvers to uniformly apply type coercions to all + * expressions. [[TypeCoercionResolver]] takes in a sequence of type coercion transformations that + * should be applied to an expression and applies them in order. Finally, [[TypeCoercionResolver]] + * applies timezone to expression's children, as a child could be replaced with Cast(child, type), + * therefore [[Cast]] resolution is needed. Timezone is applied only on children that have been + * re-instantiated by [[TypeCoercionResolver]], because otherwise children have already been + * resolved. 
+ */ +class TypeCoercionResolver( + timezoneAwareExpressionResolver: TimezoneAwareExpressionResolver, + typeCoercionRules: Seq[Expression => Expression]) + extends TreeNodeResolver[Expression, Expression] { + + override def resolve(expression: Expression): Expression = { + val oldChildren = expression.children + + val withTypeCoercion = typeCoercionRules.foldLeft(expression) { + case (expr, rule) => rule.apply(expr) + } + + val newChildren = withTypeCoercion.children.zip(oldChildren).map { + case (newChild: Cast, oldChild) if !newChild.eq(oldChild) => + timezoneAwareExpressionResolver.withResolvedTimezone(newChild, conf.sessionLocalTimeZone) + case (newChild, _) => newChild + } + withTypeCoercion.withNewChildren(newChildren) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/UnaryMinusResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/UnaryMinusResolver.scala new file mode 100644 index 0000000000000..739d7cf43c183 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/resolver/UnaryMinusResolver.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.sql.catalyst.analysis.{AnsiTypeCoercion, TypeCoercion} +import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryMinus} + +/** + * Resolver for [[UnaryMinus]]. Resolves children and applies type coercion to target node. + */ +class UnaryMinusResolver( + expressionResolver: ExpressionResolver, + timezoneAwareExpressionResolver: TimezoneAwareExpressionResolver) + extends TreeNodeResolver[UnaryMinus, Expression] + with ResolvesExpressionChildren { + + private val typeCoercionRules: Seq[Expression => Expression] = + if (conf.ansiEnabled) { + UnaryMinusResolver.ANSI_TYPE_COERCION_RULES + } else { + UnaryMinusResolver.TYPE_COERCION_RULES + } + private val typeCoercionResolver: TypeCoercionResolver = + new TypeCoercionResolver(timezoneAwareExpressionResolver, typeCoercionRules) + + override def resolve(unresolvedUnaryMinus: UnaryMinus): Expression = { + val unaryMinusWithResolvedChildren: UnaryMinus = + withResolvedChildren(unresolvedUnaryMinus, expressionResolver.resolve) + typeCoercionResolver.resolve(unaryMinusWithResolvedChildren) + } +} + +object UnaryMinusResolver { + // Ordering in the list of type coercions should be in sync with the list in [[TypeCoercion]]. + private val TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq( + TypeCoercion.ImplicitTypeCoercion.apply, + TypeCoercion.DateTimeOperationsTypeCoercion.apply + ) + + // Ordering in the list of type coercions should be in sync with the list in [[AnsiTypeCoercion]]. 
+ private val ANSI_TYPE_COERCION_RULES: Seq[Expression => Expression] = Seq( + AnsiTypeCoercion.ImplicitTypeCoercion.apply, + AnsiTypeCoercion.AnsiDateTimeOperationsTypeCoercion.apply + ) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 40994f42e71d6..fabe551d054ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -23,14 +23,14 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIden import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.{CTERelationDef, LeafNode, LogicalPlan, UnaryNode} +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, UnaryNode} import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId import org.apache.spark.sql.connector.catalog.TableWritePrivilege import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types.{DataType, Metadata, StructType} -import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils} import org.apache.spark.util.ArrayImplicits._ /** @@ -76,17 +76,6 @@ case class PlanWithUnresolvedIdentifier( copy(identifierExpr, newChildren, planBuilder) } -/** - * A logical plan placeholder which delays CTE resolution - * to moment when PlanWithUnresolvedIdentifier gets resolved - */ -case class UnresolvedWithCTERelations( - unresolvedPlan: LogicalPlan, - cteRelations: Seq[(String, 
CTERelationDef)]) - extends UnresolvedLeafNode { - final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_IDENTIFIER_WITH_CTE) -} - /** * An expression placeholder that holds the identifier clause string expression. It will be * replaced by the actual expression with the evaluated identifier string. @@ -217,7 +206,8 @@ case class ResolvedInlineTable(rows: Seq[Seq[Expression]], output: Seq[Attribute */ case class UnresolvedTableValuedFunction( name: Seq[String], - functionArgs: Seq[Expression]) + functionArgs: Seq[Expression], + override val isStreaming: Boolean = false) extends UnresolvedLeafNode { final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_TABLE_VALUED_FUNCTION) @@ -439,7 +429,7 @@ object UnresolvedFunction { * Represents all of the input attributes to a given relational operator, for example in * "SELECT * FROM ...". A [[Star]] gets automatically expanded during analysis. */ -abstract class Star extends LeafExpression with NamedExpression { +trait Star extends NamedExpression { override def name: String = throw new UnresolvedException("name") override def exprId: ExprId = throw new UnresolvedException("exprId") @@ -461,15 +451,20 @@ abstract class Star extends LeafExpression with NamedExpression { * This is also used to expand structs. For example: * "SELECT record.* from (SELECT struct(a,b,c) as record ...) * - * @param target an optional name that should be the target of the expansion. If omitted all - * targets' columns are produced. This can either be a table name or struct name. This - * is a list of identifiers that is the path of the expansion. - * - * This class provides the shared behavior between the classes for SELECT * ([[UnresolvedStar]]) - * and SELECT * EXCEPT ([[UnresolvedStarExcept]]). [[UnresolvedStar]] is just a case class of this, - * while [[UnresolvedStarExcept]] adds some additional logic to the expand method. 
- */ -abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star with Unevaluable { + * This trait provides the shared behavior among the classes for SELECT * ([[UnresolvedStar]]) + * and SELECT * EXCEPT ([[UnresolvedStarExceptOrReplace]]), etc. [[UnresolvedStar]] is just a case + * class of this, while [[UnresolvedStarExceptOrReplace]] or other classes add some additional logic + * to the expand method. + */ +trait UnresolvedStarBase extends Star with Unevaluable { + + /** + * An optional name that should be the target of the expansion. If omitted all + * targets' columns are produced. This can either be a table name or struct name. This + * is a list of identifiers that is the path of the expansion. + */ + def target: Option[Seq[String]] + /** * Returns true if the nameParts is a subset of the last elements of qualifier of the attribute. * @@ -518,7 +513,9 @@ abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star with childOperatorMetadataOutput: Seq[Attribute], resolve: (Seq[String], Resolver) => Option[NamedExpression], suggestedAttributes: Seq[Attribute], - resolver: Resolver): Seq[NamedExpression] = { + resolver: Resolver, + cleanupNestedAliasesDuringStructExpansion: Boolean = false + ): Seq[NamedExpression] = { // If there is no table specified, use all non-hidden input attributes. if (target.isEmpty) return childOperatorOutput @@ -539,11 +536,22 @@ abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star with // (i.e. [name].* is both a table and a struct), the struct path can always be qualified. val attribute = resolve(target.get, resolver) if (attribute.isDefined) { + // If cleanupNestedAliasesDuringStructExpansion is true, we remove nested aliases during + // struct expansion. This is something which is done in the CleanupAliases rule but for the + // single-pass analyzer it has to be done here to avoid additional tree traversals. 
+ val normalizedAttribute = if (cleanupNestedAliasesDuringStructExpansion) { + attribute.get match { + case a: Alias => a.child + case other => other + } + } else { + attribute.get + } // This target resolved to an attribute in child. It must be a struct. Expand it. - attribute.get.dataType match { + normalizedAttribute.dataType match { case s: StructType => s.zipWithIndex.map { case (f, i) => - val extract = GetStructField(attribute.get, i) + val extract = GetStructField(normalizedAttribute, i) Alias(extract, f.name)() } @@ -571,9 +579,16 @@ abstract class UnresolvedStarBase(target: Option[Seq[String]]) extends Star with * * @param excepts a list of names that should be excluded from the expansion. * + * @param replacements an optional list of expressions that should be used to replace the + * expressions removed by EXCEPT. If present, the length of this list must + * be the same as the length of the EXCEPT list. This supports replacing + * expressions instead of excluding them from the original SELECT list. */ -case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[String]]) - extends UnresolvedStarBase(target) { +case class UnresolvedStarExceptOrReplace( + target: Option[Seq[String]], + excepts: Seq[Seq[String]], + replacements: Option[Seq[NamedExpression]]) + extends LeafExpression with UnresolvedStarBase { /** * We expand the * EXCEPT by the following three steps: @@ -652,7 +667,14 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St // group the except pairs by the column they refer to. NOTE: no groupMap until scala 2.13 val groupedExcepts: AttributeMap[Seq[Seq[String]]] = AttributeMap(excepts.groupBy(_._1.toAttribute).transform((_, v) => v.map(_._2))) - + // If the 'replacements' list is populated to indicate we should replace excepted columns + // with new expressions, we must have the same number of replacements as excepts. Keep an + // index to track the current replacement. 
+ replacements.foreach { r => + assert(excepts.size == r.size, + "The number of replacements must be the same as the number of excepts") + } + var replacementIndex = 0 // map input columns while searching for the except entry corresponding to the current column columns.map(col => col -> groupedExcepts.get(col.toAttribute)).collect { // pass through columns that don't match anything in groupedExcepts @@ -679,11 +701,15 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St filterColumns(extractedFields.toImmutableArraySeq, newExcepts)), col.name)() // if there are multiple nestedExcepts but one is empty we must have overlapping except // columns. throw an error. - case (col, Some(nestedExcepts)) if nestedExcepts.size > 1 => + case (_, Some(nestedExcepts)) if nestedExcepts.size > 1 => throw new AnalysisException( errorClass = "EXCEPT_OVERLAPPING_COLUMNS", messageParameters = Map( "columns" -> this.excepts.map(_.mkString(".")).mkString(", "))) + // found a match and the 'replacements' list is populated - replace the column + case (_, Some(_)) if replacements.nonEmpty => + replacementIndex += 1 + replacements.get(replacementIndex - 1) } } @@ -691,6 +717,103 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St } } +/** + * Represents some of the input attributes to a given relational operator, for example in + * `df.withColumn`. + * + * @param colNames a list of column names that should be replaced or produced. + * + * @param exprs the corresponding expressions for `colNames`. + * + * @param explicitMetadata an optional list of explicit metadata to associate with the columns. 
+ */ +case class UnresolvedStarWithColumns( + colNames: Seq[String], + exprs: Seq[Expression], + explicitMetadata: Option[Seq[Metadata]] = None) + extends UnresolvedStarBase { + + override def target: Option[Seq[String]] = None + override def children: Seq[Expression] = exprs + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): UnresolvedStarWithColumns = + copy(exprs = newChildren) + + override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = { + assert(colNames.size == exprs.size, + s"The size of column names: ${colNames.size} isn't equal to " + + s"the size of expressions: ${exprs.size}") + explicitMetadata.foreach { m => + assert(colNames.size == m.size, + s"The size of column names: ${colNames.size} isn't equal to " + + s"the size of metadata elements: ${m.size}") + } + + SchemaUtils.checkColumnNameDuplication(colNames, resolver) + + val expandedCols = super.expand(input, resolver) + + val columnSeq = explicitMetadata match { + case Some(ms) => colNames.zip(exprs).zip(ms.map(Some(_))) + case _ => colNames.zip(exprs).map((_, None)) + } + + val replacedAndExistingColumns = expandedCols.map { field => + columnSeq.find { case ((colName, _), _) => + resolver(field.name, colName) + } match { + case Some(((colName, expr), m)) => Alias(expr, colName)(explicitMetadata = m) + case _ => field + } + } + + val newColumns = columnSeq.filter { case ((colName, _), _) => + !expandedCols.exists(f => resolver(f.name, colName)) + }.map { + case ((colName, expr), m) => Alias(expr, colName)(explicitMetadata = m) + } + + replacedAndExistingColumns ++ newColumns + } +} + +/** + * Represents some of the input attributes to a given relational operator, for example in + * `df.withColumnRenamed`. + * + * @param existingNames a list of column names that should be replaced. + * If the column does not exist, it is ignored. + * + * @param newNames a list of new column names that should be used to replace the existing columns. 
+ */ +case class UnresolvedStarWithColumnsRenames( + existingNames: Seq[String], + newNames: Seq[String]) + extends LeafExpression with UnresolvedStarBase { + + override def target: Option[Seq[String]] = None + + override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = { + assert(existingNames.size == newNames.size, + s"The size of existing column names: ${existingNames.size} isn't equal to " + + s"the size of new column names: ${newNames.size}") + + val expandedCols = super.expand(input, resolver) + + existingNames.zip(newNames).foldLeft(expandedCols) { + case (attrs, (existingName, newName)) => + attrs.map(attr => + if (resolver(attr.name, existingName)) { + Alias(attr, newName)() + } else { + attr + } + ) + } + } +} + /** * Represents all of the input attributes to a given relational operator, for example in * "SELECT * FROM ...". @@ -702,7 +825,8 @@ case class UnresolvedStarExcept(target: Option[Seq[String]], excepts: Seq[Seq[St * targets' columns are produced. This can either be a table name or struct name. This * is a list of identifiers that is the path of the expansion. */ -case class UnresolvedStar(target: Option[Seq[String]]) extends UnresolvedStarBase(target) +case class UnresolvedStar(target: Option[Seq[String]]) + extends LeafExpression with UnresolvedStarBase /** * Represents all of the input attributes to a given relational operator, for example in @@ -712,7 +836,7 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends UnresolvedStarBas * tables' columns are produced. 
*/ case class UnresolvedRegex(regexPattern: String, table: Option[String], caseSensitive: Boolean) - extends Star with Unevaluable { + extends LeafExpression with Star with Unevaluable { override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = { val pattern = if (caseSensitive) regexPattern else s"(?i)$regexPattern" table match { @@ -770,7 +894,8 @@ case class MultiAlias(child: Expression, names: Seq[String]) * * @param expressions Expressions to expand. */ -case class ResolvedStar(expressions: Seq[NamedExpression]) extends Star with Unevaluable { +case class ResolvedStar(expressions: Seq[NamedExpression]) + extends LeafExpression with Star with Unevaluable { override def newInstance(): NamedExpression = throw new UnresolvedException("newInstance") override def expand(input: LogicalPlan, resolver: Resolver): Seq[NamedExpression] = expressions override def toString: String = expressions.mkString("ResolvedStar(", ", ", ")") @@ -936,6 +1061,28 @@ case class UnresolvedOrdinal(ordinal: Int) final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_ORDINAL) } +/** + * Represents an unresolved ordinal used in the GROUP BY clause of a SQL pipe aggregate operator + * ("|> AGGREGATE"). + * + * In this context, the ordinal refers to the one-based position of the column in the input + * relation. Note that this behavior is different from GROUP BY ordinals in regular SQL, wherein the + * ordinal refers to the one-based position of the column in the SELECT clause. 
+ * + * For example: + * {{{ + * values ('abc', 'def') tab(x, y) + * |> aggregate sum(x) group by 2 + * }}} + * @param ordinal ordinal starts from 1, instead of 0 + */ +case class UnresolvedPipeAggregateOrdinal(ordinal: Int) + extends LeafExpression with Unevaluable with NonSQLExpression { + override def dataType: DataType = throw new UnresolvedException("dataType") + override def nullable: Boolean = throw new UnresolvedException("nullable") + override lazy val resolved = false +} + /** * Represents unresolved having clause, the child for it can be Aggregate, GroupingSets, Rollup * and Cube. It is turned by the analyzer into a Filter. @@ -1004,42 +1151,28 @@ case class UnresolvedTranspose( copy(child = newChild) } -case class UnresolvedOuterReference( - nameParts: Seq[String]) - extends LeafExpression with NamedExpression with Unevaluable { - - def name: String = - nameParts.map(n => if (n.contains(".")) s"`$n`" else n).mkString(".") - - override def exprId: ExprId = throw new UnresolvedException("exprId") - override def dataType: DataType = throw new UnresolvedException("dataType") - override def nullable: Boolean = throw new UnresolvedException("nullable") - override def qualifier: Seq[String] = throw new UnresolvedException("qualifier") - override lazy val resolved = false - - override def toAttribute: Attribute = throw new UnresolvedException("toAttribute") - override def newInstance(): UnresolvedOuterReference = this - - final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_OUTER_REFERENCE) +// A marker node to indicate that the logical plan containing this expression should be lazily +// analyzed in the DataFrame. This node will be removed at the beginning of analysis. 
+case class LazyExpression(child: Expression) extends UnaryExpression with Unevaluable { + override lazy val resolved: Boolean = false + override def dataType: DataType = child.dataType + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(child = newChild) + } + final override val nodePatterns: Seq[TreePattern] = Seq(LAZY_EXPRESSION) } -case class LazyOuterReference( - nameParts: Seq[String]) - extends LeafExpression with NamedExpression with Unevaluable with LazyAnalysisExpression { - - def name: String = - nameParts.map(n => if (n.contains(".")) s"`$n`" else n).mkString(".") - - override def exprId: ExprId = throw new UnresolvedException("exprId") - override def dataType: DataType = throw new UnresolvedException("dataType") +trait UnresolvedPlanId extends LeafExpression with Unevaluable { override def nullable: Boolean = throw new UnresolvedException("nullable") - override def qualifier: Seq[String] = throw new UnresolvedException("qualifier") + override def dataType: DataType = throw new UnresolvedException("dataType") + override lazy val resolved = false - override def toAttribute: Attribute = throw new UnresolvedException("toAttribute") - override def newInstance(): NamedExpression = LazyOuterReference(nameParts) + def planId: Long + def withPlan(plan: LogicalPlan): Expression - override def nodePatternsInternal(): Seq[TreePattern] = Seq(LAZY_OUTER_REFERENCE) + final override val nodePatterns: Seq[TreePattern] = + Seq(UNRESOLVED_PLAN_ID) ++ nodePatternsInternal() - override def prettyName: String = "outer" - override def sql: String = s"$prettyName($name)" + // Subclasses can override this function to provide more TreePatterns. 
+ def nodePatternsInternal(): Seq[TreePattern] = Seq() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala new file mode 100644 index 0000000000000..923373c1856a9 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import scala.collection.mutable + +import org.json4s.JsonAST.{JArray, JString} +import org.json4s.jackson.JsonMethods.{compact, render} + +import org.apache.spark.SparkException +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.catalog.UserDefinedFunction._ +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, ScalarSubquery} +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project} +import org.apache.spark.sql.types.{DataType, StructType} + +/** + * Represents a SQL function. 
+ * + * @param name qualified name of the SQL function + * @param inputParam function input parameters + * @param returnType function return type + * @param exprText function body as an expression + * @param queryText function body as a query + * @param comment function comment + * @param deterministic whether the function is deterministic + * @param containsSQL whether the function's SQL data access characteristic is CONTAINS SQL + * @param isTableFunc whether the function is a table function + * @param properties additional properties to be serialized for the SQL function + * @param owner owner of the function + * @param createTimeMs function creation time in milliseconds + */ +case class SQLFunction( + name: FunctionIdentifier, + inputParam: Option[StructType], + returnType: Either[DataType, StructType], + exprText: Option[String], + queryText: Option[String], + comment: Option[String], + deterministic: Option[Boolean], + containsSQL: Option[Boolean], + isTableFunc: Boolean, + properties: Map[String, String], + owner: Option[String] = None, + createTimeMs: Long = System.currentTimeMillis) extends UserDefinedFunction { + + assert(exprText.nonEmpty || queryText.nonEmpty) + assert((isTableFunc && returnType.isRight) || (!isTableFunc && returnType.isLeft)) + + import SQLFunction._ + + override val language: RoutineLanguage = LanguageSQL + + /** + * Optionally get the function body as an expression or query using the given parser. + */ + def getExpressionAndQuery( + parser: ParserInterface, + isTableFunc: Boolean): (Option[Expression], Option[LogicalPlan]) = { + // The RETURN clause of the CREATE FUNCTION statement looks like this in the parser: + // RETURN (query | expression) + // If the 'query' matches and parses as a SELECT clause of one item with no FROM clause, and + // this is a scalar function, we skip a level of subquery expression wrapping by using the + // referenced expression directly. 
+ val parsedExpression = exprText.map(parser.parseExpression) + val parsedQuery = queryText.map(parser.parsePlan) + (parsedExpression, parsedQuery) match { + case (None, Some(Project(expr :: Nil, _: OneRowRelation))) + if !isTableFunc => + (Some(expr), None) + case (Some(ScalarSubquery(Project(expr :: Nil, _: OneRowRelation), _, _, _, _, _, _)), None) + if !isTableFunc => + (Some(expr), None) + case (_, _) => + (parsedExpression, parsedQuery) + } + } + + /** Get scalar function return data type. */ + def getScalarFuncReturnType: DataType = returnType match { + case Left(dataType) => dataType + case Right(_) => + throw SparkException.internalError( + "This function is a table function, not a scalar function.") + } + + /** Get table function return columns. */ + def getTableFuncReturnCols: StructType = returnType match { + case Left(_) => + throw SparkException.internalError( + "This function is a scalar function, not a table function.") + case Right(columns) => columns + } + + /** + * Convert the SQL function to a [[CatalogFunction]]. + */ + def toCatalogFunction: CatalogFunction = { + val props = sqlFunctionToProps ++ properties + CatalogFunction( + identifier = name, + className = SQL_FUNCTION_PREFIX, + resources = propertiesToFunctionResources(props, name)) + } + + /** + * Convert the SQL function to an [[ExpressionInfo]]. + */ + def toExpressionInfo: ExpressionInfo = { + val props = sqlFunctionToProps ++ functionMetadataToProps ++ properties + val usage = mapper.writeValueAsString(props) + new ExpressionInfo( + SQL_FUNCTION_PREFIX, + name.database.orNull, + name.funcName, + usage, + "", + "", + "", + "", + "", + "", + "sql_udf") + } + + /** + * Convert the SQL function fields into properties. 
+ */ + private def sqlFunctionToProps: Map[String, String] = { + val props = new mutable.HashMap[String, String] + val inputParamText = inputParam.map(_.fields.map(_.toDDL).mkString(", ")) + inputParamText.foreach(props.put(INPUT_PARAM, _)) + val returnTypeText = returnType match { + case Left(dataType) => dataType.sql + case Right(columns) => columns.toDDL + } + props.put(RETURN_TYPE, returnTypeText) + exprText.foreach(props.put(EXPRESSION, _)) + queryText.foreach(props.put(QUERY, _)) + comment.foreach(props.put(COMMENT, _)) + deterministic.foreach(d => props.put(DETERMINISTIC, d.toString)) + containsSQL.foreach(x => props.put(CONTAINS_SQL, x.toString)) + props.put(IS_TABLE_FUNC, isTableFunc.toString) + props.toMap + } + + private def functionMetadataToProps: Map[String, String] = { + val props = new mutable.HashMap[String, String] + owner.foreach(props.put(OWNER, _)) + props.put(CREATE_TIME, createTimeMs.toString) + props.toMap + } +} + +object SQLFunction { + + private val SQL_FUNCTION_PREFIX = "sqlFunction." + + private val INPUT_PARAM: String = SQL_FUNCTION_PREFIX + "inputParam" + private val RETURN_TYPE: String = SQL_FUNCTION_PREFIX + "returnType" + private val EXPRESSION: String = SQL_FUNCTION_PREFIX + "expression" + private val QUERY: String = SQL_FUNCTION_PREFIX + "query" + private val COMMENT: String = SQL_FUNCTION_PREFIX + "comment" + private val DETERMINISTIC: String = SQL_FUNCTION_PREFIX + "deterministic" + private val CONTAINS_SQL: String = SQL_FUNCTION_PREFIX + "containsSQL" + private val IS_TABLE_FUNC: String = SQL_FUNCTION_PREFIX + "isTableFunc" + private val OWNER: String = SQL_FUNCTION_PREFIX + "owner" + private val CREATE_TIME: String = SQL_FUNCTION_PREFIX + "createTime" + + private val FUNCTION_CATALOG_AND_NAMESPACE = "catalogAndNamespace.numParts" + private val FUNCTION_CATALOG_AND_NAMESPACE_PART_PREFIX = "catalogAndNamespace.part." 
+ + private val FUNCTION_REFERRED_TEMP_VIEW_NAMES = "referredTempViewNames" + private val FUNCTION_REFERRED_TEMP_FUNCTION_NAMES = "referredTempFunctionsNames" + private val FUNCTION_REFERRED_TEMP_VARIABLE_NAMES = "referredTempVariableNames" + + /** + * Convert a [[CatalogFunction]] into a SQL function. + */ + def fromCatalogFunction(function: CatalogFunction, parser: ParserInterface): SQLFunction = { + try { + val parts = function.resources.collect { case FunctionResource(FileResource, uri) => + val index = uri.substring(0, INDEX_LENGTH).toInt + val body = uri.substring(INDEX_LENGTH) + index -> body + } + val blob = parts.sortBy(_._1).map(_._2).mkString + val props = mapper.readValue(blob, classOf[Map[String, String]]) + val isTableFunc = props(IS_TABLE_FUNC).toBoolean + val returnType = parseReturnTypeText(props(RETURN_TYPE), isTableFunc, parser) + SQLFunction( + name = function.identifier, + inputParam = props.get(INPUT_PARAM).map(parseTableSchema(_, parser)), + returnType = returnType.get, + exprText = props.get(EXPRESSION), + queryText = props.get(QUERY), + comment = props.get(COMMENT), + deterministic = props.get(DETERMINISTIC).map(_.toBoolean), + containsSQL = props.get(CONTAINS_SQL).map(_.toBoolean), + isTableFunc = isTableFunc, + props.filterNot(_._1.startsWith(SQL_FUNCTION_PREFIX))) + } catch { + case e: Exception => + throw new AnalysisException( + errorClass = "CORRUPTED_CATALOG_FUNCTION", + messageParameters = Map( + "identifier" -> s"${function.identifier}", + "className" -> s"${function.className}"), cause = Some(e) + ) + } + } + + def parseDefault(text: String, parser: ParserInterface): Expression = { + parser.parseExpression(text) + } + + /** + * This method returns an optional DataType indicating, when present, either the return type for + * scalar user-defined functions, or a StructType indicating the names and types of the columns in + * the output schema for table functions. 
If the optional value is empty, this indicates that the + * CREATE FUNCTION statement did not have any RETURNS clause at all (for scalar functions), or + * that it included a RETURNS TABLE clause but without any specified output schema (for table + * functions), prompting the analyzer to infer these metadata instead. + */ + def parseReturnTypeText( + text: String, + isTableFunc: Boolean, + parser: ParserInterface): Option[Either[DataType, StructType]] = { + if (!isTableFunc) { + // This is a scalar user-defined function. + if (text.isEmpty) { + // The CREATE FUNCTION statement did not have any RETURNS clause. + Option.empty[Either[DataType, StructType]] + } else { + // The CREATE FUNCTION statement included a RETURNS clause with an explicit return type. + Some(Left(parseDataType(text, parser))) + } + } else { + // This is a table function. + if (text.equalsIgnoreCase("table")) { + // The CREATE FUNCTION statement had a RETURNS TABLE clause but without any explicit schema. + Option.empty[Either[DataType, StructType]] + } else { + // The CREATE FUNCTION statement included a RETURNS TABLE clause with an explicit schema. + Some(Right(parseTableSchema(text, parser))) + } + } + } + + def isSQLFunction(className: String): Boolean = className == SQL_FUNCTION_PREFIX + + /** + * Convert the current catalog and namespace to properties. + */ + def catalogAndNamespaceToProps( + currentCatalog: String, + currentNamespace: Seq[String]): Map[String, String] = { + val props = new mutable.HashMap[String, String] + val parts = currentCatalog +: currentNamespace + if (parts.nonEmpty) { + props.put(FUNCTION_CATALOG_AND_NAMESPACE, parts.length.toString) + parts.zipWithIndex.foreach { case (name, index) => + props.put(s"$FUNCTION_CATALOG_AND_NAMESPACE_PART_PREFIX$index", name) + } + } + props.toMap + } + + /** + * Convert the temporary object names to properties. 
+ */ + def referredTempNamesToProps( + viewNames: Seq[Seq[String]], + functionsNames: Seq[String], + variableNames: Seq[Seq[String]]): Map[String, String] = { + val viewNamesJson = + JArray(viewNames.map(nameParts => JArray(nameParts.map(JString).toList)).toList) + val functionsNamesJson = JArray(functionsNames.map(JString).toList) + val variableNamesJson = + JArray(variableNames.map(nameParts => JArray(nameParts.map(JString).toList)).toList) + + val props = new mutable.HashMap[String, String] + props.put(FUNCTION_REFERRED_TEMP_VIEW_NAMES, compact(render(viewNamesJson))) + props.put(FUNCTION_REFERRED_TEMP_FUNCTION_NAMES, compact(render(functionsNamesJson))) + props.put(FUNCTION_REFERRED_TEMP_VARIABLE_NAMES, compact(render(variableNamesJson))) + props.toMap + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index a0f7af10fefaf..b123952c5f086 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -19,25 +19,28 @@ package org.apache.spark.sql.catalyst.catalog import java.net.URI import java.util.Locale -import java.util.concurrent.Callable -import java.util.concurrent.TimeUnit +import java.util.concurrent.{Callable, ExecutionException, TimeUnit} import javax.annotation.concurrent.GuardedBy import scala.collection.mutable import scala.util.{Failure, Success, Try} import com.google.common.cache.{Cache, CacheBuilder} +import com.google.common.util.concurrent.UncheckedExecutionException import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.SparkException +import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ import 
org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder -import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Expression, ExpressionInfo, NamedExpression, UpCast} +import org.apache.spark.sql.catalyst.analysis.TableFunctionRegistry.TableFunctionBuilder +import org.apache.spark.sql.catalyst.catalog.SQLFunction.parseDefault +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Cast, Expression, ExpressionInfo, NamedArgumentExpression, NamedExpression, ScalarSubquery, UpCast} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias, View} +import org.apache.spark.sql.catalyst.plans.logical.{FunctionSignature, InputParameter, LocalRelation, LogicalPlan, NamedParametersSupport, Project, SubqueryAlias, View} import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager @@ -45,7 +48,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAM import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType} import org.apache.spark.sql.util.{CaseInsensitiveStringMap, PartitioningUtils} import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils @@ -210,7 +213,13 @@ class SessionCatalog( /** This method provides a way to get a cached plan. 
*/ def getCachedPlan(t: QualifiedTableName, c: Callable[LogicalPlan]): LogicalPlan = { - tableRelationCache.get(t, c) + try { + tableRelationCache.get(t, c) + } catch { + case e @ (_: ExecutionException | _: UncheckedExecutionException) + if e.getCause != null && e.getCause.isInstanceOf[SparkThrowable] => + throw e.getCause + } } /** This method provides a way to get a cached plan if the key exists. */ @@ -1526,10 +1535,146 @@ class SessionCatalog( } } + /** + * Create a user defined function. + */ + def createUserDefinedFunction(function: UserDefinedFunction, ignoreIfExists: Boolean): Unit = { + createFunction(function.toCatalogFunction, ignoreIfExists) + } + // ---------------------------------------------------------------- // | Methods that interact with temporary and metastore functions | // ---------------------------------------------------------------- + /** + * Constructs a [[FunctionBuilder]] based on the provided class that represents a function. + */ + private def makeSQLFunctionBuilder(function: SQLFunction): FunctionBuilder = { + if (function.isTableFunc) { + throw UserDefinedFunctionErrors.notAScalarFunction(function.name.nameParts) + } + (input: Seq[Expression]) => { + val args = rearrangeArguments(function.inputParam, input, function.name.toString) + val returnType = function.getScalarFuncReturnType + SQLFunctionExpression( + function.name.unquotedString, function, args, Some(returnType)) + } + } + + /** + * Constructs a scalar SQL function logical plan. The logical plan will be used to + * construct actual expression from the function inputs and body. + * + * The body of a scalar SQL function can either be an expression or a query returns + * one single column. 
+ * + * Example scalar SQL function with an expression: + * + * CREATE FUNCTION area(width DOUBLE, height DOUBLE) RETURNS DOUBLE + * RETURN width * height; + * + * Query: + * + * SELECT area(a, b) FROM t; + * + * SQL function plan: + * + * Project [CAST(width * height AS DOUBLE) AS area] + * +- Project [CAST(a AS DOUBLE) AS width, CAST(b AS DOUBLE) AS height] + * +- LocalRelation [a, b] + * + * Example scalar SQL function with a subquery: + * + * CREATE FUNCTION foo(x INT) RETURNS INT + * RETURN SELECT SUM(b) FROM t WHERE x = a; + * + * SELECT foo(a) FROM t; + * + * SQL function plan: + * + * Project [scalar-subquery AS foo] + * : +- Aggregate [] [sum(b)] + * : +- Filter [outer(x) = a] + * : +- Relation [a, b] + * +- Project [CAST(a AS INT) AS x] + * +- LocalRelation [a, b] + */ + def makeSQLFunctionPlan( + name: String, + function: SQLFunction, + input: Seq[Expression]): LogicalPlan = { + def metaForFuncInputAlias = { + new MetadataBuilder() + .putString("__funcInputAlias", "true") + .build() + } + assert(!function.isTableFunc) + val funcName = function.name.funcName + + // Use captured SQL configs when parsing a SQL function. + val conf = new SQLConf() + function.getSQLConfigs.foreach { case (k, v) => conf.settings.put(k, v) } + SQLConf.withExistingConf(conf) { + val inputParam = function.inputParam + val returnType = function.getScalarFuncReturnType + val (expression, query) = function.getExpressionAndQuery(parser, isTableFunc = false) + assert(expression.isDefined || query.isDefined) + + // Check function arguments + val paramSize = inputParam.map(_.size).getOrElse(0) + if (input.size > paramSize) { + throw QueryCompilationErrors.wrongNumArgsError( + name, paramSize.toString, input.size) + } + + val inputs = inputParam.map { param => + // Attributes referencing the input parameters inside the function can use the + // function name as a qualifier. 
E.G.: + // `create function foo(a int) returns int return foo.a` + val qualifier = Seq(funcName) + val paddedInput = input ++ + param.takeRight(paramSize - input.size).map { p => + val defaultExpr = p.getDefault() + if (defaultExpr.isDefined) { + Cast(parseDefault(defaultExpr.get, parser), p.dataType) + } else { + throw QueryCompilationErrors.wrongNumArgsError( + name, paramSize.toString, input.size) + } + } + + paddedInput.zip(param.fields).map { + case (expr, param) => + Alias(Cast(expr, param.dataType), param.name)( + qualifier = qualifier, + // mark the alias as function input + explicitMetadata = Some(metaForFuncInputAlias)) + } + }.getOrElse(Nil) + + val body = if (query.isDefined) ScalarSubquery(query.get) else expression.get + Project(Alias(Cast(body, returnType), funcName)() :: Nil, + Project(inputs, LocalRelation(inputs.flatMap(_.references)))) + } + } + + /** + * Constructs a [[TableFunctionBuilder]] based on the provided class that represents a function. + */ + private def makeSQLTableFunctionBuilder(function: SQLFunction): TableFunctionBuilder = { + if (!function.isTableFunc) { + throw UserDefinedFunctionErrors.notATableFunction(function.name.nameParts) + } + (input: Seq[Expression]) => { + val args = rearrangeArguments(function.inputParam, input, function.name.toString) + val returnParam = function.getTableFuncReturnCols + val output = returnParam.fields.map { param => + AttributeReference(param.name, param.dataType, param.nullable)() + } + SQLTableFunction(function.name.unquotedString, function, args, output.toSeq) + } + } + /** * Constructs a [[FunctionBuilder]] based on the provided function metadata. 
*/ @@ -1544,6 +1689,24 @@ class SessionCatalog( (input: Seq[Expression]) => functionExpressionBuilder.makeExpression(name, clazz, input) } + private def makeUserDefinedScalarFuncBuilder(func: UserDefinedFunction): FunctionBuilder = { + func match { + case f: SQLFunction => makeSQLFunctionBuilder(f) + case _ => + val clsName = func.getClass.getSimpleName + throw UserDefinedFunctionErrors.unsupportedUserDefinedFunction(clsName) + } + } + + private def makeUserDefinedTableFuncBuilder(func: UserDefinedFunction): TableFunctionBuilder = { + func match { + case f: SQLFunction => makeSQLTableFunctionBuilder(f) + case _ => + val clsName = func.getClass.getSimpleName + throw UserDefinedFunctionErrors.unsupportedUserDefinedFunction(clsName) + } + } + /** * Loads resources such as JARs and Files for a function. Every resource is represented * by a tuple (resource type, resource uri). @@ -1591,6 +1754,81 @@ class SessionCatalog( "hive") } + /** + * Registers a temporary or persistent SQL scalar function into a session-specific + * [[FunctionRegistry]]. + */ + def registerSQLScalarFunction( + function: SQLFunction, + overrideIfExists: Boolean): Unit = { + registerUserDefinedFunction[Expression]( + function, + overrideIfExists, + functionRegistry, + makeSQLFunctionBuilder(function)) + } + + /** + * Registers a temporary or persistent SQL table function into a session-specific + * [[TableFunctionRegistry]]. + */ + def registerSQLTableFunction( + function: SQLFunction, + overrideIfExists: Boolean): Unit = { + registerUserDefinedFunction[LogicalPlan]( + function, + overrideIfExists, + tableFunctionRegistry, + makeSQLTableFunctionBuilder(function)) + } + + /** + * Rearranges the arguments of a UDF into positional order. 
+ */ + private def rearrangeArguments( + inputParams: Option[StructType], + expressions: Seq[Expression], + functionName: String) : Seq[Expression] = { + val firstNamedArgumentExpressionIdx = + expressions.indexWhere(_.isInstanceOf[NamedArgumentExpression]) + if (firstNamedArgumentExpressionIdx == -1) { + return expressions + } + + val paramNames: Seq[InputParameter] = + if (inputParams.isDefined) { + inputParams.get.map { + p => p.getDefault() match { + case Some(defaultExpr) => + // This cast is needed to ensure the default value is of the target data type. + InputParameter(p.name, Some(Cast(parseDefault(defaultExpr, parser), p.dataType))) + case None => + InputParameter(p.name) + } + }.toSeq + } else { + Seq() + } + + NamedParametersSupport.defaultRearrange( + FunctionSignature(paramNames), expressions, functionName) + } + + /** + * Registers a temporary or permanent SQL function into a session-specific function registry. + */ + private def registerUserDefinedFunction[T]( + function: UserDefinedFunction, + overrideIfExists: Boolean, + registry: FunctionRegistryBase[T], + functionBuilder: Seq[Expression] => T): Unit = { + if (registry.functionExists(function.name) && !overrideIfExists) { + throw QueryCompilationErrors.functionAlreadyExistsError(function.name) + } + val info = function.toExpressionInfo + registry.registerFunction(function.name, info, functionBuilder) + } + /** * Unregister a temporary or permanent function from a session-specific [[FunctionRegistry]] * or [[TableFunctionRegistry]]. Return true if function exists. 
@@ -1747,7 +1985,11 @@ class SessionCatalog( requireDbExists(db) if (externalCatalog.functionExists(db, funcName)) { val metadata = externalCatalog.getFunction(db, funcName) - makeExprInfoForHiveFunction(metadata.copy(identifier = qualifiedIdent)) + if (metadata.isUserDefinedFunction) { + UserDefinedFunction.fromCatalogFunction(metadata, parser).toExpressionInfo + } else { + makeExprInfoForHiveFunction(metadata.copy(identifier = qualifiedIdent)) + } } else { failFunctionLookup(name) } @@ -1759,7 +2001,26 @@ class SessionCatalog( */ def resolvePersistentFunction( name: FunctionIdentifier, arguments: Seq[Expression]): Expression = { - resolvePersistentFunctionInternal(name, arguments, functionRegistry, makeFunctionBuilder) + resolvePersistentFunctionInternal[Expression]( + name, + arguments, + functionRegistry, + registerHiveFunc = func => + registerFunction( + func, + overrideIfExists = false, + registry = functionRegistry, + functionBuilder = makeFunctionBuilder(func) + ), + registerUserDefinedFunc = function => { + val builder = makeUserDefinedScalarFuncBuilder(function) + registerUserDefinedFunction[Expression]( + function = function, + overrideIfExists = false, + registry = functionRegistry, + functionBuilder = builder) + } + ) } /** @@ -1768,16 +2029,29 @@ class SessionCatalog( def resolvePersistentTableFunction( name: FunctionIdentifier, arguments: Seq[Expression]): LogicalPlan = { - // We don't support persistent table functions yet. - val builder = (func: CatalogFunction) => failFunctionLookup(name) - resolvePersistentFunctionInternal(name, arguments, tableFunctionRegistry, builder) + resolvePersistentFunctionInternal[LogicalPlan]( + name, + arguments, + tableFunctionRegistry, + // We don't support persistent Hive table functions yet. 
+ registerHiveFunc = (func: CatalogFunction) => failFunctionLookup(name), + registerUserDefinedFunc = function => { + val builder = makeUserDefinedTableFuncBuilder(function) + registerUserDefinedFunction[LogicalPlan]( + function = function, + overrideIfExists = false, + registry = tableFunctionRegistry, + functionBuilder = builder) + } + ) } private def resolvePersistentFunctionInternal[T]( name: FunctionIdentifier, arguments: Seq[Expression], registry: FunctionRegistryBase[T], - createFunctionBuilder: CatalogFunction => FunctionRegistryBase[T]#FunctionBuilder): T = { + registerHiveFunc: CatalogFunction => Unit, + registerUserDefinedFunc: UserDefinedFunction => Unit): T = { // `synchronized` is used to prevent multiple threads from concurrently resolving the // same function that has not yet been loaded into the function registry. This is needed // because calling `registerFunction` twice with `overrideIfExists = false` can lead to @@ -1793,19 +2067,24 @@ class SessionCatalog( // The function has not been loaded to the function registry, which means // that the function is a persistent function (if it actually has been registered // in the metastore). We need to first put the function in the function registry. - val catalogFunction = externalCatalog.getFunction(db, funcName) - loadFunctionResources(catalogFunction.resources) + val catalogFunction = try { + externalCatalog.getFunction(db, funcName) + } catch { + case _: AnalysisException => failFunctionLookup(qualifiedIdent) + } // Please note that qualifiedName is provided by the user. However, // catalogFunction.identifier.unquotedString is returned by the underlying // catalog. So, it is possible that qualifiedName is not exactly the same as // catalogFunction.identifier.unquotedString (difference is on case-sensitivity). // At here, we preserve the input from the user. 
val funcMetadata = catalogFunction.copy(identifier = qualifiedIdent) - registerFunction( - funcMetadata, - overrideIfExists = false, - registry = registry, - functionBuilder = createFunctionBuilder(funcMetadata)) + if (!catalogFunction.isUserDefinedFunction) { + loadFunctionResources(catalogFunction.resources) + registerHiveFunc(funcMetadata) + } else { + val function = UserDefinedFunction.fromCatalogFunction(funcMetadata, parser) + registerUserDefinedFunc(function) + } // Now, we need to create the Expression. registry.lookupFunction(qualifiedIdent, arguments) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/TempVariableManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/TempVariableManager.scala index abe6cede0c550..2c262da1f4449 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/TempVariableManager.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/TempVariableManager.scala @@ -63,6 +63,10 @@ class TempVariableManager extends DataTypeErrorsBase { def clear(): Unit = synchronized { variables.clear() } + + def isEmpty: Boolean = synchronized { + variables.isEmpty + } } case class VariableDefinition(defaultValueSQL: String, currentValue: Literal) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala new file mode 100644 index 0000000000000..a76ca7b15c278 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} + +import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.expressions.ExpressionInfo +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.types.{DataType, StructType} + +/** + * The base class for all user defined functions registered via SQL. + */ +trait UserDefinedFunction { + + /** + * Qualified name of the function + */ + def name: FunctionIdentifier + + /** + * Additional properties to be serialized for the function. + * Use this to preserve the runtime configuration that should be used during the function + * execution, such as SQL configs etc. See [[SQLConf]] for more info. + */ + def properties: Map[String, String] + + /** + * Get SQL configs from the function properties. + * Use this to restore the SQL configs that should be used for this function. 
+ */ + def getSQLConfigs: Map[String, String] = { + UserDefinedFunction.propertiesToSQLConfigs(properties) + } + + /** + * Owner of the function + */ + def owner: Option[String] + + /** + * Function creation time in milliseconds since the Unix epoch + */ + def createTimeMs: Long + + /** + * The language of the user defined function. + */ + def language: RoutineLanguage + + /** + * Convert the function to a [[CatalogFunction]]. + */ + def toCatalogFunction: CatalogFunction + + /** + * Convert the SQL function to an [[ExpressionInfo]]. + */ + def toExpressionInfo: ExpressionInfo +} + +object UserDefinedFunction { + val SQL_CONFIG_PREFIX = "sqlConfig." + val INDEX_LENGTH: Int = 3 + + // The default Hive Metastore SQL schema length for function resource uri. + private val HIVE_FUNCTION_RESOURCE_URI_LENGTH_THRESHOLD: Int = 4000 + + def parseTableSchema(text: String, parser: ParserInterface): StructType = { + val parsed = parser.parseTableSchema(text) + CharVarcharUtils.failIfHasCharVarchar(parsed).asInstanceOf[StructType] + } + + def parseDataType(text: String, parser: ParserInterface): DataType = { + val dataType = parser.parseDataType(text) + CharVarcharUtils.failIfHasCharVarchar(dataType) + } + + private val _mapper: ObjectMapper = getObjectMapper + + /** + * A shared [[ObjectMapper]] for serializations. + */ + def mapper: ObjectMapper = _mapper + + /** + * Convert the given properties to a list of function resources. + */ + def propertiesToFunctionResources( + props: Map[String, String], + name: FunctionIdentifier): Seq[FunctionResource] = { + val blob = mapper.writeValueAsString(props) + val threshold = HIVE_FUNCTION_RESOURCE_URI_LENGTH_THRESHOLD - INDEX_LENGTH + blob.grouped(threshold).zipWithIndex.map { case (part, i) => + // Add a sequence number to the part and pad it to a given length. + // E.g. 1 will become "001" if the given length is 3. 
+ val index = s"%0${INDEX_LENGTH}d".format(i) + if (index.length > INDEX_LENGTH) { + throw UserDefinedFunctionErrors.routinePropertyTooLarge(name.funcName) + } + FunctionResource(FileResource, index + part) + }.toSeq + } + + /** + * Get an object mapper to serialize and deserialize function properties. + */ + private def getObjectMapper: ObjectMapper = { + val mapper = new ObjectMapper with ClassTagExtensions + mapper.setSerializationInclusion(Include.NON_ABSENT) + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + mapper.registerModule(DefaultScalaModule) + mapper + } + + /** + * Convert a [[CatalogFunction]] into a corresponding UDF. + */ + def fromCatalogFunction(function: CatalogFunction, parser: ParserInterface) + : UserDefinedFunction = { + val className = function.className + if (SQLFunction.isSQLFunction(className)) { + SQLFunction.fromCatalogFunction(function, parser) + } else { + throw SparkException.internalError(s"Unsupported function type $className") + } + } + + /** + * Verify if the function is a [[UserDefinedFunction]]. + */ + def isUserDefinedFunction(className: String): Boolean = SQLFunction.isSQLFunction(className) + + /** + * Convert properties to SQL configs. 
+ */ + def propertiesToSQLConfigs(properties: Map[String, String]): Map[String, String] = { + try { + for ((key, value) <- properties if key.startsWith(SQL_CONFIG_PREFIX)) + yield (key.substring(SQL_CONFIG_PREFIX.length), value) + } catch { + case e: Exception => throw SparkException.internalError( + "Corrupted user defined function SQL configs in catalog", cause = e) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunctionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunctionErrors.scala index a5381669caea8..904a17bc8ce44 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunctionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunctionErrors.scala @@ -18,10 +18,12 @@ package org.apache.spark.sql.catalyst.catalog import org.apache.spark.SparkException +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.errors.QueryErrorsBase /** - * Errors during registering and executing [[UserDefinedFunction]]s. + * Errors during registering and executing + * [[org.apache.spark.sql.expressions.UserDefinedFunction]]s. 
*/ object UserDefinedFunctionErrors extends QueryErrorsBase { def unsupportedUserDefinedFunction(language: RoutineLanguage): Throwable = { @@ -31,4 +33,86 @@ object UserDefinedFunctionErrors extends QueryErrorsBase { def unsupportedUserDefinedFunction(language: String): Throwable = { SparkException.internalError(s"Unsupported user defined function type: $language") } + + def duplicateParameterNames(routineName: String, names: String): Throwable = { + new AnalysisException( + errorClass = "DUPLICATE_ROUTINE_PARAMETER_NAMES", + messageParameters = Map("routineName" -> routineName, "names" -> names)) + } + + def duplicateReturnsColumns(routineName: String, columns: String): Throwable = { + new AnalysisException( + errorClass = "DUPLICATE_ROUTINE_RETURNS_COLUMNS", + messageParameters = Map("routineName" -> routineName, "columns" -> columns)) + } + + def cannotSpecifyNotNullOnFunctionParameters(input: String): Throwable = { + new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.NOT_NULL_ON_FUNCTION_PARAMETERS", + messageParameters = Map("input" -> input)) + } + + def bodyIsNotAQueryForSqlTableUdf(functionName: String): Throwable = { + new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.SQL_TABLE_UDF_BODY_MUST_BE_A_QUERY", + messageParameters = Map("name" -> functionName)) + } + + def missingColumnNamesForSqlTableUdf(functionName: String): Throwable = { + new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.SQL_TABLE_UDF_MISSING_COLUMN_NAMES", + messageParameters = Map("functionName" -> toSQLId(functionName))) + } + + def invalidTempViewReference(routineName: Seq[String], tempViewName: Seq[String]): Throwable = { + new AnalysisException( + errorClass = "INVALID_TEMP_OBJ_REFERENCE", + messageParameters = Map( + "obj" -> "FUNCTION", + "objName" -> toSQLId(routineName), + "tempObj" -> "VIEW", + "tempObjName" -> toSQLId(tempViewName) + ) + ) + } + + def invalidTempFuncReference(routineName: Seq[String], tempFuncName: String): Throwable = { + new 
AnalysisException( + errorClass = "INVALID_TEMP_OBJ_REFERENCE", + messageParameters = Map( + "obj" -> "FUNCTION", + "objName" -> toSQLId(routineName), + "tempObj" -> "FUNCTION", + "tempObjName" -> toSQLId(tempFuncName) + ) + ) + } + + def invalidTempVarReference(routineName: Seq[String], varName: Seq[String]): Throwable = { + new AnalysisException( + errorClass = "INVALID_TEMP_OBJ_REFERENCE", + messageParameters = Map( + "obj" -> "FUNCTION", + "objName" -> toSQLId(routineName), + "tempObj" -> "VARIABLE", + "tempObjName" -> toSQLId(varName))) + } + + def routinePropertyTooLarge(routineName: String): Throwable = { + new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.ROUTINE_PROPERTY_TOO_LARGE", + messageParameters = Map("name" -> toSQLId(routineName))) + } + + def notAScalarFunction(functionName: Seq[String]): Throwable = { + new AnalysisException( + errorClass = "NOT_A_SCALAR_FUNCTION", + messageParameters = Map("functionName" -> toSQLId(functionName))) + } + + def notATableFunction(functionName: Seq[String]): Throwable = { + new AnalysisException( + errorClass = "NOT_A_TABLE_FUNCTION", + messageParameters = Map("functionName" -> toSQLId(functionName))) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index dcd1d3137da3f..7836e533c8b5c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.catalog import java.net.URI import java.time.{ZoneId, ZoneOffset} -import java.util.Date import scala.collection.mutable import scala.util.control.NonFatal @@ -28,7 +27,7 @@ import com.fasterxml.jackson.annotation.JsonInclude.Include import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import 
com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} import org.apache.commons.lang3.StringUtils -import org.json4s.JsonAST.{JArray, JString} +import org.json4s.JsonAST.{JArray, JBool, JDouble, JInt, JNull, JObject, JString, JValue} import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkException @@ -51,6 +50,52 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils} import org.apache.spark.util.ArrayImplicits._ +/** + * Interface providing util to convert JValue to String representation of catalog entities. + */ +trait MetadataMapSupport { + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] + + def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { + jsonToString(toJsonLinkedHashMap) + } + + protected def jsonToString( + jsonMap: mutable.LinkedHashMap[String, JValue]): mutable.LinkedHashMap[String, String] = { + val map = new mutable.LinkedHashMap[String, String]() + jsonMap.foreach { case (key, jValue) => + val stringValue = jValue match { + case JString(value) => value + case JArray(values) => + values.map(_.values) + .map { + case str: String => quoteIdentifier(str) + case other => other.toString + } + .mkString("[", ", ", "]") + case JObject(fields) => + fields.map { case (k, v) => + s"$k=${v.values.toString}" + } + .mkString("[", ", ", "]") + case JInt(value) => value.toString + case JDouble(value) => value.toString + case _ => jValue.values.toString + } + map.put(key, stringValue) + } + map + } + + val timestampFormatter = new Iso8601TimestampFormatter( + pattern = "yyyy-MM-dd'T'HH:mm:ss'Z'", + zoneId = ZoneId.of("UTC"), + locale = DateFormatter.defaultLocale, + legacyFormat = LegacyDateFormats.LENIENT_SIMPLE_DATE_FORMAT, + isParsing = true + ) +} + /** * A function defined in the catalog. 
@@ -62,7 +107,9 @@ import org.apache.spark.util.ArrayImplicits._ case class CatalogFunction( identifier: FunctionIdentifier, className: String, - resources: Seq[FunctionResource]) + resources: Seq[FunctionResource]) { + val isUserDefinedFunction: Boolean = UserDefinedFunction.isUserDefinedFunction(className) +} /** @@ -74,25 +121,31 @@ case class CatalogStorageFormat( outputFormat: Option[String], serde: Option[String], compressed: Boolean, - properties: Map[String, String]) { + properties: Map[String, String]) extends MetadataMapSupport { override def toString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("Storage(", ", ", ")") } - def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { - val map = new mutable.LinkedHashMap[String, String]() - locationUri.foreach(l => map.put("Location", l.toString)) - serde.foreach(map.put("Serde Library", _)) - inputFormat.foreach(map.put("InputFormat", _)) - outputFormat.foreach(map.put("OutputFormat", _)) - if (compressed) map.put("Compressed", "") + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { + val map = mutable.LinkedHashMap[String, JValue]() + + locationUri.foreach(l => map += ("Location" -> JString(l.toString))) + serde.foreach(s => map += ("Serde Library" -> JString(s))) + inputFormat.foreach(format => map += ("InputFormat" -> JString(format))) + outputFormat.foreach(format => map += ("OutputFormat" -> JString(format))) + + if (compressed) map += ("Compressed" -> JBool(true)) + SQLConf.get.redactOptions(properties) match { case props if props.isEmpty => // No-op case props => - map.put("Storage Properties", props.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")) + val storagePropsJson = JObject( + props.map { case (k, v) => k -> JString(v) }.toList + ) + map += ("Storage Properties" -> storagePropsJson) } map } @@ -120,35 +173,46 @@ case class CatalogTablePartition( parameters: 
Map[String, String] = Map.empty, createTime: Long = System.currentTimeMillis, lastAccessTime: Long = -1, - stats: Option[CatalogStatistics] = None) { + stats: Option[CatalogStatistics] = None) extends MetadataMapSupport { + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { + val map = mutable.LinkedHashMap[String, JValue]() - def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { - val map = new mutable.LinkedHashMap[String, String]() - val specString = spec.map { case (k, v) => s"$k=$v" }.mkString(", ") - map.put("Partition Values", s"[$specString]") - map ++= storage.toLinkedHashMap - if (parameters.nonEmpty) { - map.put("Partition Parameters", s"{" + - s"${SQLConf.get.redactOptions(parameters).map(p => p._1 + "=" + p._2).mkString(", ")}}") + val specJson = JObject(spec.map { case (k, v) => k -> JString(v) }.toList) + map += ("Partition Values" -> specJson) + + storage.toJsonLinkedHashMap.foreach { case (k, v) => + map += (k -> v) } - map.put("Created Time", new Date(createTime).toString) - val lastAccess = { - if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString + + if (parameters.nonEmpty) { + val paramsJson = JObject(SQLConf.get.redactOptions(parameters).map { + case (k, v) => k -> JString(v) + }.toList) + map += ("Partition Parameters" -> paramsJson) } - map.put("Last Access", lastAccess) - stats.foreach(s => map.put("Partition Statistics", s.simpleString)) + + map += ("Created Time" -> JString( + timestampFormatter.format(DateTimeUtils.millisToMicros(createTime)))) + + val lastAccess = if (lastAccessTime <= 0) JString("UNKNOWN") + else JString( + timestampFormatter.format(DateTimeUtils.millisToMicros(createTime))) + map += ("Last Access" -> lastAccess) + + stats.foreach(s => map += ("Partition Statistics" -> JString(s.simpleString))) + map } override def toString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" 
}.mkString("CatalogPartition(\n\t", "\n\t", ")") } /** Readable string representation for the CatalogTablePartition. */ def simpleString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("", "\n", "") } @@ -284,7 +348,7 @@ object ClusterBySpec { case class BucketSpec( numBuckets: Int, bucketColumnNames: Seq[String], - sortColumnNames: Seq[String]) extends SQLConfHelper { + sortColumnNames: Seq[String]) extends SQLConfHelper with MetadataMapSupport { if (numBuckets <= 0 || numBuckets > conf.bucketingMaxBuckets) { throw QueryCompilationErrors.invalidBucketNumberError( @@ -301,11 +365,11 @@ case class BucketSpec( s"$numBuckets buckets, $bucketString$sortString" } - def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { - mutable.LinkedHashMap[String, String]( - "Num Buckets" -> numBuckets.toString, - "Bucket Columns" -> bucketColumnNames.map(quoteIdentifier).mkString("[", ", ", "]"), - "Sort Columns" -> sortColumnNames.map(quoteIdentifier).mkString("[", ", ", "]") + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { + mutable.LinkedHashMap[String, JValue]( + "Num Buckets" -> JInt(numBuckets), + "Bucket Columns" -> JArray(bucketColumnNames.map(JString).toList), + "Sort Columns" -> JArray(sortColumnNames.map(JString).toList) ) } } @@ -350,11 +414,12 @@ case class CatalogTable( stats: Option[CatalogStatistics] = None, viewText: Option[String] = None, comment: Option[String] = None, + collation: Option[String] = None, unsupportedFeatures: Seq[String] = Seq.empty, tracksPartitionsInCatalog: Boolean = false, schemaPreservesCase: Boolean = true, ignoredProperties: Map[String, String] = Map.empty, - viewOriginalText: Option[String] = None) { + viewOriginalText: Option[String] = None) extends MetadataMapSupport { import CatalogTable._ @@ -523,65 +588,81 @@ case class CatalogTable( locationUri, inputFormat, outputFormat, serde, compressed, 
properties)) } + def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { + val filteredTableProperties = SQLConf.get + .redactOptions(properties.filter { case (k, v) => + !k.startsWith(VIEW_PREFIX) && v.nonEmpty + }) - def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { - val map = new mutable.LinkedHashMap[String, String]() - val tableProperties = - SQLConf.get.redactOptions(properties.filter { case (k, _) => !k.startsWith(VIEW_PREFIX) }) - .toSeq.sortBy(_._1) - .map(p => p._1 + "=" + p._2) - val partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]") - val lastAccess = { - if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString + val tableProperties: JValue = + if (filteredTableProperties.isEmpty) JNull + else JObject( + filteredTableProperties.toSeq.sortBy(_._1).map { case (k, v) => k -> JString(v) }: _*) + + val partitionColumns: JValue = + if (partitionColumnNames.nonEmpty) JArray(partitionColumnNames.map(JString).toList) + else JNull + + val lastAccess: JValue = + if (lastAccessTime <= 0) JString("UNKNOWN") + else JString(timestampFormatter.format(DateTimeUtils.millisToMicros(createTime))) + + val viewQueryOutputColumns: JValue = + if (viewQueryColumnNames.nonEmpty) JArray(viewQueryColumnNames.map(JString).toList) + else JNull + + val map = mutable.LinkedHashMap[String, JValue]() + + if (identifier.catalog.isDefined) map += "Catalog" -> JString(identifier.catalog.get) + if (identifier.database.isDefined) map += "Database" -> JString(identifier.database.get) + map += "Table" -> JString(identifier.table) + if (Option(owner).exists(_.nonEmpty)) map += "Owner" -> JString(owner) + map += "Created Time" -> + JString(timestampFormatter.format(DateTimeUtils.millisToMicros(createTime))) + if (lastAccess != JNull) map += "Last Access" -> lastAccess + map += "Created By" -> JString(s"Spark $createVersion") + map += "Type" -> JString(tableType.name) + if (provider.isDefined) map += "Provider" -> 
JString(provider.get) + bucketSpec.foreach { spec => + map ++= spec.toJsonLinkedHashMap.map { case (k, v) => k -> v } } - - identifier.catalog.foreach(map.put("Catalog", _)) - identifier.database.foreach(map.put("Database", _)) - map.put("Table", identifier.table) - if (owner != null && owner.nonEmpty) map.put("Owner", owner) - map.put("Created Time", new Date(createTime).toString) - map.put("Last Access", lastAccess) - map.put("Created By", "Spark " + createVersion) - map.put("Type", tableType.name) - provider.foreach(map.put("Provider", _)) - bucketSpec.foreach(map ++= _.toLinkedHashMap) - comment.foreach(map.put("Comment", _)) - if (tableType == CatalogTableType.VIEW) { - viewText.foreach(map.put("View Text", _)) - viewOriginalText.foreach(map.put("View Original Text", _)) - if (SQLConf.get.viewSchemaBindingEnabled) { - map.put("View Schema Mode", viewSchemaMode.toString) - } - if (viewCatalogAndNamespace.nonEmpty) { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - map.put("View Catalog and Namespace", viewCatalogAndNamespace.quoted) - } - if (viewQueryColumnNames.nonEmpty) { - map.put("View Query Output Columns", viewQueryColumnNames.mkString("[", ", ", "]")) - } + if (comment.isDefined) map += "Comment" -> JString(comment.get) + if (collation.isDefined) map += "Collation" -> JString(collation.get) + if (tableType == CatalogTableType.VIEW && viewText.isDefined) { + map += "View Text" -> JString(viewText.get) } - - if (tableProperties.nonEmpty) { - map.put("Table Properties", tableProperties.mkString("[", ", ", "]")) + if (tableType == CatalogTableType.VIEW && viewOriginalText.isDefined) { + map += "View Original Text" -> JString(viewOriginalText.get) } - stats.foreach(s => map.put("Statistics", s.simpleString)) - map ++= storage.toLinkedHashMap - if (tracksPartitionsInCatalog) map.put("Partition Provider", "Catalog") - if (partitionColumnNames.nonEmpty) map.put("Partition Columns", partitionColumns) - if (schema.nonEmpty) map.put("Schema", 
schema.treeString) - - map + if (SQLConf.get.viewSchemaBindingEnabled && tableType == CatalogTableType.VIEW) { + map += "View Schema Mode" -> JString(viewSchemaMode.toString) + } + if (viewCatalogAndNamespace.nonEmpty && tableType == CatalogTableType.VIEW) { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + map += "View Catalog and Namespace" -> JString(viewCatalogAndNamespace.quoted) + } + if (viewQueryOutputColumns != JNull) { + map += "View Query Output Columns" -> viewQueryOutputColumns + } + if (tableProperties != JNull) map += "Table Properties" -> tableProperties + if (stats.isDefined) map += "Statistics" -> JString(stats.get.simpleString) + map ++= storage.toJsonLinkedHashMap.map { case (k, v) => k -> v } + if (tracksPartitionsInCatalog) map += "Partition Provider" -> JString("Catalog") + if (partitionColumns != JNull) map += "Partition Columns" -> partitionColumns + if (schema.nonEmpty) map += "Schema" -> JString(schema.treeString) + + map.filterNot(_._2 == JNull) } override def toString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("CatalogTable(\n", "\n", ")") } /** Readable string representation for the CatalogTable. 
*/ def simpleString: String = { - toLinkedHashMap.map { case ((key, value)) => + toLinkedHashMap.map { case (key, value) => if (value.isEmpty) key else s"$key: $value" }.mkString("", "\n", "") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala index 5a23d6f7a3ccb..6c68bc1aa5890 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala @@ -103,6 +103,16 @@ class CSVOptions( val delimiter = CSVExprUtils.toDelimiterStr( parameters.getOrElse(SEP, parameters.getOrElse(DELIMITER, ","))) + + val extension = { + val ext = parameters.getOrElse(EXTENSION, "csv") + if (ext.size != 3 && !ext.forall(_.isLetter)) { + throw QueryExecutionErrors.invalidFileExtensionError(EXTENSION, ext) + } + + ext + } + val parseMode: ParseMode = parameters.get(MODE).map(ParseMode.fromString).getOrElse(PermissiveMode) val charset = parameters.get(ENCODING).orElse(parameters.get(CHARSET)) @@ -385,6 +395,7 @@ object CSVOptions extends DataSourceOptions { val NEGATIVE_INF = newOption("negativeInf") val TIME_ZONE = newOption("timeZone") val UNESCAPED_QUOTE_HANDLING = newOption("unescapedQuoteHandling") + val EXTENSION = newOption("extension") // Options with alternative val ENCODING = "encoding" val CHARSET = "charset" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index f2f86a90d5172..5f0b42fec0fa8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -87,7 +87,8 @@ object ExpressionEncoder { } constructProjection(row).get(0, anyObjectType).asInstanceOf[T] } catch { 
- case e: SparkRuntimeException if e.getCondition == "NOT_NULL_ASSERT_VIOLATION" => + case e: SparkRuntimeException if e.getCondition == "NOT_NULL_ASSERT_VIOLATION" || + e.getCondition == "EXCEED_LIMIT_LENGTH" => throw e case e: Exception => throw QueryExecutionErrors.expressionDecodingError(e, expressions) @@ -115,7 +116,8 @@ object ExpressionEncoder { inputRow(0) = t extractProjection(inputRow) } catch { - case e: SparkRuntimeException if e.getCondition == "NOT_NULL_ASSERT_VIOLATION" => + case e: SparkRuntimeException if e.getCondition == "NOT_NULL_ASSERT_VIOLATION" || + e.getCondition == "EXCEED_LIMIT_LENGTH" => throw e case e: Exception => throw QueryExecutionErrors.expressionEncodingError(e, expressions) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala index d38ee01485288..4eb14fb9e7b86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala @@ -115,7 +115,7 @@ case class CallMethodViaReflection( "requiredType" -> toSQLType( TypeCollection(BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, - StringTypeWithCollation)), + StringTypeWithCollation(supportsTrimCollation = true))), "inputSql" -> toSQLExpr(e), "inputType" -> toSQLType(e.dataType)) ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 154199d37c46d..8773d7a6a029e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -281,7 +281,7 @@ object Cast extends QueryErrorsBase { def needsTimeZone(from: 
DataType, to: DataType): Boolean = (from, to) match { case (VariantType, _) => true case (_: StringType, TimestampType) => true - case (TimestampType, StringType) => true + case (TimestampType, _: StringType) => true case (DateType, TimestampType) => true case (TimestampType, DateType) => true case (TimestampType, TimestampNTZType) => true @@ -565,6 +565,11 @@ case class Cast( } } + private lazy val castArgs = variant.VariantCastArgs( + evalMode != EvalMode.TRY, + timeZoneId, + zoneId) + def needsTimeZone: Boolean = Cast.needsTimeZone(child.dataType, dataType) // [[func]] assumes the input is no longer null because eval already does the null check. @@ -1120,13 +1125,13 @@ case class Cast( _ => throw QueryExecutionErrors.cannotCastFromNullTypeError(to) } else if (from.isInstanceOf[VariantType]) { buildCast[VariantVal](_, v => { - variant.VariantGet.cast(v, to, evalMode != EvalMode.TRY, timeZoneId, zoneId) + variant.VariantGet.cast(v, to, castArgs) }) } else { to match { case dt if dt == from => identity[Any] case VariantType => input => variant.VariantExpressionEvalUtils.castToVariant(input, from) - case _: StringType => castToString(from) + case s: StringType => castToString(from, s.constraint) case BinaryType => castToBinary(from) case DateType => castToDate(from) case decimal: DecimalType => castToDecimal(from, decimal) @@ -1218,12 +1223,10 @@ case class Cast( case _ if from.isInstanceOf[VariantType] => (c, evPrim, evNull) => val tmp = ctx.freshVariable("tmp", classOf[Object]) val dataTypeArg = ctx.addReferenceObj("dataType", to) - val zoneStrArg = ctx.addReferenceObj("zoneStr", timeZoneId) - val zoneIdArg = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) - val failOnError = evalMode != EvalMode.TRY + val castArgsArg = ctx.addReferenceObj("castArgs", castArgs) val cls = classOf[variant.VariantGet].getName code""" - Object $tmp = $cls.cast($c, $dataTypeArg, $failOnError, $zoneStrArg, $zoneIdArg); + Object $tmp = $cls.cast($c, $dataTypeArg, 
$castArgsArg); if ($tmp == null) { $evNull = true; } else { @@ -1234,7 +1237,8 @@ case class Cast( val cls = variant.VariantExpressionEvalUtils.getClass.getName.stripSuffix("$") val fromArg = ctx.addReferenceObj("from", from) (c, evPrim, evNull) => code"$evPrim = $cls.castToVariant($c, $fromArg);" - case _: StringType => (c, evPrim, _) => castToStringCode(from, ctx).apply(c, evPrim) + case s: StringType => + (c, evPrim, _) => castToStringCode(from, ctx, s.constraint).apply(c, evPrim) case BinaryType => castToBinaryCode(from) case DateType => castToDateCode(from, ctx) case decimal: DecimalType => castToDecimalCode(from, decimal, ctx) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index e65a0200b064f..8b7d641828ba1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -61,7 +61,9 @@ object ExprUtils extends EvalHelper with QueryErrorsBase { def convertToMapData(exp: Expression): Map[String, String] = exp match { case m: CreateMap - if AbstractMapType(StringTypeWithCollation, StringTypeWithCollation) + if AbstractMapType( + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true)) .acceptsType(m.dataType) => val arrayMap = m.eval().asInstanceOf[ArrayBasedMapData] ArrayBasedMapData.toScalaMap(arrayMap).map { case (key, value) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index c454799852826..4c83f92509ecd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ 
-28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.{BinaryLike, CurrentOrigin, LeafLike, QuaternaryLike, TernaryLike, TreeNode, UnaryLike} -import org.apache.spark.sql.catalyst.trees.TreePattern.{LAZY_ANALYSIS_EXPRESSION, RUNTIME_REPLACEABLE, TreePattern} +import org.apache.spark.sql.catalyst.trees.TreePattern.{RUNTIME_REPLACEABLE, TreePattern} import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors} @@ -410,20 +410,6 @@ trait Unevaluable extends Expression with FoldableUnevaluable { final override def foldable: Boolean = false } -/** - * An expression that cannot be analyzed. These expressions don't live analysis time or after - * and should not be evaluated during query planning and execution. - */ -trait LazyAnalysisExpression extends Expression { - final override lazy val resolved = false - - final override val nodePatterns: Seq[TreePattern] = - Seq(LAZY_ANALYSIS_EXPRESSION) ++ nodePatternsInternal() - - // Subclasses can override this function to provide more TreePatterns. - def nodePatternsInternal(): Seq[TreePattern] = Seq() -} - /** * An expression that gets replaced at runtime (currently by the optimizer) into a different * expression for evaluation. This is mainly used to provide compatibility with other databases. @@ -1368,19 +1354,24 @@ trait UserDefinedExpression { } trait CommutativeExpression extends Expression { - /** Collects adjacent commutative operations. */ - private def gatherCommutative( + /** + * Collects adjacent commutative operations. 
+ * + * Exposed for testing + */ + private[spark] def gatherCommutative( e: Expression, f: PartialFunction[CommutativeExpression, Seq[Expression]]): Seq[Expression] = { val resultBuffer = scala.collection.mutable.Buffer[Expression]() - val stack = scala.collection.mutable.Stack[Expression](e) + val queue = scala.collection.mutable.Queue[Expression](e) // [SPARK-49977]: Use iterative approach to avoid creating many temporary List objects // for deep expression trees through recursion. - while (stack.nonEmpty) { - stack.pop() match { + while (queue.nonEmpty) { + val current = queue.dequeue() + current match { case c: CommutativeExpression if f.isDefinedAt(c) => - stack.pushAll(f(c)) + queue ++= f(c) case other => resultBuffer += other.canonicalized } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala index 130b4ee4c8cac..de72b94df3ac5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala @@ -22,7 +22,7 @@ import java.time.ZoneOffset import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.util.{ArrayData, DateFormatter, IntervalStringStyles, IntervalUtils, MapData, SparkStringUtils, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, DateFormatter, IntervalStringStyles, IntervalUtils, MapData, SparkStringUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle @@ -53,7 +53,17 @@ trait ToStringBase { self: UnaryExpression with 
TimeZoneAwareExpression => i => func(i.asInstanceOf[T]) // Returns a function to convert a value to pretty string. The function assumes input is not null. - protected final def castToString(from: DataType): Any => UTF8String = from match { + protected final def castToString( + from: DataType, to: StringConstraint = NoConstraint): Any => UTF8String = + to match { + case FixedLength(length) => + s => CharVarcharCodegenUtils.charTypeWriteSideCheck(castToString(from)(s), length) + case MaxLength(length) => + s => CharVarcharCodegenUtils.varcharTypeWriteSideCheck(castToString(from)(s), length) + case NoConstraint => castToString(from) + } + + private def castToString(from: DataType): Any => UTF8String = from match { case CalendarIntervalType => acceptAny[CalendarInterval](i => UTF8String.fromString(i.toString)) case BinaryType => acceptAny[Array[Byte]](binaryFormatter.apply) @@ -167,8 +177,31 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression => // Returns a function to generate code to convert a value to pretty string. It assumes the input // is not null. 
- @scala.annotation.tailrec protected final def castToStringCode( + from: DataType, + ctx: CodegenContext, + to: StringConstraint = NoConstraint): (ExprValue, ExprValue) => Block = + (c, evPrim) => { + val tmpVar = ctx.freshVariable("tmp", classOf[UTF8String]) + val castToString = castToStringCode(from, ctx)(c, tmpVar) + val maintainConstraint = to match { + case FixedLength(length) => + code"""$evPrim = org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils + .charTypeWriteSideCheck($tmpVar, $length);""".stripMargin + case MaxLength(length) => + code"""$evPrim = org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils + .varcharTypeWriteSideCheck($tmpVar, $length);""".stripMargin + case NoConstraint => code"$evPrim = $tmpVar;" + } + code""" + UTF8String $tmpVar; + $castToString + $maintainConstraint + """ + } + + @scala.annotation.tailrec + private def castToStringCode( from: DataType, ctx: CodegenContext): (ExprValue, ExprValue) => Block = { from match { case BinaryType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala index eda2c742ab4b5..142f4a4eae4c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HistogramNumeric.scala @@ -126,7 +126,10 @@ case class HistogramNumeric( // Ignore empty rows, for example: histogram_numeric(null) if (value != null) { // Convert the value to a double value - val doubleValue = value.asInstanceOf[Number].doubleValue + val doubleValue = value match { + case d: Decimal => d.toDouble + case o => o.asInstanceOf[Number].doubleValue() + } buffer.add(doubleValue) } buffer @@ -162,6 +165,11 @@ case class HistogramNumeric( case ShortType => coord.x.toShort case _: DayTimeIntervalType | LongType | TimestampType | 
TimestampNTZType => coord.x.toLong + case d: DecimalType => + val bigDecimal = BigDecimal + .decimal(coord.x, new java.math.MathContext(d.precision)) + .setScale(d.scale, BigDecimal.RoundingMode.HALF_UP) + Decimal(bigDecimal) case _ => coord.x } array(index) = InternalRow.apply(result, coord.y) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala index 97add0b8e45bc..f3eeaa96b3d46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala @@ -183,6 +183,8 @@ case class Mode( } override def orderingFilled: Boolean = child != UnresolvedWithinGroup + override def isOrderingMandatory: Boolean = true + override def isDistinctSupported: Boolean = false assert(orderingFilled || (!orderingFilled && reverseOpt.isEmpty)) @@ -190,7 +192,7 @@ case class Mode( child match { case UnresolvedWithinGroup => if (orderingWithinGroup.length != 1) { - throw QueryCompilationErrors.wrongNumOrderingsForInverseDistributionFunctionError( + throw QueryCompilationErrors.wrongNumOrderingsForFunctionError( nodeName, 1, orderingWithinGroup.length) } orderingWithinGroup.head match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/SupportsOrderingWithinGroup.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/SupportsOrderingWithinGroup.scala index 9c0502a2c1fcf..453251ac61cde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/SupportsOrderingWithinGroup.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/SupportsOrderingWithinGroup.scala @@ -20,9 +20,26 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import 
org.apache.spark.sql.catalyst.expressions.SortOrder /** - * The trait used to set the [[SortOrder]] after inverse distribution functions parsed. + * The trait used to set the [[SortOrder]] for supporting functions. */ trait SupportsOrderingWithinGroup { self: AggregateFunction => - def orderingFilled: Boolean = false def withOrderingWithinGroup(orderingWithinGroup: Seq[SortOrder]): AggregateFunction + + /** Indicator that ordering was set. */ + def orderingFilled: Boolean + + /** + * Tells Analyzer that WITHIN GROUP (ORDER BY ...) is mandatory for function. + * + * @see [[QueryCompilationErrors.functionMissingWithinGroupError]] + */ + def isOrderingMandatory: Boolean + + /** + * Tells Analyzer that DISTINCT is supported. + * The DISTINCT can conflict with order so some functions can ban it. + * + * @see [[QueryCompilationErrors.functionMissingWithinGroupError]] + */ + def isDistinctSupported: Boolean } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala index 3aaf353043a9a..7789c23b50a48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala @@ -18,16 +18,22 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import scala.collection.mutable -import scala.collection.mutable.Growable +import scala.collection.mutable.{ArrayBuffer, Growable} +import scala.util.{Left, Right} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.trees.UnaryLike 
+import org.apache.spark.sql.catalyst.types.PhysicalDataType import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, TypeUtils, UnsafeRowUtils} +import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLExpr import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase} +import org.apache.spark.sql.errors.DataTypeErrors.{toSQLId, toSQLType} +import org.apache.spark.sql.internal.types.StringTypeWithCollation import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.{ByteArray, UTF8String} import org.apache.spark.util.BoundedPriorityQueue /** @@ -36,8 +42,7 @@ import org.apache.spark.util.BoundedPriorityQueue * We have to store all the collected elements in memory, and so notice that too many elements * can cause GC paused and eventually OutOfMemory Errors. */ -abstract class Collect[T <: Growable[Any] with Iterable[Any]] extends TypedImperativeAggregate[T] - with UnaryLike[Expression] { +abstract class Collect[T <: Growable[Any] with Iterable[Any]] extends TypedImperativeAggregate[T] { val child: Expression @@ -102,7 +107,8 @@ abstract class Collect[T <: Growable[Any] with Iterable[Any]] extends TypedImper case class CollectList( child: Expression, mutableAggBufferOffset: Int = 0, - inputAggBufferOffset: Int = 0) extends Collect[mutable.ArrayBuffer[Any]] { + inputAggBufferOffset: Int = 0) extends Collect[mutable.ArrayBuffer[Any]] + with UnaryLike[Expression] { def this(child: Expression) = this(child, 0, 0) @@ -149,7 +155,7 @@ case class CollectSet( child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) - extends Collect[mutable.HashSet[Any]] with QueryErrorsBase { + extends Collect[mutable.HashSet[Any]] with QueryErrorsBase with UnaryLike[Expression] { def this(child: Expression) = this(child, 0, 0) @@ -215,7 +221,8 @@ case class CollectTopK( num: Int, reverse: Boolean = false, mutableAggBufferOffset: Int = 0, - inputAggBufferOffset: Int = 0) extends Collect[BoundedPriorityQueue[Any]] { + 
inputAggBufferOffset: Int = 0) extends Collect[BoundedPriorityQueue[Any]] + with UnaryLike[Expression] { assert(num > 0) def this(child: Expression, num: Int) = this(child, num, false, 0, 0) @@ -265,3 +272,280 @@ private[aggregate] object CollectTopK { case _ => throw QueryCompilationErrors.invalidNumParameter(e) } } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(expr[, delimiter])[ WITHIN GROUP (ORDER BY key [ASC | DESC] [,...])] - Returns + the concatenation of non-null input values, separated by the delimiter ordered by key. + If all values are null, null is returned. + """, + arguments = """ + Arguments: + * expr - a string or binary expression to be concatenated. + * delimiter - an optional string or binary foldable expression used to separate the input values. + If null, the concatenation will be performed without a delimiter. Default is null. + * key - an optional expression for ordering the input values. Multiple keys can be specified. + If none are specified, the order of the rows in the result is non-deterministic. + """, + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES ('a'), ('b'), ('c') AS tab(col); + abc + > SELECT _FUNC_(col) WITHIN GROUP (ORDER BY col DESC) FROM VALUES ('a'), ('b'), ('c') AS tab(col); + cba + > SELECT _FUNC_(col) FROM VALUES ('a'), (NULL), ('b') AS tab(col); + ab + > SELECT _FUNC_(col) FROM VALUES ('a'), ('a') AS tab(col); + aa + > SELECT _FUNC_(DISTINCT col) FROM VALUES ('a'), ('a'), ('b') AS tab(col); + ab + > SELECT _FUNC_(col, ', ') FROM VALUES ('a'), ('b'), ('c') AS tab(col); + a, b, c + > SELECT _FUNC_(col) FROM VALUES (NULL), (NULL) AS tab(col); + NULL + """, + note = """ + * If the order is not specified, the function is non-deterministic because + the order of the rows may be non-deterministic after a shuffle. + * If DISTINCT is specified, then expr and key must be the same expression. 
+ """, + group = "agg_funcs", + since = "4.0.0" +) +// scalastyle:on line.size.limit +case class ListAgg( + child: Expression, + delimiter: Expression = Literal(null), + orderExpressions: Seq[SortOrder] = Nil, + mutableAggBufferOffset: Int = 0, + inputAggBufferOffset: Int = 0) + extends Collect[mutable.ArrayBuffer[Any]] + with SupportsOrderingWithinGroup + with ImplicitCastInputTypes { + + override def orderingFilled: Boolean = orderExpressions.nonEmpty + + override def isOrderingMandatory: Boolean = false + + override def isDistinctSupported: Boolean = true + + override def withOrderingWithinGroup(orderingWithinGroup: Seq[SortOrder]): AggregateFunction = + copy(orderExpressions = orderingWithinGroup) + + override protected lazy val bufferElementType: DataType = { + if (!needSaveOrderValue) { + child.dataType + } else { + StructType( + StructField("value", child.dataType) + +: orderValuesField + ) + } + } + /** Indicates that the result of [[child]] is not enough for evaluation */ + lazy val needSaveOrderValue: Boolean = !isOrderCompatible(orderExpressions) + + def this(child: Expression) = + this(child, Literal(null), Nil, 0, 0) + + def this(child: Expression, delimiter: Expression) = + this(child, delimiter, Nil, 0, 0) + + override def nullable: Boolean = true + + override def createAggregationBuffer(): mutable.ArrayBuffer[Any] = mutable.ArrayBuffer.empty + + override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = + copy(mutableAggBufferOffset = newMutableAggBufferOffset) + + override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate = + copy(inputAggBufferOffset = newInputAggBufferOffset) + + override def defaultResult: Option[Literal] = Option(Literal.create(null, dataType)) + + override def sql(isDistinct: Boolean): String = { + val distinct = if (isDistinct) "DISTINCT " else "" + val withinGroup = if (orderingFilled) { + s" WITHIN GROUP (ORDER BY 
${orderExpressions.map(_.sql).mkString(", ")})" + } else { + "" + } + s"$prettyName($distinct${child.sql}, ${delimiter.sql})$withinGroup" + } + + override def inputTypes: Seq[AbstractDataType] = + TypeCollection( + StringTypeWithCollation(supportsTrimCollation = true), + BinaryType + ) +: + TypeCollection( + StringTypeWithCollation(supportsTrimCollation = true), + BinaryType, + NullType + ) +: + orderExpressions.map(_ => AnyDataType) + + override def checkInputDataTypes(): TypeCheckResult = { + val matchInputTypes = super.checkInputDataTypes() + if (matchInputTypes.isFailure) { + matchInputTypes + } else if (!delimiter.foldable) { + DataTypeMismatch( + errorSubClass = "NON_FOLDABLE_INPUT", + messageParameters = Map( + "inputName" -> toSQLId("delimiter"), + "inputType" -> toSQLType(delimiter.dataType), + "inputExpr" -> toSQLExpr(delimiter) + ) + ) + } else if (delimiter.dataType == NullType) { + // null is the default empty delimiter so type is not important + TypeCheckSuccess + } else { + TypeUtils.checkForSameTypeInputExpr(child.dataType :: delimiter.dataType :: Nil, prettyName) + } + } + + override def eval(buffer: mutable.ArrayBuffer[Any]): Any = { + if (buffer.nonEmpty) { + val sortedBufferWithoutNulls = sortBuffer(buffer) + concatSkippingNulls(sortedBufferWithoutNulls) + } else { + null + } + } + + /** + * Sort buffer according orderExpressions. + * If orderExpressions is empty then returns buffer as is. + * The format of buffer is determined by [[needSaveOrderValue]] + * @return sorted buffer containing only child's values + */ + private[this] def sortBuffer(buffer: mutable.ArrayBuffer[Any]): mutable.ArrayBuffer[Any] = { + if (!orderingFilled) { + // without order return as is. + return buffer + } + if (!needSaveOrderValue) { + // Here the buffer has structure [childValue0, childValue1, ...] 
+ // and we want to sort it by childValues + val sortOrderExpression = orderExpressions.head + val ascendingOrdering = PhysicalDataType.ordering(sortOrderExpression.dataType) + val ordering = + if (sortOrderExpression.direction == Ascending) ascendingOrdering + else ascendingOrdering.reverse + buffer.sorted(ordering) + } else { + // Here the buffer has structure + // [[childValue, orderValue0, orderValue1, ...], + // [childValue, orderValue0, orderValue1, ...], + // ...] + // and we want to sort it by tuples (orderValue0, orderValue1, ...) + buffer + .asInstanceOf[mutable.ArrayBuffer[InternalRow]] + .sorted(bufferOrdering) + // drop orderValues after sort + .map(_.get(0, child.dataType)) + } + } + + /** + * @return ordering by (orderValue0, orderValue1, ...) + * for InternalRow with format [childValue, orderValue0, orderValue1, ...] + */ + private[this] def bufferOrdering: Ordering[InternalRow] = { + val bufferSortOrder = orderExpressions.zipWithIndex.map { + case (originalOrder, i) => + originalOrder.copy( + // first value is the evaluated child so add +1 for order's values + child = BoundReference(i + 1, originalOrder.dataType, originalOrder.child.nullable) + ) + } + new InterpretedOrdering(bufferSortOrder) + } + + private[this] def concatSkippingNulls(buffer: mutable.ArrayBuffer[Any]): Any = { + getDelimiterValue match { + case Right(delimiterValue: Array[Byte]) => + val inputs = buffer.filter(_ != null).map(_.asInstanceOf[Array[Byte]]) + ByteArray.concatWS(delimiterValue, inputs.toSeq: _*) + case Left(delimiterValue: UTF8String) => + val inputs = buffer.filter(_ != null).map(_.asInstanceOf[UTF8String]) + UTF8String.concatWs(delimiterValue, inputs.toSeq: _*) + } + } + + /** + * @return delimiter value or default empty value if delimiter is null. 
Type respects [[dataType]] + */ + private[this] def getDelimiterValue: Either[UTF8String, Array[Byte]] = { + val delimiterValue = delimiter.eval() + dataType match { + case _: StringType => + Left( + if (delimiterValue == null) UTF8String.fromString("") + else delimiterValue.asInstanceOf[UTF8String] + ) + case _: BinaryType => + Right( + if (delimiterValue == null) ByteArray.EMPTY_BYTE + else delimiterValue.asInstanceOf[Array[Byte]] + ) + } + } + + override def dataType: DataType = child.dataType + + override def update(buffer: ArrayBuffer[Any], input: InternalRow): ArrayBuffer[Any] = { + val value = child.eval(input) + if (value != null) { + val v = if (!needSaveOrderValue) { + convertToBufferElement(value) + } else { + InternalRow.fromSeq(convertToBufferElement(value) +: evalOrderValues(input)) + } + buffer += v + } + buffer + } + + private[this] def evalOrderValues(internalRow: InternalRow): Seq[Any] = { + orderExpressions.map(order => convertToBufferElement(order.child.eval(internalRow))) + } + + override protected def convertToBufferElement(value: Any): Any = InternalRow.copyValue(value) + + override def children: Seq[Expression] = child +: delimiter +: orderExpressions + + /** + * Utility func to check if given order is defined and different from [[child]]. 
+ * + * @see [[QueryCompilationErrors.functionAndOrderExpressionMismatchError]] + * @see [[needSaveOrderValue]] + */ + private[this] def isOrderCompatible(someOrder: Seq[SortOrder]): Boolean = { + if (someOrder.isEmpty) { + return true + } + if (someOrder.size == 1 && someOrder.head.child.semanticEquals(child)) { + return true + } + false + } + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = + copy( + child = newChildren.head, + delimiter = newChildren(1), + orderExpressions = newChildren + .drop(2) + .map(_.asInstanceOf[SortOrder]) + ) + + private[this] def orderValuesField: Seq[StructField] = { + orderExpressions.zipWithIndex.map { + case (order, i) => StructField(s"sortOrderValue[$i]", order.dataType) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala index 89a6984b80852..6dfa1b499df23 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala @@ -378,7 +378,7 @@ case class PercentileCont(left: Expression, right: Expression, reverse: Boolean override def withOrderingWithinGroup(orderingWithinGroup: Seq[SortOrder]): AggregateFunction = { if (orderingWithinGroup.length != 1) { - throw QueryCompilationErrors.wrongNumOrderingsForInverseDistributionFunctionError( + throw QueryCompilationErrors.wrongNumOrderingsForFunctionError( nodeName, 1, orderingWithinGroup.length) } orderingWithinGroup.head match { @@ -390,6 +390,10 @@ case class PercentileCont(left: Expression, right: Expression, reverse: Boolean override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): PercentileCont = this.copy(left = newLeft, right = newRight) + + override def orderingFilled: Boolean = 
left != UnresolvedWithinGroup + override def isOrderingMandatory: Boolean = true + override def isDistinctSupported: Boolean = false } /** @@ -432,7 +436,7 @@ case class PercentileDisc( override def withOrderingWithinGroup(orderingWithinGroup: Seq[SortOrder]): AggregateFunction = { if (orderingWithinGroup.length != 1) { - throw QueryCompilationErrors.wrongNumOrderingsForInverseDistributionFunctionError( + throw QueryCompilationErrors.wrongNumOrderingsForFunctionError( nodeName, 1, orderingWithinGroup.length) } orderingWithinGroup.head match { @@ -467,6 +471,10 @@ case class PercentileDisc( toDoubleValue(higherKey) } } + + override def orderingFilled: Boolean = left != UnresolvedWithinGroup + override def isOrderingMandatory: Boolean = true + override def isDistinctSupported: Boolean = false } // scalastyle:off line.size.limit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/avroSqlFunctions.scala similarity index 69% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/avroSqlFunctions.scala index 457f469e0f687..6693ee83fd4af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/avroSqlFunctions.scala @@ -200,3 +200,96 @@ case class ToAvro(child: Expression, jsonFormatSchema: Expression) override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("to_avro") } + +/** + * Returns schema in the DDL format of the avro schema in JSON string format. + * This is a thin wrapper over the [[SchemaOfAvro]] class to create a SQL function. + * + * @param jsonFormatSchema the Avro schema in JSON string format. 
+ * @param options the options to use when performing the conversion. + * + * @since 4.0.0 + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(jsonFormatSchema, options) - Returns schema in the DDL format of the avro schema in JSON string format. + """, + examples = """ + Examples: + > SELECT _FUNC_('{"type": "record", "name": "struct", "fields": [{"name": "u", "type": ["int", "string"]}]}', map()); + STRUCT NOT NULL> + """, + group = "misc_funcs", + since = "4.0.0" +) +// scalastyle:on line.size.limit +case class SchemaOfAvro(jsonFormatSchema: Expression, options: Expression) + extends BinaryExpression with RuntimeReplaceable { + + override def left: Expression = jsonFormatSchema + override def right: Expression = options + + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): Expression = + copy(jsonFormatSchema = newLeft, options = newRight) + + def this(jsonFormatSchema: Expression) = + this(jsonFormatSchema, Literal.create(null)) + + override def checkInputDataTypes(): TypeCheckResult = { + val schemaCheck = jsonFormatSchema.dataType match { + case _: StringType | + _: NullType + if jsonFormatSchema.foldable => + None + case _ => + Some(TypeCheckResult.TypeCheckFailure("The first argument of the SCHEMA_OF_AVRO SQL " + + "function must be a constant string containing the JSON representation of the schema " + + "to use for converting the value from AVRO format")) + } + val optionsCheck = options.dataType match { + case MapType(StringType, StringType, _) | + MapType(NullType, NullType, _) | + _: NullType + if options.foldable => + None + case _ => + Some(TypeCheckResult.TypeCheckFailure("The second argument of the SCHEMA_OF_AVRO SQL " + + "function must be a constant map of strings to strings containing the options to use " + + "for converting the value from AVRO format")) + } + schemaCheck.getOrElse( + optionsCheck.getOrElse( + TypeCheckResult.TypeCheckSuccess)) + } + + override lazy 
val replacement: Expression = { + val schemaValue: String = jsonFormatSchema.eval() match { + case s: UTF8String => + s.toString + case null => + "" + } + val optionsValue: Map[String, String] = options.eval() match { + case a: ArrayBasedMapData if a.keyArray.array.nonEmpty => + val keys: Array[String] = a.keyArray.array.map(_.toString) + val values: Array[String] = a.valueArray.array.map(_.toString) + keys.zip(values).toMap + case _ => + Map.empty + } + val constructor = try { + Utils.classForName("org.apache.spark.sql.avro.SchemaOfAvro").getConstructors.head + } catch { + case _: java.lang.ClassNotFoundException => + throw QueryCompilationErrors.avroNotLoadedSqlFunctionsUnusable( + functionName = "SCHEMA_OF_AVRO") + } + val expr = constructor.newInstance(schemaValue, optionsValue) + expr.asInstanceOf[Expression] + } + + override def prettyName: String = + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("schema_of_avro") +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 30c00f5bf96b8..de74bb2f8cd21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -29,6 +29,7 @@ import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionExce import org.codehaus.commons.compiler.{CompileException, InternalCompilerException} import org.codehaus.janino.ClassBodyEvaluator import org.codehaus.janino.util.ClassFile +import org.codehaus.janino.util.ClassFile.CodeAttribute import org.apache.spark.{SparkException, SparkIllegalArgumentException, TaskContext, TaskKilledException} import org.apache.spark.executor.InputMetrics @@ -1578,9 +1579,6 @@ object CodeGenerator extends Logging { val classes = evaluator.getBytecodes.asScala // Then 
walk the classes to get at the method bytecode. - val codeAttr = Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute") - val codeAttrField = codeAttr.getDeclaredField("code") - codeAttrField.setAccessible(true) val codeStats = classes.map { case (_, classBytes) => val classCodeSize = classBytes.length CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classCodeSize) @@ -1588,8 +1586,8 @@ object CodeGenerator extends Logging { val cf = new ClassFile(new ByteArrayInputStream(classBytes)) val constPoolSize = cf.getConstantPoolSize val methodCodeSizes = cf.methodInfos.asScala.flatMap { method => - method.getAttributes().filter(_.getClass eq codeAttr).map { a => - val byteCodeSize = codeAttrField.get(a).asInstanceOf[Array[Byte]].length + method.getAttributes.collect { case attr: CodeAttribute => + val byteCodeSize = attr.code.length CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(byteCodeSize) if (byteCodeSize > DEFAULT_JVM_HUGE_METHOD_LIMIT) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala index c75bf30ad21f7..024bef08b5273 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala @@ -17,10 +17,12 @@ package org.apache.spark.sql.catalyst.expressions +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.ExpressionBuilder +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, UnresolvedException} import org.apache.spark.sql.catalyst.expressions.codegen._ -import org.apache.spark.sql.catalyst.util.CollationFactory +import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UNRESOLVED_COLLATION} +import 
org.apache.spark.sql.catalyst.util.{AttributeNameParser, CollationFactory} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.types.StringTypeWithCollation @@ -37,7 +39,7 @@ import org.apache.spark.sql.types._ examples = """ Examples: > SELECT COLLATION('Spark SQL' _FUNC_ UTF8_LCASE); - UTF8_LCASE + SYSTEM.BUILTIN.UTF8_LCASE """, since = "4.0.0", group = "string_funcs") @@ -56,7 +58,8 @@ object CollateExpressionBuilder extends ExpressionBuilder { evalCollation.toString.toUpperCase().contains("TRIM")) { throw QueryCompilationErrors.trimCollationNotEnabledError() } - Collate(e, evalCollation.toString) + Collate(e, UnresolvedCollation( + AttributeNameParser.parseAttributeName(evalCollation.toString))) } case (_: StringType, false) => throw QueryCompilationErrors.nonFoldableArgumentError( funcName, "collationName", StringType) @@ -73,24 +76,63 @@ object CollateExpressionBuilder extends ExpressionBuilder { * This function is pass-through, it will not modify the input data. * Only type metadata will be updated. 
*/ -case class Collate(child: Expression, collationName: String) - extends UnaryExpression with ExpectsInputTypes { - private val collationId = CollationFactory.collationNameToId(collationName) - override def dataType: DataType = StringType(collationId) +case class Collate(child: Expression, collation: Expression) + extends BinaryExpression with ExpectsInputTypes { + override def left: Expression = child + override def right: Expression = collation + override def dataType: DataType = collation.dataType override def inputTypes: Seq[AbstractDataType] = - Seq(StringTypeWithCollation(supportsTrimCollation = true)) - - override protected def withNewChildInternal( - newChild: Expression): Expression = copy(newChild) + Seq(StringTypeWithCollation(supportsTrimCollation = true), AnyDataType) override def eval(row: InternalRow): Any = child.eval(row) - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - defineCodeGen(ctx, ev, (in) => in) + /** Just a simple passthrough for code generation. */ + override def genCode(ctx: CodegenContext): ExprCode = child.genCode(ctx) + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + throw SparkException.internalError("Collate.doGenCode should not be called.") + } + + override def sql: String = s"$prettyName(${child.sql}, $collation)" + + override def toString: String = + s"$prettyName($child, $collation)" + + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): Expression = + copy(child = newLeft, collation = newRight) + + override def foldable: Boolean = child.foldable +} + +/** + * An expression that marks an unresolved collation name. + * + * This class is used to represent a collation name that has not yet been resolved from a fully + * qualified collation name. It is used during the analysis phase, where the collation name is + * specified but not yet validated or resolved. 
+ */ +case class UnresolvedCollation(collationName: Seq[String]) + extends LeafExpression with Unevaluable { + override def dataType: DataType = throw new UnresolvedException("dataType") + + override def nullable: Boolean = false + + override lazy val resolved: Boolean = false + + final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_COLLATION) +} + +/** + * An expression that represents a resolved collation name. + */ +case class ResolvedCollation(collationName: String) extends LeafExpression with Unevaluable { + override def nullable: Boolean = false + + override def dataType: DataType = StringType(CollationFactory.collationNameToId(collationName)) - override def sql: String = s"$prettyName(${child.sql}, $collationName)" + override def toString: String = collationName - override def toString: String = s"$prettyName($child, $collationName)" + override def sql: String = collationName } // scalastyle:off line.contains.tab @@ -103,7 +145,7 @@ case class Collate(child: Expression, collationName: String) examples = """ Examples: > SELECT _FUNC_('Spark SQL'); - UTF8_BINARY + SYSTEM.BUILTIN.UTF8_BINARY """, since = "4.0.0", group = "string_funcs") @@ -113,8 +155,8 @@ case class Collation(child: Expression) override protected def withNewChildInternal(newChild: Expression): Collation = copy(newChild) override lazy val replacement: Expression = { val collationId = child.dataType.asInstanceOf[StringType].collationId - val collationName = CollationFactory.fetchCollation(collationId).collationName - Literal.create(collationName, SQLConf.get.defaultStringType) + val fullyQualifiedCollationName = CollationFactory.fullyQualifiedName(collationId) + Literal.create(fullyQualifiedCollationName, SQLConf.get.defaultStringType) } override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeWithCollation(supportsTrimCollation = true)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index fb130574d3474..84e52282b632f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1354,7 +1354,7 @@ case class Reverse(child: Expression) override def nullIntolerant: Boolean = true // Input types are utilized by type coercion in ImplicitTypeCasts. override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(StringTypeWithCollation, ArrayType)) + Seq(TypeCollection(StringTypeWithCollation(supportsTrimCollation = true), ArrayType)) override def dataType: DataType = child.dataType @@ -2127,12 +2127,12 @@ case class ArrayJoin( this(array, delimiter, Some(nullReplacement)) override def inputTypes: Seq[AbstractDataType] = if (nullReplacement.isDefined) { - Seq(AbstractArrayType(StringTypeWithCollation), - StringTypeWithCollation, - StringTypeWithCollation) + Seq(AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true)), + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true)) } else { - Seq(AbstractArrayType(StringTypeWithCollation), - StringTypeWithCollation) + Seq(AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true)), + StringTypeWithCollation(supportsTrimCollation = true)) } override def children: Seq[Expression] = if (nullReplacement.isDefined) { @@ -2609,9 +2609,6 @@ case class ElementAt( @transient private lazy val mapKeyType = left.dataType.asInstanceOf[MapType].keyType - @transient private lazy val mapValueContainsNull = - left.dataType.asInstanceOf[MapType].valueContainsNull - @transient private lazy val arrayElementNullable = left.dataType.asInstanceOf[ArrayType].containsNull @@ -2855,7 +2852,7 @@ case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpressio with QueryErrorsBase 
{ private def allowedTypes: Seq[AbstractDataType] = - Seq(StringTypeWithCollation, BinaryType, ArrayType) + Seq(StringTypeWithCollation(supportsTrimCollation = true), BinaryType, ArrayType) final override val nodePatterns: Seq[TreePattern] = Seq(CONCAT) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csv/CsvExpressionEvalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csv/CsvExpressionEvalUtils.scala index a91e4ab13001b..fd298b33450b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csv/CsvExpressionEvalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csv/CsvExpressionEvalUtils.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.types.{DataType, NullType, StructType} import org.apache.spark.unsafe.types.UTF8String /** - * The expression `CsvToStructs` will utilize the `Invoke` to call it, support codegen. + * The expression `CsvToStructs` will utilize it to support codegen. 
*/ case class CsvToStructsEvaluator( options: Map[String, String], @@ -86,6 +86,7 @@ case class CsvToStructsEvaluator( } final def evaluate(csv: UTF8String): InternalRow = { + if (csv == null) return null converter(parser.parse(csv.toString)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 02e5488835c91..04fb9bc133c67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -23,10 +23,10 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} import org.apache.spark.sql.catalyst.csv._ -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper import org.apache.spark.sql.catalyst.expressions.csv.{CsvToStructsEvaluator, SchemaOfCsvEvaluator} import org.apache.spark.sql.catalyst.expressions.objects.Invoke -import org.apache.spark.sql.catalyst.trees.TreePattern.{RUNTIME_REPLACEABLE, TreePattern} import org.apache.spark.sql.catalyst.util.TypeUtils._ import org.apache.spark.sql.errors.QueryErrorsBase import org.apache.spark.sql.internal.SQLConf @@ -57,17 +57,12 @@ case class CsvToStructs( timeZoneId: Option[String] = None, requiredSchema: Option[StructType] = None) extends UnaryExpression - with RuntimeReplaceable - with ExpectsInputTypes - with TimeZoneAwareExpression { + with TimeZoneAwareExpression + with ExpectsInputTypes { override def nullable: Boolean = child.nullable - override def nodePatternsInternal(): 
Seq[TreePattern] = Seq(RUNTIME_REPLACEABLE) - - // The CSV input data might be missing certain fields. We force the nullability - // of the user-provided schema to avoid data corruptions. - private val nullableSchema: StructType = schema.asNullable + override def nullIntolerant: Boolean = true // Used in `FunctionRegistry` def this(child: Expression, schema: Expression, options: Map[String, String]) = @@ -86,28 +81,48 @@ case class CsvToStructs( child = child, timeZoneId = None) - private val nameOfCorruptRecord = SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD) - override def dataType: DataType = requiredSchema.getOrElse(schema).asNullable override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = { copy(timeZoneId = Option(timeZoneId)) } - override def inputTypes: Seq[AbstractDataType] = StringTypeWithCollation :: Nil + override def inputTypes: Seq[AbstractDataType] = + StringTypeWithCollation(supportsTrimCollation = true) :: Nil override def prettyName: String = "from_csv" + // The CSV input data might be missing certain fields. We force the nullability + // of the user-provided schema to avoid data corruptions. 
+ private val nullableSchema: StructType = schema.asNullable + + @transient + private val nameOfCorruptRecord = SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD) + @transient private lazy val evaluator: CsvToStructsEvaluator = CsvToStructsEvaluator( options, nullableSchema, nameOfCorruptRecord, timeZoneId, requiredSchema) - override def replacement: Expression = Invoke( - Literal.create(evaluator, ObjectType(classOf[CsvToStructsEvaluator])), - "evaluate", - dataType, - Seq(child), - Seq(child.dataType)) + override def nullSafeEval(input: Any): Any = { + evaluator.evaluate(input.asInstanceOf[UTF8String]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val refEvaluator = ctx.addReferenceObj("evaluator", evaluator) + val eval = child.genCode(ctx) + val resultType = CodeGenerator.boxedType(dataType) + val resultTerm = ctx.freshName("result") + ev.copy(code = + code""" + |${eval.code} + |$resultType $resultTerm = ($resultType) $refEvaluator.evaluate(${eval.value}); + |boolean ${ev.isNull} = $resultTerm == null; + |${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + |if (!${ev.isNull}) { + | ${ev.value} = $resultTerm; + |} + |""".stripMargin) + } override protected def withNewChildInternal(newChild: Expression): CsvToStructs = copy(child = newChild) @@ -173,7 +188,8 @@ case class SchemaOfCsv( "evaluate", dataType, Seq(child), - Seq(child.dataType)) + Seq(child.dataType), + returnNullable = false) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index fba3927a0bc9c..81be40b3b6474 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -971,7 +971,7 @@ case class 
DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti override def dataType: DataType = SQLConf.get.defaultStringType override def inputTypes: Seq[AbstractDataType] = - Seq(TimestampType, StringTypeWithCollation) + Seq(TimestampType, StringTypeWithCollation(supportsTrimCollation = true)) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -1129,6 +1129,7 @@ case class GetTimestamp( left: Expression, right: Expression, override val dataType: DataType, + override val suggestedFuncOnFail: String = "try_to_timestamp", timeZoneId: Option[String] = None, failOnError: Boolean = SQLConf.get.ansiEnabled) extends ToTimestamp { @@ -1267,6 +1268,7 @@ object TryToTimestampExpressionBuilder extends ExpressionBuilder { abstract class ToTimestamp extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes { + val suggestedFuncOnFail: String = "try_to_timestamp" def failOnError: Boolean // The result of the conversion to timestamp is microseconds divided by this factor. 
@@ -1279,10 +1281,13 @@ abstract class ToTimestamp override def forTimestampNTZ: Boolean = left.dataType == TimestampNTZType override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection( - StringTypeWithCollation, DateType, TimestampType, TimestampNTZType - ), - StringTypeWithCollation) + Seq( + TypeCollection( + StringTypeWithCollation(supportsTrimCollation = true), + DateType, + TimestampType, + TimestampNTZType), + StringTypeWithCollation(supportsTrimCollation = true)) override def dataType: DataType = LongType override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true @@ -1318,9 +1323,9 @@ abstract class ToTimestamp } } catch { case e: DateTimeException if failOnError => - throw QueryExecutionErrors.ansiDateTimeParseError(e) + throw QueryExecutionErrors.ansiDateTimeParseError(e, suggestedFuncOnFail) case e: ParseException if failOnError => - throw QueryExecutionErrors.ansiDateTimeParseError(e) + throw QueryExecutionErrors.ansiDateTimeParseError(e, suggestedFuncOnFail) case e if isParseError(e) => null } } @@ -1331,7 +1336,7 @@ abstract class ToTimestamp override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = CodeGenerator.javaType(dataType) val parseErrorBranch: String = if (failOnError) { - "throw QueryExecutionErrors.ansiDateTimeParseError(e);" + s"throw QueryExecutionErrors.ansiDateTimeParseError(e, \"${suggestedFuncOnFail}\");" } else { s"${ev.isNull} = true;" } @@ -1454,7 +1459,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ override def nullable: Boolean = true override def inputTypes: Seq[AbstractDataType] = - Seq(LongType, StringTypeWithCollation) + Seq(LongType, StringTypeWithCollation(supportsTrimCollation = true)) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -1566,7 +1571,7 @@ case class NextDay( def this(left: Expression, right: Expression) = this(left, right, 
SQLConf.get.ansiEnabled) override def inputTypes: Seq[AbstractDataType] = - Seq(DateType, StringTypeWithCollation) + Seq(DateType, StringTypeWithCollation(supportsTrimCollation = true)) override def dataType: DataType = DateType override def nullable: Boolean = true @@ -1781,7 +1786,7 @@ sealed trait UTCTimestamp extends BinaryExpression with ImplicitCastInputTypes { val funcName: String override def inputTypes: Seq[AbstractDataType] = - Seq(TimestampType, StringTypeWithCollation) + Seq(TimestampType, StringTypeWithCollation(supportsTrimCollation = true)) override def dataType: DataType = TimestampType override def nullSafeEval(time: Any, timezone: Any): Any = { @@ -2097,8 +2102,8 @@ case class ParseToDate( extends RuntimeReplaceable with ImplicitCastInputTypes with TimeZoneAwareExpression { override lazy val replacement: Expression = format.map { f => - Cast(GetTimestamp(left, f, TimestampType, timeZoneId, ansiEnabled), DateType, timeZoneId, - EvalMode.fromBoolean(ansiEnabled)) + Cast(GetTimestamp(left, f, TimestampType, "try_to_date", timeZoneId, ansiEnabled), DateType, + timeZoneId, EvalMode.fromBoolean(ansiEnabled)) }.getOrElse(Cast(left, DateType, timeZoneId, EvalMode.fromBoolean(ansiEnabled))) // backwards compatibility @@ -2123,8 +2128,11 @@ case class ParseToDate( // Note: ideally this function should only take string input, but we allow more types here to // be backward compatible. 
TypeCollection( - StringTypeWithCollation, DateType, TimestampType, TimestampNTZType) +: - format.map(_ => StringTypeWithCollation).toSeq + StringTypeWithCollation(supportsTrimCollation = true), + DateType, + TimestampType, + TimestampNTZType) +: + format.map(_ => StringTypeWithCollation(supportsTrimCollation = true)).toSeq } override protected def withNewChildrenInternal( @@ -2173,7 +2181,7 @@ case class ParseToTimestamp( extends RuntimeReplaceable with ImplicitCastInputTypes with TimeZoneAwareExpression { override lazy val replacement: Expression = format.map { f => - GetTimestamp(left, f, dataType, timeZoneId, failOnError = failOnError) + GetTimestamp(left, f, dataType, "try_to_timestamp", timeZoneId, failOnError = failOnError) }.getOrElse(Cast(left, dataType, timeZoneId, ansiEnabled = failOnError)) def this(left: Expression, format: Expression) = { @@ -2195,10 +2203,15 @@ case class ParseToTimestamp( override def inputTypes: Seq[AbstractDataType] = { // Note: ideally this function should only take string input, but we allow more types here to // be backward compatible. 
- val types = Seq(StringTypeWithCollation, DateType, TimestampType, TimestampNTZType) + val types = Seq( + StringTypeWithCollation( + supportsTrimCollation = true), + DateType, + TimestampType, + TimestampNTZType) TypeCollection( (if (dataType.isInstanceOf[TimestampType]) types :+ NumericType else types): _* - ) +: format.map(_ => StringTypeWithCollation).toSeq + ) +: format.map(_ => StringTypeWithCollation(supportsTrimCollation = true)).toSeq } override protected def withNewChildrenInternal( @@ -2329,7 +2342,7 @@ case class TruncDate(date: Expression, format: Expression) override def right: Expression = format override def inputTypes: Seq[AbstractDataType] = - Seq(DateType, StringTypeWithCollation) + Seq(DateType, StringTypeWithCollation(supportsTrimCollation = true)) override def dataType: DataType = DateType override def prettyName: String = "trunc" override val instant = date @@ -2399,7 +2412,7 @@ case class TruncTimestamp( override def right: Expression = timestamp override def inputTypes: Seq[AbstractDataType] = - Seq(StringTypeWithCollation, TimestampType) + Seq(StringTypeWithCollation(supportsTrimCollation = true), TimestampType) override def dataType: TimestampType = TimestampType override def prettyName: String = "date_trunc" override val instant = timestamp @@ -2800,7 +2813,7 @@ case class MakeTimestamp( // casted into decimal safely, we use DecimalType(16, 6) which is wider than DecimalType(10, 0). 
override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType, IntegerType, IntegerType, DecimalType(16, 6)) ++ - timezone.map(_ => StringTypeWithCollation) + timezone.map(_ => StringTypeWithCollation(supportsTrimCollation = true)) override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = @@ -3333,7 +3346,10 @@ case class ConvertTimezone( override def third: Expression = sourceTs override def inputTypes: Seq[AbstractDataType] = - Seq(StringTypeWithCollation, StringTypeWithCollation, TimestampNTZType) + Seq( + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true), + TimestampNTZType) override def dataType: DataType = TimestampNTZType override def nullSafeEval(srcTz: Any, tgtTz: Any, micros: Any): Any = { @@ -3415,7 +3431,7 @@ case class TimestampAdd( override def left: Expression = quantity override def right: Expression = timestamp - override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, AnyTimestampType) + override def inputTypes: Seq[AbstractDataType] = Seq(LongType, AnyTimestampType) override def dataType: DataType = timestamp.dataType override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = @@ -3424,7 +3440,7 @@ case class TimestampAdd( @transient private lazy val zoneIdInEval: ZoneId = zoneIdForType(timestamp.dataType) override def nullSafeEval(q: Any, micros: Any): Any = { - DateTimeUtils.timestampAdd(unit, q.asInstanceOf[Int], micros.asInstanceOf[Long], zoneIdInEval) + DateTimeUtils.timestampAdd(unit, q.asInstanceOf[Long], micros.asInstanceOf[Long], zoneIdInEval) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index 79879dc0edb4c..89d2259ea5c28 
100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -419,7 +419,7 @@ abstract class HashExpression[E] extends Expression { protected def genHashString( ctx: CodegenContext, stringType: StringType, input: String, result: String): String = { - if (stringType.supportsBinaryEquality && !stringType.usesTrimCollation) { + if (stringType.supportsBinaryEquality) { val baseObject = s"$input.getBaseObject()" val baseOffset = s"$input.getBaseOffset()" val numBytes = s"$input.numBytes()" @@ -570,7 +570,7 @@ abstract class InterpretedHashFunction { hashUnsafeBytes(a, Platform.BYTE_ARRAY_OFFSET, a.length, seed) case s: UTF8String => val st = dataType.asInstanceOf[StringType] - if (st.supportsBinaryEquality && !st.usesTrimCollation) { + if (st.supportsBinaryEquality) { hashUnsafeBytes(s.getBaseObject, s.getBaseOffset, s.numBytes(), seed) } else { val stringHash = CollationFactory @@ -821,7 +821,7 @@ case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] { override protected def genHashString( ctx: CodegenContext, stringType: StringType, input: String, result: String): String = { - if (stringType.supportsBinaryEquality && !stringType.usesTrimCollation) { + if (stringType.supportsBinaryEquality) { val baseObject = s"$input.getBaseObject()" val baseOffset = s"$input.getBaseOffset()" val numBytes = s"$input.numBytes()" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionEvalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionEvalUtils.scala index edc8012eb3da2..c9d15e1eb2e4d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionEvalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/json/JsonExpressionEvalUtils.scala @@ -16,12 +16,16 @@ */ package 
org.apache.spark.sql.catalyst.expressions.json -import java.io.CharArrayWriter +import java.io.{ByteArrayOutputStream, CharArrayWriter, StringWriter} -import com.fasterxml.jackson.core.JsonFactory +import scala.util.parsing.combinator.RegexParsers +import com.fasterxml.jackson.core._ +import com.fasterxml.jackson.core.json.JsonReadFeature + +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.ExprUtils +import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericInternalRow} import org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonGenerator, JacksonParser, JsonInferSchema, JSONOptions} import org.apache.spark.sql.catalyst.util.{ArrayData, FailFastMode, FailureSafeParser, MapData, PermissiveMode} @@ -31,34 +35,79 @@ import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, St import org.apache.spark.unsafe.types.{UTF8String, VariantVal} import org.apache.spark.util.Utils -object JsonExpressionEvalUtils { +private[this] sealed trait PathInstruction +private[this] object PathInstruction { + private[expressions] case object Subscript extends PathInstruction + private[expressions] case object Wildcard extends PathInstruction + private[expressions] case object Key extends PathInstruction + private[expressions] case class Index(index: Long) extends PathInstruction + private[expressions] case class Named(name: String) extends PathInstruction +} - def schemaOfJson( - jsonFactory: JsonFactory, - jsonOptions: JSONOptions, - jsonInferSchema: JsonInferSchema, - json: UTF8String): UTF8String = { - val dt = Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, json)) { parser => - parser.nextToken() - // To match with schema inference from JSON datasource. 
- jsonInferSchema.inferField(parser) match { - case st: StructType => - jsonInferSchema.canonicalizeType(st, jsonOptions).getOrElse(StructType(Nil)) - case at: ArrayType if at.elementType.isInstanceOf[StructType] => - jsonInferSchema - .canonicalizeType(at.elementType, jsonOptions) - .map(ArrayType(_, containsNull = at.containsNull)) - .getOrElse(ArrayType(StructType(Nil), containsNull = at.containsNull)) - case other: DataType => - jsonInferSchema.canonicalizeType(other, jsonOptions).getOrElse( - SQLConf.get.defaultStringType) - } +private[this] sealed trait WriteStyle +private[this] object WriteStyle { + private[expressions] case object RawStyle extends WriteStyle + private[expressions] case object QuotedStyle extends WriteStyle + private[expressions] case object FlattenStyle extends WriteStyle +} + +private[this] object JsonPathParser extends RegexParsers { + import PathInstruction._ + + def root: Parser[Char] = '$' + + def long: Parser[Long] = "\\d+".r ^? { + case x => x.toLong + } + + // parse `[*]` and `[123]` subscripts + def subscript: Parser[List[PathInstruction]] = + for { + operand <- '[' ~> ('*' ^^^ Wildcard | long ^^ Index) <~ ']' + } yield { + Subscript :: operand :: Nil } - UTF8String.fromString(dt.sql) + // parse `.name` or `['name']` child expressions + def named: Parser[List[PathInstruction]] = + for { + name <- '.' 
~> "[^\\.\\[]+".r | "['" ~> "[^\\']+".r <~ "']" + } yield { + Key :: Named(name) :: Nil + } + + // child wildcards: `..`, `.*` or `['*']` + def wildcard: Parser[List[PathInstruction]] = + (".*" | "['*']") ^^^ List(Wildcard) + + def node: Parser[List[PathInstruction]] = + wildcard | + named | + subscript + + val expression: Parser[List[PathInstruction]] = { + phrase(root ~> rep(node) ^^ (x => x.flatten)) + } + + def parse(str: String): Option[List[PathInstruction]] = { + this.parseAll(expression, str) match { + case Success(result, _) => + Some(result) + + case _ => + None + } } } +private[this] object SharedFactory { + val jsonFactory: JsonFactory = new JsonFactoryBuilder() + // The two options below enabled for Hive compatibility + .enable(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS) + .enable(JsonReadFeature.ALLOW_SINGLE_QUOTES) + .build() +} + case class JsonToStructsEvaluator( options: Map[String, String], nullableSchema: DataType, @@ -103,6 +152,7 @@ case class JsonToStructsEvaluator( } final def evaluate(json: UTF8String): Any = { + if (json == null) return null nullableSchema match { case _: VariantType => VariantExpressionEvalUtils.parseJson(json, @@ -159,3 +209,370 @@ case class StructsToJsonEvaluator( converter(value) } } + +case class SchemaOfJsonEvaluator(options: Map[String, String]) { + @transient + private lazy val jsonOptions = new JSONOptions(options, "UTC") + + @transient + private lazy val jsonFactory = jsonOptions.buildJsonFactory() + + @transient + private lazy val jsonInferSchema = new JsonInferSchema(jsonOptions) + + final def evaluate(json: UTF8String): Any = { + val dt = Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, json)) { parser => + parser.nextToken() + // To match with schema inference from JSON datasource. 
+ jsonInferSchema.inferField(parser) match { + case st: StructType => + jsonInferSchema.canonicalizeType(st, jsonOptions).getOrElse(StructType(Nil)) + case at: ArrayType if at.elementType.isInstanceOf[StructType] => + jsonInferSchema + .canonicalizeType(at.elementType, jsonOptions) + .map(ArrayType(_, containsNull = at.containsNull)) + .getOrElse(ArrayType(StructType(Nil), containsNull = at.containsNull)) + case other: DataType => + jsonInferSchema.canonicalizeType(other, jsonOptions).getOrElse( + SQLConf.get.defaultStringType) + } + } + + UTF8String.fromString(dt.sql) + } +} + +/** + * The expression `JsonTuple` will utilize it to support codegen. + */ +case class JsonTupleEvaluator(foldableFieldNames: Array[Option[String]]) { + + import SharedFactory._ + + // If processing fails this shared value will be returned. + @transient private lazy val nullRow: Seq[InternalRow] = + new GenericInternalRow(Array.ofDim[Any](foldableFieldNames.length)) :: Nil + + // And count the number of foldable fields, we'll use this later to optimize evaluation. + @transient private lazy val constantFields: Int = foldableFieldNames.count(_ != null) + + private def getFieldNameStrings(fields: Array[UTF8String]): Array[String] = { + // Evaluate the field names as String rather than UTF8String to + // optimize lookups from the json token, which is also a String. + if (constantFields == fields.length) { + // Typically the user will provide the field names as foldable expressions + // so we can use the cached copy. + foldableFieldNames.map(_.orNull) + } else if (constantFields == 0) { + // None are foldable so all field names need to be evaluated from the input row. + fields.map { f => if (f != null) f.toString else null } + } else { + // If there is a mix of constant and non-constant expressions + // prefer the cached copy when available. 
+ foldableFieldNames.zip(fields).map { + case (null, f) => if (f != null) f.toString else null + case (fieldName, _) => fieldName.orNull + } + } + } + + private def parseRow(parser: JsonParser, fieldNames: Array[String]): Seq[InternalRow] = { + // Only objects are supported. + if (parser.nextToken() != JsonToken.START_OBJECT) return nullRow + + val row = Array.ofDim[Any](fieldNames.length) + + // Start reading through the token stream, looking for any requested field names. + while (parser.nextToken() != JsonToken.END_OBJECT) { + if (parser.getCurrentToken == JsonToken.FIELD_NAME) { + // Check to see if this field is desired in the output. + val jsonField = parser.currentName + var idx = fieldNames.indexOf(jsonField) + if (idx >= 0) { + // It is, copy the child tree to the correct location in the output row. + val output = new ByteArrayOutputStream() + + // Write the output directly to UTF8 encoded byte array. + if (parser.nextToken() != JsonToken.VALUE_NULL) { + Utils.tryWithResource(jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { + generator => copyCurrentStructure(generator, parser) + } + + val jsonValue = UTF8String.fromBytes(output.toByteArray) + + // SPARK-21804: json_tuple returns null values within repeated columns + // except the first one; so that we need to check the remaining fields. + do { + row(idx) = jsonValue + idx = fieldNames.indexOf(jsonField, idx + 1) + } while (idx >= 0) + } + } + } + + // Always skip children, it's cheap enough to do even if copyCurrentStructure was called. + parser.skipChildren() + } + new GenericInternalRow(row) :: Nil + } + + private def copyCurrentStructure(generator: JsonGenerator, parser: JsonParser): Unit = { + parser.getCurrentToken match { + // If the user requests a string field it needs to be returned without enclosing + // quotes which is accomplished via JsonGenerator.writeRaw instead of JsonGenerator.write. 
+ case JsonToken.VALUE_STRING if parser.hasTextCharacters => + // Slight optimization to avoid allocating a String instance, though the characters + // still have to be decoded... Jackson doesn't have a way to access the raw bytes. + generator.writeRaw(parser.getTextCharacters, parser.getTextOffset, parser.getTextLength) + + case JsonToken.VALUE_STRING => + // The normal String case, pass it through to the output without enclosing quotes. + generator.writeRaw(parser.getText) + + case JsonToken.VALUE_NULL => + // A special case that needs to be handled outside of this method. + // If a requested field is null, the result must be null. The easiest + // way to achieve this is just by ignoring null tokens entirely. + throw SparkException.internalError("Do not attempt to copy a null field.") + + case _ => + // Handle other types including objects, arrays, booleans and numbers. + generator.copyCurrentStructure(parser) + } + } + + final def evaluate(json: UTF8String, fieldNames: Array[UTF8String]): IterableOnce[InternalRow] = { + if (json == null) return nullRow + try { + /* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson + detect character encoding which could fail for some malformed strings. */ + Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, json)) { parser => + parseRow(parser, getFieldNameStrings(fieldNames)) + } + } catch { + case _: JsonProcessingException => nullRow + } + } +} + +/** + * The expression `GetJsonObject` will utilize it to support codegen. 
+ */ +case class GetJsonObjectEvaluator(cachedPath: UTF8String) { + import com.fasterxml.jackson.core.JsonToken._ + import PathInstruction._ + import SharedFactory._ + import WriteStyle._ + + def this() = this(null) + + @transient + private lazy val parsedPath: Option[List[PathInstruction]] = parsePath(cachedPath) + + @transient + private var jsonStr: UTF8String = _ + + @transient + private var pathStr: UTF8String = _ + + def setJson(arg: UTF8String): Unit = { + jsonStr = arg + } + + def setPath(arg: UTF8String): Unit = { + pathStr = arg + } + + def evaluate(): Any = { + if (jsonStr == null) return null + + val parsed = if (cachedPath != null) { + parsedPath + } else { + parsePath(pathStr) + } + + if (parsed.isDefined) { + try { + /* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson + detect character encoding which could fail for some malformed strings */ + Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, jsonStr)) { parser => + val output = new ByteArrayOutputStream() + val matched = Utils.tryWithResource( + jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { generator => + parser.nextToken() + evaluatePath(parser, generator, RawStyle, parsed.get) + } + if (matched) { + UTF8String.fromBytes(output.toByteArray) + } else { + null + } + } + } catch { + case _: JsonProcessingException => null + } + } else { + null + } + } + + private def parsePath(path: UTF8String): Option[List[PathInstruction]] = { + if (path != null) { + JsonPathParser.parse(path.toString) + } else { + None + } + } + + // advance to the desired array index, assumes to start at the START_ARRAY token + private def arrayIndex(p: JsonParser, f: () => Boolean): Long => Boolean = { + case _ if p.getCurrentToken == END_ARRAY => + // terminate, nothing has been written + false + + case 0 => + // we've reached the desired index + val dirty = f() + + while (p.nextToken() != END_ARRAY) { + // advance the token stream to the end of the array + p.skipChildren() + 
} + + dirty + + case i if i > 0 => + // skip this token and evaluate the next + p.skipChildren() + p.nextToken() + arrayIndex(p, f)(i - 1) + } + + /** + * Evaluate a list of JsonPath instructions, returning a bool that indicates if any leaf nodes + * have been written to the generator + */ + private def evaluatePath( + p: JsonParser, + g: JsonGenerator, + style: WriteStyle, + path: List[PathInstruction]): Boolean = { + (p.getCurrentToken, path) match { + case (VALUE_STRING, Nil) if style == RawStyle => + // there is no array wildcard or slice parent, emit this string without quotes + if (p.hasTextCharacters) { + g.writeRaw(p.getTextCharacters, p.getTextOffset, p.getTextLength) + } else { + g.writeRaw(p.getText) + } + true + + case (START_ARRAY, Nil) if style == FlattenStyle => + // flatten this array into the parent + var dirty = false + while (p.nextToken() != END_ARRAY) { + dirty |= evaluatePath(p, g, style, Nil) + } + dirty + + case (_, Nil) => + // general case: just copy the child tree verbatim + g.copyCurrentStructure(p) + true + + case (START_OBJECT, Key :: xs) => + var dirty = false + while (p.nextToken() != END_OBJECT) { + if (dirty) { + // once a match has been found we can skip other fields + p.skipChildren() + } else { + dirty = evaluatePath(p, g, style, xs) + } + } + dirty + + case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs) => + // special handling for the non-structure preserving double wildcard behavior in Hive + var dirty = false + g.writeStartArray() + while (p.nextToken() != END_ARRAY) { + dirty |= evaluatePath(p, g, FlattenStyle, xs) + } + g.writeEndArray() + dirty + + case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle => + // retain Flatten, otherwise use Quoted... 
cannot use Raw within an array + val nextStyle = style match { + case RawStyle => QuotedStyle + case FlattenStyle => FlattenStyle + case QuotedStyle => throw SparkException.internalError("Unexpected the quoted style.") + } + + // temporarily buffer child matches, the emitted json will need to be + // modified slightly if there is only a single element written + val buffer = new StringWriter() + + var dirty = 0 + Utils.tryWithResource(jsonFactory.createGenerator(buffer)) { flattenGenerator => + flattenGenerator.writeStartArray() + + while (p.nextToken() != END_ARRAY) { + // track the number of array elements and only emit an outer array if + // we've written more than one element, this matches Hive's behavior + dirty += (if (evaluatePath(p, flattenGenerator, nextStyle, xs)) 1 else 0) + } + flattenGenerator.writeEndArray() + } + + val buf = buffer.getBuffer + if (dirty > 1) { + g.writeRawValue(buf.toString) + } else if (dirty == 1) { + // remove outer array tokens + g.writeRawValue(buf.substring(1, buf.length() - 1)) + } // else do not write anything + + dirty > 0 + + case (START_ARRAY, Subscript :: Wildcard :: xs) => + var dirty = false + g.writeStartArray() + while (p.nextToken() != END_ARRAY) { + // wildcards can have multiple matches, continually update the dirty count + dirty |= evaluatePath(p, g, QuotedStyle, xs) + } + g.writeEndArray() + + dirty + + case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) => + p.nextToken() + // we're going to have 1 or more results, switch to QuotedStyle + arrayIndex(p, () => evaluatePath(p, g, QuotedStyle, xs))(idx) + + case (START_ARRAY, Subscript :: Index(idx) :: xs) => + p.nextToken() + arrayIndex(p, () => evaluatePath(p, g, style, xs))(idx) + + case (FIELD_NAME, Named(name) :: xs) if p.currentName == name => + // exact field match + if (p.nextToken() != JsonToken.VALUE_NULL) { + evaluatePath(p, g, style, xs) + } else { + false + } + + case (FIELD_NAME, Wildcard :: xs) => + // wildcard field match + 
p.nextToken() + evaluatePath(p, g, style, xs) + + case _ => + p.skipChildren() + false + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index ac6c233f7d2ea..e80f543f14eda 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -17,20 +17,12 @@ package org.apache.spark.sql.catalyst.expressions -import java.io._ - -import scala.util.parsing.combinator.RegexParsers - -import com.fasterxml.jackson.core._ -import com.fasterxml.jackson.core.json.JsonReadFeature - -import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, CodegenFallback, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper -import org.apache.spark.sql.catalyst.expressions.json.{JsonExpressionEvalUtils, JsonExpressionUtils, JsonToStructsEvaluator, StructsToJsonEvaluator} +import org.apache.spark.sql.catalyst.expressions.json.{GetJsonObjectEvaluator, JsonExpressionUtils, JsonToStructsEvaluator, JsonTupleEvaluator, SchemaOfJsonEvaluator, StructsToJsonEvaluator} import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke} import org.apache.spark.sql.catalyst.json._ import org.apache.spark.sql.catalyst.trees.TreePattern.{JSON_TO_STRUCT, RUNTIME_REPLACEABLE, TreePattern} @@ -39,80 +31,6 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.types.StringTypeWithCollation 
import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String -import org.apache.spark.util.Utils - -private[this] sealed trait PathInstruction -private[this] object PathInstruction { - private[expressions] case object Subscript extends PathInstruction - private[expressions] case object Wildcard extends PathInstruction - private[expressions] case object Key extends PathInstruction - private[expressions] case class Index(index: Long) extends PathInstruction - private[expressions] case class Named(name: String) extends PathInstruction -} - -private[this] sealed trait WriteStyle -private[this] object WriteStyle { - private[expressions] case object RawStyle extends WriteStyle - private[expressions] case object QuotedStyle extends WriteStyle - private[expressions] case object FlattenStyle extends WriteStyle -} - -private[this] object JsonPathParser extends RegexParsers { - import PathInstruction._ - - def root: Parser[Char] = '$' - - def long: Parser[Long] = "\\d+".r ^? { - case x => x.toLong - } - - // parse `[*]` and `[123]` subscripts - def subscript: Parser[List[PathInstruction]] = - for { - operand <- '[' ~> ('*' ^^^ Wildcard | long ^^ Index) <~ ']' - } yield { - Subscript :: operand :: Nil - } - - // parse `.name` or `['name']` child expressions - def named: Parser[List[PathInstruction]] = - for { - name <- '.' 
~> "[^\\.\\[]+".r | "['" ~> "[^\\']+".r <~ "']" - } yield { - Key :: Named(name) :: Nil - } - - // child wildcards: `..`, `.*` or `['*']` - def wildcard: Parser[List[PathInstruction]] = - (".*" | "['*']") ^^^ List(Wildcard) - - def node: Parser[List[PathInstruction]] = - wildcard | - named | - subscript - - val expression: Parser[List[PathInstruction]] = { - phrase(root ~> rep(node) ^^ (x => x.flatten)) - } - - def parse(str: String): Option[List[PathInstruction]] = { - this.parseAll(expression, str) match { - case Success(result, _) => - Some(result) - - case _ => - None - } - } -} - -private[this] object SharedFactory { - val jsonFactory = new JsonFactoryBuilder() - // The two options below enabled for Hive compatibility - .enable(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS) - .enable(JsonReadFeature.ALLOW_SINGLE_QUOTES) - .build() -} /** * Extracts json object from a json string based on json path specified, and returns json string @@ -133,7 +51,9 @@ case class GetJsonObject(json: Expression, path: Expression) override def left: Expression = json override def right: Expression = path override def inputTypes: Seq[AbstractDataType] = - Seq(StringTypeWithCollation, StringTypeWithCollation) + Seq( + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true)) override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = true override def prettyName: String = "get_json_object" @@ -211,228 +131,6 @@ case class GetJsonObject(json: Expression, path: Expression) copy(json = newLeft, path = newRight) } -class GetJsonObjectEvaluator(cachedPath: UTF8String) { - import com.fasterxml.jackson.core.JsonToken._ - import PathInstruction._ - import SharedFactory._ - import WriteStyle._ - - def this() = this(null) - - @transient - private lazy val parsedPath: Option[List[PathInstruction]] = - parsePath(cachedPath) - - @transient - private var jsonStr: UTF8String = null - - @transient - private 
var pathStr: UTF8String = null - - def setJson(arg: UTF8String): Unit = { - jsonStr = arg - } - - def setPath(arg: UTF8String): Unit = { - pathStr = arg - } - - def evaluate(): Any = { - if (jsonStr == null) { - return null - } - - val parsed = if (cachedPath != null) { - parsedPath - } else { - parsePath(pathStr) - } - - if (parsed.isDefined) { - try { - /* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson - detect character encoding which could fail for some malformed strings */ - Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, jsonStr)) { parser => - val output = new ByteArrayOutputStream() - val matched = Utils.tryWithResource( - jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { generator => - parser.nextToken() - evaluatePath(parser, generator, RawStyle, parsed.get) - } - if (matched) { - UTF8String.fromBytes(output.toByteArray) - } else { - null - } - } - } catch { - case _: JsonProcessingException => null - } - } else { - null - } - } - - private def parsePath(path: UTF8String): Option[List[PathInstruction]] = { - if (path != null) { - JsonPathParser.parse(path.toString) - } else { - None - } - } - - // advance to the desired array index, assumes to start at the START_ARRAY token - private def arrayIndex(p: JsonParser, f: () => Boolean): Long => Boolean = { - case _ if p.getCurrentToken == END_ARRAY => - // terminate, nothing has been written - false - - case 0 => - // we've reached the desired index - val dirty = f() - - while (p.nextToken() != END_ARRAY) { - // advance the token stream to the end of the array - p.skipChildren() - } - - dirty - - case i if i > 0 => - // skip this token and evaluate the next - p.skipChildren() - p.nextToken() - arrayIndex(p, f)(i - 1) - } - - /** - * Evaluate a list of JsonPath instructions, returning a bool that indicates if any leaf nodes - * have been written to the generator - */ - private def evaluatePath( - p: JsonParser, - g: JsonGenerator, - style: WriteStyle, - 
path: List[PathInstruction]): Boolean = { - (p.getCurrentToken, path) match { - case (VALUE_STRING, Nil) if style == RawStyle => - // there is no array wildcard or slice parent, emit this string without quotes - if (p.hasTextCharacters) { - g.writeRaw(p.getTextCharacters, p.getTextOffset, p.getTextLength) - } else { - g.writeRaw(p.getText) - } - true - - case (START_ARRAY, Nil) if style == FlattenStyle => - // flatten this array into the parent - var dirty = false - while (p.nextToken() != END_ARRAY) { - dirty |= evaluatePath(p, g, style, Nil) - } - dirty - - case (_, Nil) => - // general case: just copy the child tree verbatim - g.copyCurrentStructure(p) - true - - case (START_OBJECT, Key :: xs) => - var dirty = false - while (p.nextToken() != END_OBJECT) { - if (dirty) { - // once a match has been found we can skip other fields - p.skipChildren() - } else { - dirty = evaluatePath(p, g, style, xs) - } - } - dirty - - case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs) => - // special handling for the non-structure preserving double wildcard behavior in Hive - var dirty = false - g.writeStartArray() - while (p.nextToken() != END_ARRAY) { - dirty |= evaluatePath(p, g, FlattenStyle, xs) - } - g.writeEndArray() - dirty - - case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle => - // retain Flatten, otherwise use Quoted... 
cannot use Raw within an array - val nextStyle = style match { - case RawStyle => QuotedStyle - case FlattenStyle => FlattenStyle - case QuotedStyle => throw SparkException.internalError("Unexpected the quoted style.") - } - - // temporarily buffer child matches, the emitted json will need to be - // modified slightly if there is only a single element written - val buffer = new StringWriter() - - var dirty = 0 - Utils.tryWithResource(jsonFactory.createGenerator(buffer)) { flattenGenerator => - flattenGenerator.writeStartArray() - - while (p.nextToken() != END_ARRAY) { - // track the number of array elements and only emit an outer array if - // we've written more than one element, this matches Hive's behavior - dirty += (if (evaluatePath(p, flattenGenerator, nextStyle, xs)) 1 else 0) - } - flattenGenerator.writeEndArray() - } - - val buf = buffer.getBuffer - if (dirty > 1) { - g.writeRawValue(buf.toString) - } else if (dirty == 1) { - // remove outer array tokens - g.writeRawValue(buf.substring(1, buf.length() - 1)) - } // else do not write anything - - dirty > 0 - - case (START_ARRAY, Subscript :: Wildcard :: xs) => - var dirty = false - g.writeStartArray() - while (p.nextToken() != END_ARRAY) { - // wildcards can have multiple matches, continually update the dirty count - dirty |= evaluatePath(p, g, QuotedStyle, xs) - } - g.writeEndArray() - - dirty - - case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) => - p.nextToken() - // we're going to have 1 or more results, switch to QuotedStyle - arrayIndex(p, () => evaluatePath(p, g, QuotedStyle, xs))(idx) - - case (START_ARRAY, Subscript :: Index(idx) :: xs) => - p.nextToken() - arrayIndex(p, () => evaluatePath(p, g, style, xs))(idx) - - case (FIELD_NAME, Named(name) :: xs) if p.currentName == name => - // exact field match - if (p.nextToken() != JsonToken.VALUE_NULL) { - evaluatePath(p, g, style, xs) - } else { - false - } - - case (FIELD_NAME, Wildcard :: xs) => - // wildcard field match - 
p.nextToken() - evaluatePath(p, g, style, xs) - - case _ => - p.skipChildren() - false - } - } -} - // scalastyle:off line.size.limit line.contains.tab @ExpressionDescription( usage = "_FUNC_(jsonStr, p1, p2, ..., pn) - Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string.", @@ -446,37 +144,27 @@ class GetJsonObjectEvaluator(cachedPath: UTF8String) { // scalastyle:on line.size.limit line.contains.tab case class JsonTuple(children: Seq[Expression]) extends Generator - with CodegenFallback with QueryErrorsBase { - import SharedFactory._ - override def nullable: Boolean = { - // a row is always returned + // A row is always returned. false } - // if processing fails this shared value will be returned - @transient private lazy val nullRow: Seq[InternalRow] = - new GenericInternalRow(Array.ofDim[Any](fieldExpressions.length)) :: Nil - - // the json body is the first child + // The json body is the first child. @transient private lazy val jsonExpr: Expression = children.head - // the fields to query are the remaining children + // The fields to query are the remaining children. @transient private lazy val fieldExpressions: Seq[Expression] = children.tail - // eagerly evaluate any foldable the field names - @transient private lazy val foldableFieldNames: IndexedSeq[Option[String]] = { + // Eagerly evaluate any foldable the field names. 
+ @transient private lazy val foldableFieldNames: Array[Option[String]] = { fieldExpressions.map { case expr if expr.foldable => Option(expr.eval()).map(_.asInstanceOf[UTF8String].toString) case _ => null - }.toIndexedSeq + }.toArray } - // and count the number of foldable fields, we'll use this later to optimize evaluation - @transient private lazy val constantFields: Int = foldableFieldNames.count(_ != null) - override def elementSchema: StructType = StructType(fieldExpressions.zipWithIndex.map { case (_, idx) => StructField(s"c$idx", children.head.dataType, nullable = true) }) @@ -490,7 +178,8 @@ case class JsonTuple(children: Seq[Expression]) ) } else if ( children.forall( - child => StringTypeWithCollation.acceptsType(child.dataType))) { + child => StringTypeWithCollation(supportsTrimCollation = true) + .acceptsType(child.dataType))) { TypeCheckResult.TypeCheckSuccess } else { DataTypeMismatch( @@ -499,111 +188,41 @@ case class JsonTuple(children: Seq[Expression]) } } + @transient + private lazy val evaluator: JsonTupleEvaluator = JsonTupleEvaluator(foldableFieldNames) + override def eval(input: InternalRow): IterableOnce[InternalRow] = { val json = jsonExpr.eval(input).asInstanceOf[UTF8String] - if (json == null) { - return nullRow - } - - try { - /* We know the bytes are UTF-8 encoded. 
Pass a Reader to avoid having Jackson - detect character encoding which could fail for some malformed strings */ - Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, json)) { parser => - parseRow(parser, input) - } - } catch { - case _: JsonProcessingException => - nullRow - } + val filedNames = fieldExpressions.map(_.eval(input).asInstanceOf[UTF8String]).toArray + evaluator.evaluate(json, filedNames) } - private def parseRow(parser: JsonParser, input: InternalRow): Seq[InternalRow] = { - // only objects are supported - if (parser.nextToken() != JsonToken.START_OBJECT) { - return nullRow - } - - // evaluate the field names as String rather than UTF8String to - // optimize lookups from the json token, which is also a String - val fieldNames = if (constantFields == fieldExpressions.length) { - // typically the user will provide the field names as foldable expressions - // so we can use the cached copy - foldableFieldNames.map(_.orNull) - } else if (constantFields == 0) { - // none are foldable so all field names need to be evaluated from the input row - fieldExpressions.map { expr => - Option(expr.eval(input)).map(_.asInstanceOf[UTF8String].toString).orNull - } - } else { - // if there is a mix of constant and non-constant expressions - // prefer the cached copy when available - foldableFieldNames.zip(fieldExpressions).map { - case (null, expr) => - Option(expr.eval(input)).map(_.asInstanceOf[UTF8String].toString).orNull - case (fieldName, _) => fieldName.orNull - } - } - - val row = Array.ofDim[Any](fieldNames.length) - - // start reading through the token stream, looking for any requested field names - while (parser.nextToken() != JsonToken.END_OBJECT) { - if (parser.getCurrentToken == JsonToken.FIELD_NAME) { - // check to see if this field is desired in the output - val jsonField = parser.currentName - var idx = fieldNames.indexOf(jsonField) - if (idx >= 0) { - // it is, copy the child tree to the correct location in the output row - val output = 
new ByteArrayOutputStream() - - // write the output directly to UTF8 encoded byte array - if (parser.nextToken() != JsonToken.VALUE_NULL) { - Utils.tryWithResource(jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { - generator => copyCurrentStructure(generator, parser) - } - - val jsonValue = UTF8String.fromBytes(output.toByteArray) - - // SPARK-21804: json_tuple returns null values within repeated columns - // except the first one; so that we need to check the remaining fields. - do { - row(idx) = jsonValue - idx = fieldNames.indexOf(jsonField, idx + 1) - } while (idx >= 0) - } - } - } - - // always skip children, it's cheap enough to do even if copyCurrentStructure was called - parser.skipChildren() - } - - new GenericInternalRow(row) :: Nil - } - - private def copyCurrentStructure(generator: JsonGenerator, parser: JsonParser): Unit = { - parser.getCurrentToken match { - // if the user requests a string field it needs to be returned without enclosing - // quotes which is accomplished via JsonGenerator.writeRaw instead of JsonGenerator.write - case JsonToken.VALUE_STRING if parser.hasTextCharacters => - // slight optimization to avoid allocating a String instance, though the characters - // still have to be decoded... Jackson doesn't have a way to access the raw bytes - generator.writeRaw(parser.getTextCharacters, parser.getTextOffset, parser.getTextLength) - - case JsonToken.VALUE_STRING => - // the normal String case, pass it through to the output without enclosing quotes - generator.writeRaw(parser.getText) - - case JsonToken.VALUE_NULL => - // a special case that needs to be handled outside of this method. - // if a requested field is null, the result must be null. 
the easiest - // way to achieve this is just by ignoring null tokens entirely - throw SparkException.internalError("Do not attempt to copy a null field.") - - case _ => - // handle other types including objects, arrays, booleans and numbers - generator.copyCurrentStructure(parser) + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val refEvaluator = ctx.addReferenceObj("evaluator", evaluator) + val jsonEval = jsonExpr.genCode(ctx) + val filedNamesTerm = ctx.freshName("fieldNames") + val fieldNamesEval = fieldExpressions.map(_.genCode(ctx)) + val wrapperClass = classOf[IterableOnce[_]].getName + val setFieldNames = fieldNamesEval.zipWithIndex.map { + case (fieldNameEval, idx) => + s""" + |if (${fieldNameEval.isNull}) { + | $filedNamesTerm[$idx] = null; + |} else { + | $filedNamesTerm[$idx] = ${fieldNameEval.value}; + |} + |""".stripMargin } + ev.copy(code = + code""" + |UTF8String[] $filedNamesTerm = new UTF8String[${fieldExpressions.length}]; + |${jsonEval.code} + |${fieldNamesEval.map(_.code).mkString("\n")} + |${setFieldNames.mkString("\n")} + |boolean ${ev.isNull} = false; + |$wrapperClass ${ev.value} = + | $refEvaluator.evaluate(${jsonEval.value}, $filedNamesTerm); + |""".stripMargin) } override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): JsonTuple = @@ -636,9 +255,8 @@ case class JsonToStructs( timeZoneId: Option[String] = None, variantAllowDuplicateKeys: Boolean = SQLConf.get.getConf(SQLConf.VARIANT_ALLOW_DUPLICATE_KEYS)) extends UnaryExpression - with RuntimeReplaceable - with ExpectsInputTypes with TimeZoneAwareExpression + with ExpectsInputTypes with QueryErrorsBase { // The JSON input data might be missing certain fields. 
We force the nullability @@ -648,7 +266,9 @@ case class JsonToStructs( override def nullable: Boolean = true - override def nodePatternsInternal(): Seq[TreePattern] = Seq(JSON_TO_STRUCT, RUNTIME_REPLACEABLE) + final override def nodePatternsInternal(): Seq[TreePattern] = Seq(JSON_TO_STRUCT) + + override def nullIntolerant: Boolean = true // Used in `FunctionRegistry` def this(child: Expression, schema: Expression, options: Map[String, String]) = @@ -682,7 +302,34 @@ case class JsonToStructs( override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) - override def inputTypes: Seq[AbstractDataType] = StringTypeWithCollation :: Nil + @transient + private val nameOfCorruptRecord = SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD) + + @transient + private lazy val evaluator = new JsonToStructsEvaluator( + options, nullableSchema, nameOfCorruptRecord, timeZoneId, variantAllowDuplicateKeys) + + override def nullSafeEval(json: Any): Any = evaluator.evaluate(json.asInstanceOf[UTF8String]) + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val refEvaluator = ctx.addReferenceObj("evaluator", evaluator) + val eval = child.genCode(ctx) + val resultType = CodeGenerator.boxedType(dataType) + val resultTerm = ctx.freshName("result") + ev.copy(code = + code""" + |${eval.code} + |$resultType $resultTerm = ($resultType) $refEvaluator.evaluate(${eval.value}); + |boolean ${ev.isNull} = $resultTerm == null; + |${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + |if (!${ev.isNull}) { + | ${ev.value} = $resultTerm; + |} + |""".stripMargin) + } + + override def inputTypes: Seq[AbstractDataType] = + StringTypeWithCollation(supportsTrimCollation = true) :: Nil override def sql: String = schema match { case _: MapType => "entries" @@ -691,21 +338,6 @@ case class JsonToStructs( override def prettyName: String = "from_json" - @transient - private val 
nameOfCorruptRecord = SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD) - - @transient - lazy val evaluator: JsonToStructsEvaluator = JsonToStructsEvaluator( - options, nullableSchema, nameOfCorruptRecord, timeZoneId, variantAllowDuplicateKeys) - - override def replacement: Expression = Invoke( - Literal.create(evaluator, ObjectType(classOf[JsonToStructsEvaluator])), - "evaluate", - dataType, - Seq(child), - Seq(child.dataType) - ) - override protected def withNewChildInternal(newChild: Expression): JsonToStructs = copy(child = newChild) } @@ -833,15 +465,6 @@ case class SchemaOfJson( override def nullable: Boolean = false - @transient - private lazy val jsonOptions = new JSONOptions(options, "UTC") - - @transient - private lazy val jsonFactory = jsonOptions.buildJsonFactory() - - @transient - private lazy val jsonInferSchema = new JsonInferSchema(jsonOptions) - @transient private lazy val json = child.eval().asInstanceOf[UTF8String] @@ -862,20 +485,16 @@ case class SchemaOfJson( } } - @transient private lazy val jsonFactoryObjectType = ObjectType(classOf[JsonFactory]) - @transient private lazy val jsonOptionsObjectType = ObjectType(classOf[JSONOptions]) - @transient private lazy val jsonInferSchemaObjectType = ObjectType(classOf[JsonInferSchema]) + @transient + private lazy val evaluator: SchemaOfJsonEvaluator = SchemaOfJsonEvaluator(options) - override def replacement: Expression = StaticInvoke( - JsonExpressionEvalUtils.getClass, + override def replacement: Expression = Invoke( + Literal.create(evaluator, ObjectType(classOf[SchemaOfJsonEvaluator])), + "evaluate", dataType, - "schemaOfJson", - Seq(Literal(jsonFactory, jsonFactoryObjectType), - Literal(jsonOptions, jsonOptionsObjectType), - Literal(jsonInferSchema, jsonInferSchemaObjectType), - child), - Seq(jsonFactoryObjectType, jsonOptionsObjectType, jsonInferSchemaObjectType, child.dataType) - ) + Seq(child), + Seq(child.dataType), + returnNullable = false) override def prettyName: String = 
"schema_of_json" @@ -910,7 +529,8 @@ case class LengthOfJsonArray(child: Expression) with ExpectsInputTypes with RuntimeReplaceable { - override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeWithCollation) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeWithCollation(supportsTrimCollation = true)) override def dataType: DataType = IntegerType override def nullable: Boolean = true override def prettyName: String = "json_array_length" @@ -955,7 +575,8 @@ case class JsonObjectKeys(child: Expression) with ExpectsInputTypes with RuntimeReplaceable { - override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeWithCollation) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeWithCollation(supportsTrimCollation = true)) override def dataType: DataType = ArrayType(SQLConf.get.defaultStringType) override def nullable: Boolean = true override def prettyName: String = "json_object_keys" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 4cffc7f0b53a3..c1225f9e5b502 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -166,6 +166,8 @@ object Literal { case _: DayTimeIntervalType if v.isInstanceOf[Duration] => Literal(CatalystTypeConverters.createToCatalystConverter(dataType)(v), dataType) case _: ObjectType => Literal(v, dataType) + case CharType(_) | VarcharType(_) if SQLConf.get.preserveCharVarcharTypeInfo => + Literal(CatalystTypeConverters.createToCatalystConverter(dataType)(v), dataType) case _ => Literal(CatalystTypeConverters.convertToCatalyst(v), dataType) } } @@ -196,6 +198,12 @@ object Literal { case TimestampNTZType => create(0L, TimestampNTZType) case it: DayTimeIntervalType => create(0L, it) case it: YearMonthIntervalType => create(0, it) + 
case CharType(length) => + create(CharVarcharCodegenUtils.charTypeWriteSideCheck(UTF8String.fromString(""), length), + dataType) + case VarcharType(length) => + create(CharVarcharCodegenUtils.varcharTypeWriteSideCheck(UTF8String.fromString(""), length), + dataType) case st: StringType => Literal(UTF8String.fromString(""), st) case BinaryType => Literal("".getBytes(StandardCharsets.UTF_8)) case CalendarIntervalType => Literal(new CalendarInterval(0, 0, 0)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index 7be6df14194fc..5b17d2029ed1b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -193,11 +193,11 @@ case class Mask( */ override def inputTypes: Seq[AbstractDataType] = Seq( - StringTypeWithCollation, - StringTypeWithCollation, - StringTypeWithCollation, - StringTypeWithCollation, - StringTypeWithCollation) + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true)) override def nullable: Boolean = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index 30f07dcc1e67e..317a08b8c64c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -455,7 +455,7 @@ case class Conv( override def second: Expression = fromBaseExpr override def third: 
Expression = toBaseExpr override def inputTypes: Seq[AbstractDataType] = - Seq(StringTypeWithCollation, IntegerType, IntegerType) + Seq(StringTypeWithCollation(supportsTrimCollation = true), IntegerType, IntegerType) override def dataType: DataType = first.dataType override def nullable: Boolean = true @@ -1118,7 +1118,7 @@ case class Hex(child: Expression) override def nullIntolerant: Boolean = true override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(LongType, BinaryType, StringTypeWithCollation)) + Seq(TypeCollection(LongType, BinaryType, StringTypeWithCollation(supportsTrimCollation = true))) override def dataType: DataType = child.dataType match { case st: StringType => st @@ -1163,7 +1163,8 @@ case class Unhex(child: Expression, failOnError: Boolean = false) def this(expr: Expression) = this(expr, false) - override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeWithCollation) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeWithCollation(supportsTrimCollation = true)) override def nullable: Boolean = true override def dataType: DataType = BinaryType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 5f1b3dc0a01ac..fb30eab327d4c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.util.{MapData, RandomUUIDGenerator} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.errors.QueryExecutionErrors.raiseError import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.types.StringTypeWithCollation +import org.apache.spark.sql.internal.types.{AbstractMapType, StringTypeWithCollation} import org.apache.spark.sql.types._ import 
org.apache.spark.unsafe.types.UTF8String @@ -85,7 +85,12 @@ case class RaiseError(errorClass: Expression, errorParms: Expression, dataType: override def foldable: Boolean = false override def nullable: Boolean = true override def inputTypes: Seq[AbstractDataType] = - Seq(StringTypeWithCollation, MapType(StringType, StringType)) + Seq( + StringTypeWithCollation(supportsTrimCollation = true), + AbstractMapType( + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true) + )) override def left: Expression = errorClass override def right: Expression = errorParms @@ -416,8 +421,8 @@ case class AesEncrypt( override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType, BinaryType, - StringTypeWithCollation, - StringTypeWithCollation, + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true), BinaryType, BinaryType) override def children: Seq[Expression] = Seq(input, key, mode, padding, iv, aad) @@ -493,8 +498,8 @@ case class AesDecrypt( override def inputTypes: Seq[AbstractDataType] = { Seq(BinaryType, BinaryType, - StringTypeWithCollation, - StringTypeWithCollation, BinaryType) + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true), BinaryType) } override def prettyName: String = "aes_decrypt" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala index d4dcfdc5e72fb..fd6399d65271e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala @@ -51,7 +51,9 @@ abstract class ToNumberBase(left: Expression, right: Expression, errorOnFail: Bo } override def inputTypes: Seq[AbstractDataType] = - 
Seq(StringTypeWithCollation, StringTypeWithCollation) + Seq( + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true)) override def checkInputDataTypes(): TypeCheckResult = { val inputTypeCheck = super.checkInputDataTypes() @@ -288,7 +290,7 @@ case class ToCharacter(left: Expression, right: Expression) override def dataType: DataType = SQLConf.get.defaultStringType override def inputTypes: Seq[AbstractDataType] = - Seq(DecimalType, StringTypeWithCollation) + Seq(DecimalType, StringTypeWithCollation(supportsTrimCollation = true)) override def checkInputDataTypes(): TypeCheckResult = { val inputTypeCheck = super.checkInputDataTypes() if (inputTypeCheck.isSuccess) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index 86d3cee6a0600..114a43c34c040 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -346,7 +346,16 @@ package object expressions { */ def resolve(nameParts: Seq[String], resolver: Resolver): Option[NamedExpression] = { val (candidates, nestedFields) = getCandidatesForResolution(nameParts, resolver) - resolveCandidates(nameParts, resolver, candidates, nestedFields) + val resolvedCandidates = resolveCandidates(nameParts, resolver, candidates, nestedFields) + resolvedCandidates match { + case Seq() => None + case Seq(a) => Some(a) + case _ => + throw QueryCompilationErrors.ambiguousReferenceError( + UnresolvedAttribute(nameParts).name, + resolvedCandidates.map(_.toAttribute) + ) + } } def getCandidatesForResolution( @@ -371,7 +380,7 @@ package object expressions { nameParts: Seq[String], resolver: Resolver, candidates: Seq[Attribute], - nestedFields: Seq[String]): Option[NamedExpression] = { + nestedFields: Seq[String]): 
Seq[NamedExpression] = { def name = UnresolvedAttribute(nameParts).name // We may have resolved the attributes from metadata columns. The resolved attributes will be // put in a logical plan node and becomes normal attributes. They can still keep the special @@ -389,19 +398,19 @@ package object expressions { val fieldExprs = nestedFields.foldLeft(a: Expression) { (e, name) => ExtractValue(e, Literal(name), resolver) } - Some(Alias(fieldExprs, nestedFields.last)()) + Seq(Alias(fieldExprs, nestedFields.last)()) case Seq(a) => // One match, no nested fields, use it. - Some(a) + Seq(a) case Seq() => // No matches. - None + Seq() case ambiguousReferences => // More than one match. - throw QueryCompilationErrors.ambiguousReferenceError(name, ambiguousReferences) + ambiguousReferences } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/pipeOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/pipeOperators.scala index 1b5ee54729136..2ee68663ad2fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/pipeOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/pipeOperators.scala @@ -18,7 +18,11 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.{PIPE_EXPRESSION, PIPE_OPERATOR, TreePattern} import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.types.DataType /** * Represents an expression when used with a SQL pipe operator. @@ -30,19 +34,56 @@ import org.apache.spark.sql.errors.QueryCompilationErrors * @param clause The clause of the pipe operator. This is used to generate error messages. 
*/ case class PipeExpression(child: Expression, isAggregate: Boolean, clause: String) - extends UnaryExpression with RuntimeReplaceable { + extends UnaryExpression with Unevaluable { + final override val nodePatterns = Seq(PIPE_EXPRESSION) + final override lazy val resolved = false override def withNewChildInternal(newChild: Expression): Expression = PipeExpression(newChild, isAggregate, clause) - override lazy val replacement: Expression = { - val firstAggregateFunction: Option[AggregateFunction] = findFirstAggregate(child) - if (isAggregate && firstAggregateFunction.isEmpty) { - throw QueryCompilationErrors.pipeOperatorAggregateExpressionContainsNoAggregateFunction(child) - } else if (!isAggregate) { - firstAggregateFunction.foreach { a => - throw QueryCompilationErrors.pipeOperatorContainsAggregateFunction(a, clause) + override def dataType: DataType = child.dataType +} + +/** + * Represents the location within a logical plan that a SQL pipe operator appeared. + * This acts as a logical boundary that works to prevent the analyzer from modifying the logical + * operators above and below the boundary. + */ +case class PipeOperator(child: LogicalPlan) extends UnaryNode { + final override val nodePatterns: Seq[TreePattern] = Seq(PIPE_OPERATOR) + override def output: Seq[Attribute] = child.output + override def withNewChildInternal(newChild: LogicalPlan): PipeOperator = copy(child = newChild) +} + +/** This rule removes all PipeOperator nodes from a logical plan at the end of analysis. */ +object EliminatePipeOperators extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( + _.containsPattern(PIPE_OPERATOR), ruleId) { + case PipeOperator(child) => child + } +} + +/** + * Validates and strips PipeExpression nodes from a logical plan once the child expressions are + * resolved. 
+ */ +object ValidateAndStripPipeExpressions extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( + _.containsPattern(PIPE_EXPRESSION), ruleId) { + case node: LogicalPlan => + node.resolveExpressions { + case p: PipeExpression if p.child.resolved => + // Once the child expression is resolved, we can perform the necessary invariant checks + // and then remove this expression, replacing it with the child expression instead. + val firstAggregateFunction: Option[AggregateFunction] = findFirstAggregate(p.child) + if (p.isAggregate && firstAggregateFunction.isEmpty) { + throw QueryCompilationErrors + .pipeOperatorAggregateExpressionContainsNoAggregateFunction(p.child) + } else if (!p.isAggregate) { + firstAggregateFunction.foreach { a => + throw QueryCompilationErrors.pipeOperatorContainsAggregateFunction(a, p.clause) + } + } + p.child } - } - child } /** Returns the first aggregate function in the given expression, or None if not found. */ @@ -67,6 +108,7 @@ object PipeOperators { val offsetClause = "OFFSET" val orderByClause = "ORDER BY" val selectClause = "SELECT" + val setClause = "SET" val sortByClause = "SORT BY" val sortByDistributeByClause = "SORT BY ... DISTRIBUTE BY ..." 
val windowClause = "WINDOW" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 986bc63363d5d..d8d81a9cc12f8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LeafNode, LogicalPlan, Project, Union} import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.util.{CollationFactory, TypeUtils} +import org.apache.spark.sql.catalyst.util.SparkStringUtils.truncatedString import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -487,7 +488,10 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { } } - override def toString: String = s"$value IN ${list.mkString("(", ",", ")")}" + override def simpleString(maxFields: Int): String = + s"$value IN ${truncatedString(list, "(", ",", ")", maxFields)}" + + override def toString: String = simpleString(Int.MaxValue) override def eval(input: InternalRow): Any = { if (list.isEmpty && !legacyNullInEmptyBehavior) { @@ -608,15 +612,29 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with require(hset != null, "hset could not be null") - override def toString: String = { - val listString = hset.toSeq - .map(elem => Literal(elem, child.dataType).toString) - // Sort elements for deterministic behaviours - .sorted - .mkString(", ") - s"$child INSET $listString" + override def simpleString(maxFields: Int): String = { + if (!child.resolved) { + return s"$child INSET (values with unresolved data types)" + } + if (hset.size <= maxFields) { + val listString = 
hset.toSeq + .map(elem => Literal(elem, child.dataType).toString) + // Sort elements for deterministic behaviours + .sorted + .mkString(", ") + s"$child INSET $listString" + } else { + // Skip sorting if there are many elements. Do not use truncatedString because we would have + // to convert elements we do not print to Literals. + val listString = hset.take(maxFields).toSeq + .map(elem => Literal(elem, child.dataType).toString) + .mkString(", ") + s"$child INSET $listString, ... ${hset.size - maxFields} more fields" + } } + override def toString: String = simpleString(Int.MaxValue) + @transient private[this] lazy val hasNull: Boolean = hset.contains(null) @transient private[this] lazy val isNaN: Any => Boolean = child.dataType match { case DoubleType => (value: Any) => java.lang.Double.isNaN(value.asInstanceOf[java.lang.Double]) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 7148d3738f7fa..50c699ef69bd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -21,12 +21,12 @@ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedSeed} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch -import org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.{ordinalNumber, toSQLExpr, toSQLId, toSQLType} +import org.apache.spark.sql.catalyst.expressions.ExpectsInputTypes.{toSQLExpr, toSQLId} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral} import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.{BinaryLike, 
TernaryLike, UnaryLike} import org.apache.spark.sql.catalyst.trees.TreePattern.{EXPRESSION_WITH_RANDOM_SEED, RUNTIME_REPLACEABLE, TreePattern} -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types._ import org.apache.spark.util.random.XORShiftRandom @@ -206,7 +206,7 @@ object Randn { since = "4.0.0", group = "math_funcs") case class Uniform(min: Expression, max: Expression, seedExpression: Expression, hideSeed: Boolean) - extends RuntimeReplaceable with TernaryLike[Expression] with RDG { + extends RuntimeReplaceable with TernaryLike[Expression] with RDG with ExpectsInputTypes { def this(min: Expression, max: Expression) = this(min, max, UnresolvedSeed, hideSeed = true) def this(min: Expression, max: Expression, seedExpression: Expression) = @@ -216,63 +216,46 @@ case class Uniform(min: Expression, max: Expression, seedExpression: Expression, override val nodePatterns: Seq[TreePattern] = Seq(RUNTIME_REPLACEABLE, EXPRESSION_WITH_RANDOM_SEED) + override def inputTypes: Seq[AbstractDataType] = { + val randomSeedTypes = TypeCollection(IntegerType, LongType) + Seq(NumericType, NumericType, randomSeedTypes) + } + override def dataType: DataType = { - val first = min.dataType - val second = max.dataType (min.dataType, max.dataType) match { case _ if !seedExpression.resolved || seedExpression.dataType == NullType => NullType - case (_, NullType) | (NullType, _) => NullType - case (_, LongType) | (LongType, _) - if Seq(first, second).forall(integer) => LongType - case (_, IntegerType) | (IntegerType, _) - if Seq(first, second).forall(integer) => IntegerType - case (_, ShortType) | (ShortType, _) - if Seq(first, second).forall(integer) => ShortType - case (_, DoubleType) | (DoubleType, _) => DoubleType - case (_, FloatType) | (FloatType, _) => FloatType + case (left: IntegralType, right: IntegralType) => + if (UpCastRule.legalNumericPrecedence(left, 
right)) right else left + case (_: NumericType, DoubleType) | (DoubleType, _: NumericType) => DoubleType + case (_: NumericType, FloatType) | (FloatType, _: NumericType) => FloatType + case (lhs: DecimalType, rhs: DecimalType) => if (lhs.isWiderThan(rhs)) lhs else rhs + case (_, d: DecimalType) => d + case (d: DecimalType, _) => d case _ => throw SparkException.internalError( s"Unexpected argument data types: ${min.dataType}, ${max.dataType}") } } - private def integer(t: DataType): Boolean = t match { - case _: ShortType | _: IntegerType | _: LongType => true - case _ => false - } - override def sql: String = { s"uniform(${min.sql}, ${max.sql}${if (hideSeed) "" else s", ${seedExpression.sql}"})" } override def checkInputDataTypes(): TypeCheckResult = { - var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess + var result: TypeCheckResult = super.checkInputDataTypes() def requiredType = "integer or floating-point" - Seq((min, "min", 0), - (max, "max", 1), - (seedExpression, "seed", 2)).foreach { - case (expr: Expression, name: String, index: Int) => - if (result == TypeCheckResult.TypeCheckSuccess) { - if (!expr.foldable) { - result = DataTypeMismatch( - errorSubClass = "NON_FOLDABLE_INPUT", - messageParameters = Map( - "inputName" -> toSQLId(name), - "inputType" -> requiredType, - "inputExpr" -> toSQLExpr(expr))) - } else expr.dataType match { - case _: ShortType | _: IntegerType | _: LongType | _: FloatType | _: DoubleType | - _: NullType => - case _ => - result = DataTypeMismatch( - errorSubClass = "UNEXPECTED_INPUT_TYPE", - messageParameters = Map( - "paramIndex" -> ordinalNumber(index), - "requiredType" -> requiredType, - "inputSql" -> toSQLExpr(expr), - "inputType" -> toSQLType(expr.dataType))) - } + Seq((min, "min"), + (max, "max"), + (seedExpression, "seed")).foreach { + case (expr: Expression, name: String) => + if (result == TypeCheckResult.TypeCheckSuccess && !expr.foldable) { + result = DataTypeMismatch( + errorSubClass = "NON_FOLDABLE_INPUT", + 
messageParameters = Map( + "inputName" -> toSQLId(name), + "inputType" -> requiredType, + "inputExpr" -> toSQLExpr(expr))) } } result @@ -330,7 +313,8 @@ object Uniform { group = "string_funcs") case class RandStr( length: Expression, override val seedExpression: Expression, hideSeed: Boolean) - extends ExpressionWithRandomSeed with BinaryLike[Expression] with Nondeterministic { + extends ExpressionWithRandomSeed with BinaryLike[Expression] with Nondeterministic + with ExpectsInputTypes { def this(length: Expression) = this(length, UnresolvedSeed, hideSeed = true) def this(length: Expression, seedExpression: Expression) = @@ -342,6 +326,10 @@ case class RandStr( override def left: Expression = length override def right: Expression = seedExpression + override def inputTypes: Seq[AbstractDataType] = Seq( + IntegerType, + TypeCollection(IntegerType, LongType)) + /** * Record ID within each partition. By being transient, the Random Number Generator is * reset every time we serialize and deserialize and initialize it. 
@@ -366,52 +354,48 @@ case class RandStr( } override def checkInputDataTypes(): TypeCheckResult = { - var result: TypeCheckResult = TypeCheckResult.TypeCheckSuccess - def requiredType = "INT or SMALLINT" - Seq((length, "length", 0), - (seedExpression, "seed", 1)).foreach { - case (expr: Expression, name: String, index: Int) => - if (result == TypeCheckResult.TypeCheckSuccess) { - if (!expr.foldable) { - result = DataTypeMismatch( - errorSubClass = "NON_FOLDABLE_INPUT", - messageParameters = Map( - "inputName" -> toSQLId(name), - "inputType" -> requiredType, - "inputExpr" -> toSQLExpr(expr))) - } else expr.dataType match { - case _: ShortType | _: IntegerType => - case _: LongType if index == 1 => - case _ => - result = DataTypeMismatch( - errorSubClass = "UNEXPECTED_INPUT_TYPE", - messageParameters = Map( - "paramIndex" -> ordinalNumber(index), - "requiredType" -> requiredType, - "inputSql" -> toSQLExpr(expr), - "inputType" -> toSQLType(expr.dataType))) - } + var result: TypeCheckResult = super.checkInputDataTypes() + Seq((length, "length"), + (seedExpression, "seed")).foreach { + case (expr: Expression, name: String) => + if (result == TypeCheckResult.TypeCheckSuccess && !expr.foldable) { + result = DataTypeMismatch( + errorSubClass = "NON_FOLDABLE_INPUT", + messageParameters = Map( + "inputName" -> toSQLId(name), + "inputType" -> "integer", + "inputExpr" -> toSQLExpr(expr))) } } result } override def evalInternal(input: InternalRow): Any = { - val numChars = length.eval(input).asInstanceOf[Number].intValue() + val numChars = lengthInteger() ExpressionImplUtils.randStr(rng, numChars) } + private def lengthInteger(): Int = { + // We should have already added a cast to IntegerType (if necessary) in + // FunctionArgumentTypeCoercion. 
+ assert(length.dataType == IntegerType, s"Expected IntegerType, got ${length.dataType}") + val result = length.eval().asInstanceOf[Int] + if (result < 0) { + throw QueryExecutionErrors.unexpectedValueForLengthInFunctionError(prettyName, result) + } + result + } + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val className = classOf[XORShiftRandom].getName val rngTerm = ctx.addMutableState(className, "rng") ctx.addPartitionInitializationStatement( s"$rngTerm = new $className(${seed}L + partitionIndex);") - val eval = length.genCode(ctx) + val numChars = lengthInteger() ev.copy(code = code""" - |${eval.code} |UTF8String ${ev.value} = - | ${classOf[ExpressionImplUtils].getName}.randStr($rngTerm, (int)(${eval.value})); + | ${classOf[ExpressionImplUtils].getName}.randStr($rngTerm, $numChars); |boolean ${ev.isNull} = false; |""".stripMargin, isNull = FalseLiteral) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index c97920619ba4d..efd7e5c07de40 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1863,7 +1863,7 @@ trait PadExpressionBuilderBase extends ExpressionBuilder { BinaryPad(funcName, expressions(0), expressions(1), Literal(Array[Byte](0))) } else { createStringPad(expressions(0), - expressions(1), Literal.create(" ", SQLConf.get.defaultStringType)) + expressions(1), Literal(" ")) } } else if (numArgs == 3) { if (expressions(0).dataType == BinaryType && expressions(2).dataType == BinaryType @@ -3557,9 +3557,9 @@ case class Sentences( ArrayType(ArrayType(str.dataType, containsNull = false), containsNull = false) override def inputTypes: Seq[AbstractDataType] = Seq( - StringTypeWithCollation, - StringTypeWithCollation, - 
StringTypeWithCollation + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true) ) override def first: Expression = str override def second: Expression = language diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala index bd6f65b61468d..c0a2bf25fbe67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala @@ -19,11 +19,10 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.mutable.ArrayBuffer -import org.apache.spark.sql.catalyst.analysis.{LazyOuterReference, UnresolvedOuterReference} +import org.apache.spark.sql.catalyst.analysis.UnresolvedPlanId import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.trees.TreePattern import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf @@ -374,13 +373,6 @@ object SubExprUtils extends PredicateHelper { val nonEquivalentGroupByExprs = groupByExprs -- correlatedEquivalentExprs nonEquivalentGroupByExprs } - - def removeLazyOuterReferences(logicalPlan: LogicalPlan): LogicalPlan = { - logicalPlan.transformAllExpressionsWithPruning( - _.containsPattern(TreePattern.LAZY_OUTER_REFERENCE)) { - case or: LazyOuterReference => UnresolvedOuterReference(or.nameParts) - } - } } /** @@ -407,8 +399,7 @@ case class ScalarSubquery( joinCond: Seq[Expression] = Seq.empty, hint: Option[HintInfo] = None, mayHaveCountBug: Option[Boolean] = None, - needSingleJoin: Option[Boolean] = None, - 
hasExplicitOuterRefs: Boolean = false) + needSingleJoin: Option[Boolean] = None) extends SubqueryExpression(plan, outerAttrs, exprId, joinCond, hint) with Unevaluable { override def dataType: DataType = { if (!plan.schema.fields.nonEmpty) { @@ -449,6 +440,14 @@ object ScalarSubquery { } } +case class UnresolvedScalarSubqueryPlanId(planId: Long) + extends UnresolvedPlanId { + + override def withPlan(plan: LogicalPlan): Expression = { + ScalarSubquery(plan) + } +} + /** * A subquery that can return multiple rows and columns. This should be rewritten as a join * with the outer query during the optimization phase. @@ -577,8 +576,7 @@ case class Exists( outerAttrs: Seq[Expression] = Seq.empty, exprId: ExprId = NamedExpression.newExprId, joinCond: Seq[Expression] = Seq.empty, - hint: Option[HintInfo] = None, - hasExplicitOuterRefs: Boolean = false) + hint: Option[HintInfo] = None) extends SubqueryExpression(plan, outerAttrs, exprId, joinCond, hint) with Predicate with Unevaluable { @@ -603,3 +601,11 @@ case class Exists( final override def nodePatternsInternal(): Seq[TreePattern] = Seq(EXISTS_SUBQUERY) } + +case class UnresolvedExistsPlanId(planId: Long) + extends UnresolvedPlanId { + + override def withPlan(plan: LogicalPlan): Expression = { + Exists(plan) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala index 22dcd33937dfb..845ca0b608ef3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala @@ -57,13 +57,14 @@ case class UrlEncode(child: Expression) SQLConf.get.defaultStringType, "encode", Seq(child), - Seq(StringTypeWithCollation)) + Seq(StringTypeWithCollation(supportsTrimCollation = true))) override protected def withNewChildInternal(newChild: Expression): Expression = 
{ copy(child = newChild) } - override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeWithCollation) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeWithCollation(supportsTrimCollation = true)) override def prettyName: String = "url_encode" } @@ -96,13 +97,14 @@ case class UrlDecode(child: Expression, failOnError: Boolean = true) SQLConf.get.defaultStringType, "decode", Seq(child, Literal(failOnError)), - Seq(StringTypeWithCollation, BooleanType)) + Seq(StringTypeWithCollation(supportsTrimCollation = true), BooleanType)) override protected def withNewChildInternal(newChild: Expression): Expression = { copy(child = newChild) } - override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeWithCollation) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeWithCollation(supportsTrimCollation = true)) override def prettyName: String = "url_decode" } @@ -211,7 +213,7 @@ case class ParseUrl( override def nullable: Boolean = true override def inputTypes: Seq[AbstractDataType] = - Seq.fill(children.size)(StringTypeWithCollation) + Seq.fill(children.size)(StringTypeWithCollation(supportsTrimCollation = true)) override def dataType: DataType = SQLConf.get.defaultStringType override def prettyName: String = "parse_url" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala index 06aec93912984..ff8b168793b5d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala @@ -66,7 +66,8 @@ case class ParseJson(child: Expression, failOnError: Boolean = true) inputTypes :+ BooleanType :+ BooleanType, returnNullable = !failOnError) - override def inputTypes: Seq[AbstractDataType] = StringTypeWithCollation :: Nil + 
override def inputTypes: Seq[AbstractDataType] = + StringTypeWithCollation(supportsTrimCollation = true) :: Nil override def dataType: DataType = VariantType @@ -183,33 +184,37 @@ case class ToVariantObject(child: Expression) } } -object VariantPathParser extends RegexParsers { - // A path segment in the `VariantGet` expression represents either an object key access or an - // array index access. - type PathSegment = Either[String, Int] +// A path segment in the `VariantGet` expression represents either an object key access or an array +// index access. +sealed abstract class VariantPathSegment extends Serializable + +case class ObjectExtraction(key: String) extends VariantPathSegment + +case class ArrayExtraction(index: Int) extends VariantPathSegment +object VariantPathParser extends RegexParsers { private def root: Parser[Char] = '$' // Parse index segment like `[123]`. - private def index: Parser[PathSegment] = + private def index: Parser[VariantPathSegment] = for { index <- '[' ~> "\\d+".r <~ ']' } yield { - scala.util.Right(index.toInt) + ArrayExtraction(index.toInt) } // Parse key segment like `.name`, `['name']`, or `["name"]`. - private def key: Parser[PathSegment] = + private def key: Parser[VariantPathSegment] = for { key <- '.' 
~> "[^\\.\\[]+".r | "['" ~> "[^\\'\\?]+".r <~ "']" | "[\"" ~> "[^\\\"\\?]+".r <~ "\"]" } yield { - scala.util.Left(key) + ObjectExtraction(key) } - private val parser: Parser[List[PathSegment]] = phrase(root ~> rep(key | index)) + private val parser: Parser[List[VariantPathSegment]] = phrase(root ~> rep(key | index)) - def parse(str: String): Option[Array[PathSegment]] = { + def parse(str: String): Option[Array[VariantPathSegment]] = { this.parseAll(parser, str) match { case Success(result, _) => Some(result.toArray) case _ => None @@ -270,21 +275,20 @@ case class VariantGet( final override def nodePatternsInternal(): Seq[TreePattern] = Seq(VARIANT_GET) override def inputTypes: Seq[AbstractDataType] = - Seq(VariantType, StringTypeWithCollation) + Seq(VariantType, StringTypeWithCollation(supportsTrimCollation = true)) override def prettyName: String = if (failOnError) "variant_get" else "try_variant_get" override def nullable: Boolean = true override def nullIntolerant: Boolean = true + private lazy val castArgs = VariantCastArgs( + failOnError, + timeZoneId, + zoneId) + protected override def nullSafeEval(input: Any, path: Any): Any = { - VariantGet.variantGet( - input.asInstanceOf[VariantVal], - parsedPath, - dataType, - failOnError, - timeZoneId, - zoneId) + VariantGet.variantGet(input.asInstanceOf[VariantVal], parsedPath, dataType, castArgs) } protected override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -292,15 +296,14 @@ case class VariantGet( val tmp = ctx.freshVariable("tmp", classOf[Object]) val parsedPathArg = ctx.addReferenceObj("parsedPath", parsedPath) val dataTypeArg = ctx.addReferenceObj("dataType", dataType) - val zoneStrArg = ctx.addReferenceObj("zoneStr", timeZoneId) - val zoneIdArg = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) + val castArgsArg = ctx.addReferenceObj("castArgs", castArgs) val code = code""" ${childCode.code} boolean ${ev.isNull} = ${childCode.isNull}; ${CodeGenerator.javaType(dataType)} 
${ev.value} = ${CodeGenerator.defaultValue(dataType)}; if (!${ev.isNull}) { Object $tmp = org.apache.spark.sql.catalyst.expressions.variant.VariantGet.variantGet( - ${childCode.value}, $parsedPathArg, $dataTypeArg, $failOnError, $zoneStrArg, $zoneIdArg); + ${childCode.value}, $parsedPathArg, $dataTypeArg, $castArgsArg); if ($tmp == null) { ${ev.isNull} = true; } else { @@ -322,6 +325,12 @@ case class VariantGet( override def withTimeZone(timeZoneId: String): VariantGet = copy(timeZoneId = Option(timeZoneId)) } +// Several parameters used by `VariantGet.cast`. Packed together to simplify parameter passing. +case class VariantCastArgs( + failOnError: Boolean, + zoneStr: Option[String], + zoneId: ZoneId) + case object VariantGet { /** * Returns whether a data type can be cast into/from variant. For scalar types, we allow a subset @@ -329,6 +338,7 @@ case object VariantGet { */ def checkDataType(dataType: DataType, allowStructsAndMaps: Boolean = true): Boolean = dataType match { + case CharType(_) | VarcharType(_) => false case _: NumericType | BooleanType | _: StringType | BinaryType | _: DatetimeType | VariantType => true @@ -343,35 +353,28 @@ case object VariantGet { /** The actual implementation of the `VariantGet` expression. 
*/ def variantGet( input: VariantVal, - parsedPath: Array[VariantPathParser.PathSegment], + parsedPath: Array[VariantPathSegment], dataType: DataType, - failOnError: Boolean, - zoneStr: Option[String], - zoneId: ZoneId): Any = { + castArgs: VariantCastArgs): Any = { var v = new Variant(input.getValue, input.getMetadata) for (path <- parsedPath) { v = path match { - case scala.util.Left(key) if v.getType == Type.OBJECT => v.getFieldByKey(key) - case scala.util.Right(index) if v.getType == Type.ARRAY => v.getElementAtIndex(index) + case ObjectExtraction(key) if v.getType == Type.OBJECT => v.getFieldByKey(key) + case ArrayExtraction(index) if v.getType == Type.ARRAY => v.getElementAtIndex(index) case _ => null } if (v == null) return null } - VariantGet.cast(v, dataType, failOnError, zoneStr, zoneId) + VariantGet.cast(v, dataType, castArgs) } /** * A simple wrapper of the `cast` function that takes `Variant` rather than `VariantVal`. The * `Cast` expression uses it and makes the implementation simpler. */ - def cast( - input: VariantVal, - dataType: DataType, - failOnError: Boolean, - zoneStr: Option[String], - zoneId: ZoneId): Any = { + def cast(input: VariantVal, dataType: DataType, castArgs: VariantCastArgs): Any = { val v = new Variant(input.getValue, input.getMetadata) - VariantGet.cast(v, dataType, failOnError, zoneStr, zoneId) + VariantGet.cast(v, dataType, castArgs) } /** @@ -381,15 +384,10 @@ case object VariantGet { * "hello" to int). If the cast fails, throw an exception when `failOnError` is true, or return a * SQL NULL when it is false. 
*/ - def cast( - v: Variant, - dataType: DataType, - failOnError: Boolean, - zoneStr: Option[String], - zoneId: ZoneId): Any = { + def cast(v: Variant, dataType: DataType, castArgs: VariantCastArgs): Any = { def invalidCast(): Any = { - if (failOnError) { - throw QueryExecutionErrors.invalidVariantCast(v.toJson(zoneId), dataType) + if (castArgs.failOnError) { + throw QueryExecutionErrors.invalidVariantCast(v.toJson(castArgs.zoneId), dataType) } else { null } @@ -409,7 +407,7 @@ case object VariantGet { val input = variantType match { case Type.OBJECT | Type.ARRAY => return if (dataType.isInstanceOf[StringType]) { - UTF8String.fromString(v.toJson(zoneId)) + UTF8String.fromString(v.toJson(castArgs.zoneId)) } else { invalidCast() } @@ -433,29 +431,20 @@ case object VariantGet { messageParameters = Map("id" -> v.getTypeInfo.toString) ) } - // We mostly use the `Cast` expression to implement the cast. However, `Cast` silently - // ignores the overflow in the long/decimal -> timestamp cast, and we want to enforce - // strict overflow checks. 
input.dataType match { case LongType if dataType == TimestampType => - try Math.multiplyExact(input.value.asInstanceOf[Long], MICROS_PER_SECOND) + try castLongToTimestamp(input.value.asInstanceOf[Long]) catch { case _: ArithmeticException => invalidCast() } case _: DecimalType if dataType == TimestampType => - try { - input.value - .asInstanceOf[Decimal] - .toJavaBigDecimal - .multiply(new java.math.BigDecimal(MICROS_PER_SECOND)) - .toBigInteger - .longValueExact() - } catch { + try castDecimalToTimestamp(input.value.asInstanceOf[Decimal]) + catch { case _: ArithmeticException => invalidCast() } case _ => if (Cast.canAnsiCast(input.dataType, dataType)) { - val result = Cast(input, dataType, zoneStr, EvalMode.TRY).eval() + val result = Cast(input, dataType, castArgs.zoneStr, EvalMode.TRY).eval() if (result == null) invalidCast() else result } else { invalidCast() @@ -466,7 +455,7 @@ case object VariantGet { val size = v.arraySize() val array = new Array[Any](size) for (i <- 0 until size) { - array(i) = cast(v.getElementAtIndex(i), elementType, failOnError, zoneStr, zoneId) + array(i) = cast(v.getElementAtIndex(i), elementType, castArgs) } new GenericArrayData(array) } else { @@ -480,7 +469,7 @@ case object VariantGet { for (i <- 0 until size) { val field = v.getFieldAtIndex(i) keyArray(i) = UTF8String.fromString(field.key) - valueArray(i) = cast(field.value, valueType, failOnError, zoneStr, zoneId) + valueArray(i) = cast(field.value, valueType, castArgs) } ArrayBasedMapData(keyArray, valueArray) } else { @@ -493,8 +482,7 @@ case object VariantGet { val field = v.getFieldAtIndex(i) st.getFieldIndex(field.key) match { case Some(idx) => - row.update(idx, - cast(field.value, fields(idx).dataType, failOnError, zoneStr, zoneId)) + row.update(idx, cast(field.value, fields(idx).dataType, castArgs)) case _ => } } @@ -504,6 +492,27 @@ case object VariantGet { } } } + + // We mostly use the `Cast` expression to implement the cast, but we need some custom logic for + // certain 
type combinations. + // + // `castLongToTimestamp/castDecimalToTimestamp`: `Cast` silently ignores the overflow in the + // long/decimal -> timestamp cast, and we want to enforce strict overflow checks. They both throw + // an `ArithmeticException` when overflow happens. + def castLongToTimestamp(input: Long): Long = + Math.multiplyExact(input, MICROS_PER_SECOND) + + def castDecimalToTimestamp(input: Decimal): Long = { + val multiplier = new java.math.BigDecimal(MICROS_PER_SECOND) + input.toJavaBigDecimal.multiply(multiplier).toBigInteger.longValueExact() + } + + // Cast decimal to string, but strip any trailing zeros. We don't have to call it if the decimal + // is returned by `Variant.getDecimal`, which already strips any trailing zeros. But we need it + // if the decimal is produced by Spark internally, e.g., on a shredded decimal produced by the + // Spark Parquet reader. + def castDecimalToString(input: Decimal): UTF8String = + UTF8String.fromString(input.toJavaBigDecimal.stripTrailingZeros.toPlainString) } abstract class ParseJsonExpressionBuilderBase(failOnError: Boolean) extends ExpressionBuilder { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XmlExpressionEvalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XmlExpressionEvalUtils.scala index dff88475327a2..44b98026d62d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XmlExpressionEvalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XmlExpressionEvalUtils.scala @@ -17,9 +17,10 @@ package org.apache.spark.sql.catalyst.expressions.xml +import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.catalyst.xml.XmlInferSchema import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{ArrayType, DataType, StructType} +import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String object 
XmlExpressionEvalUtils { @@ -40,3 +41,82 @@ object XmlExpressionEvalUtils { UTF8String.fromString(dataType.sql) } } + +trait XPathEvaluator { + + protected val path: UTF8String + + @transient protected lazy val xpathUtil: UDFXPathUtil = new UDFXPathUtil + + final def evaluate(xml: UTF8String): Any = { + if (xml == null || xml.toString.isEmpty || path == null || path.toString.isEmpty) return null + doEvaluate(xml) + } + + def doEvaluate(xml: UTF8String): Any +} + +case class XPathBooleanEvaluator(path: UTF8String) extends XPathEvaluator { + override def doEvaluate(xml: UTF8String): Any = { + xpathUtil.evalBoolean(xml.toString, path.toString) + } +} + +case class XPathShortEvaluator(path: UTF8String) extends XPathEvaluator { + override def doEvaluate(xml: UTF8String): Any = { + val ret = xpathUtil.evalNumber(xml.toString, path.toString) + if (ret eq null) null.asInstanceOf[Short] else ret.shortValue() + } +} + +case class XPathIntEvaluator(path: UTF8String) extends XPathEvaluator { + override def doEvaluate(xml: UTF8String): Any = { + val ret = xpathUtil.evalNumber(xml.toString, path.toString) + if (ret eq null) null.asInstanceOf[Int] else ret.intValue() + } +} + +case class XPathLongEvaluator(path: UTF8String) extends XPathEvaluator { + override def doEvaluate(xml: UTF8String): Any = { + val ret = xpathUtil.evalNumber(xml.toString, path.toString) + if (ret eq null) null.asInstanceOf[Long] else ret.longValue() + } +} + +case class XPathFloatEvaluator(path: UTF8String) extends XPathEvaluator { + override def doEvaluate(xml: UTF8String): Any = { + val ret = xpathUtil.evalNumber(xml.toString, path.toString) + if (ret eq null) null.asInstanceOf[Float] else ret.floatValue() + } +} + +case class XPathDoubleEvaluator(path: UTF8String) extends XPathEvaluator { + override def doEvaluate(xml: UTF8String): Any = { + val ret = xpathUtil.evalNumber(xml.toString, path.toString) + if (ret eq null) null.asInstanceOf[Double] else ret.doubleValue() + } +} + +case class 
XPathStringEvaluator(path: UTF8String) extends XPathEvaluator { + override def doEvaluate(xml: UTF8String): Any = { + val ret = xpathUtil.evalString(xml.toString, path.toString) + UTF8String.fromString(ret) + } +} + +case class XPathListEvaluator(path: UTF8String) extends XPathEvaluator { + override def doEvaluate(xml: UTF8String): Any = { + val nodeList = xpathUtil.evalNodeList(xml.toString, path.toString) + if (nodeList ne null) { + val ret = new Array[AnyRef](nodeList.getLength) + var i = 0 + while (i < nodeList.getLength) { + ret(i) = UTF8String.fromString(nodeList.item(i).getNodeValue) + i += 1 + } + new GenericArrayData(ret) + } else { + null + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala index 9848e062a08fd..2e591288a21cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala @@ -21,8 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Cast._ -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback -import org.apache.spark.sql.catalyst.util.GenericArrayData +import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.types.StringTypeWithCollation import org.apache.spark.sql.types._ @@ -34,16 +33,17 @@ import org.apache.spark.unsafe.types.UTF8String * This is not the world's most efficient implementation due to type conversion, but works. 
*/ abstract class XPathExtract - extends BinaryExpression with ExpectsInputTypes with CodegenFallback { + extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes { override def left: Expression = xml override def right: Expression = path - override def nullIntolerant: Boolean = true /** XPath expressions are always nullable, e.g. if the xml string is empty. */ override def nullable: Boolean = true override def inputTypes: Seq[AbstractDataType] = - Seq(StringTypeWithCollation, StringTypeWithCollation) + Seq( + StringTypeWithCollation(supportsTrimCollation = true), + StringTypeWithCollation(supportsTrimCollation = true)) override def checkInputDataTypes(): TypeCheckResult = { if (!path.foldable) { @@ -51,7 +51,7 @@ abstract class XPathExtract errorSubClass = "NON_FOLDABLE_INPUT", messageParameters = Map( "inputName" -> toSQLId("path"), - "inputType" -> toSQLType(StringTypeWithCollation), + "inputType" -> toSQLType(StringTypeWithCollation(supportsTrimCollation = true)), "inputExpr" -> toSQLExpr(path) ) ) @@ -60,12 +60,20 @@ abstract class XPathExtract } } - @transient protected lazy val xpathUtil = new UDFXPathUtil - @transient protected lazy val pathString: String = path.eval().asInstanceOf[UTF8String].toString - /** Concrete implementations need to override the following three methods. 
*/ def xml: Expression def path: Expression + + @transient protected lazy val pathUTF8String: UTF8String = path.eval().asInstanceOf[UTF8String] + + protected def evaluator: XPathEvaluator + + override def replacement: Expression = Invoke( + Literal.create(evaluator, ObjectType(classOf[XPathEvaluator])), + "evaluate", + dataType, + Seq(xml), + Seq(xml.dataType)) } // scalastyle:off line.size.limit @@ -81,11 +89,9 @@ abstract class XPathExtract // scalastyle:on line.size.limit case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract with Predicate { - override def prettyName: String = "xpath_boolean" + @transient override lazy val evaluator: XPathEvaluator = XPathBooleanEvaluator(pathUTF8String) - override def nullSafeEval(xml: Any, path: Any): Any = { - xpathUtil.evalBoolean(xml.asInstanceOf[UTF8String].toString, pathString) - } + override def prettyName: String = "xpath_boolean" override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): XPathBoolean = copy(xml = newLeft, path = newRight) @@ -103,14 +109,12 @@ case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract group = "xml_funcs") // scalastyle:on line.size.limit case class XPathShort(xml: Expression, path: Expression) extends XPathExtract { + + @transient override lazy val evaluator: XPathEvaluator = XPathShortEvaluator(pathUTF8String) + override def prettyName: String = "xpath_short" override def dataType: DataType = ShortType - override def nullSafeEval(xml: Any, path: Any): Any = { - val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString) - if (ret eq null) null else ret.shortValue() - } - override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): XPathShort = copy(xml = newLeft, path = newRight) } @@ -127,14 +131,12 @@ case class XPathShort(xml: Expression, path: Expression) extends XPathExtract { group = "xml_funcs") // scalastyle:on line.size.limit case class 
XPathInt(xml: Expression, path: Expression) extends XPathExtract { + + @transient override lazy val evaluator: XPathEvaluator = XPathIntEvaluator(pathUTF8String) + override def prettyName: String = "xpath_int" override def dataType: DataType = IntegerType - override def nullSafeEval(xml: Any, path: Any): Any = { - val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString) - if (ret eq null) null else ret.intValue() - } - override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): Expression = copy(xml = newLeft, path = newRight) } @@ -151,14 +153,12 @@ case class XPathInt(xml: Expression, path: Expression) extends XPathExtract { group = "xml_funcs") // scalastyle:on line.size.limit case class XPathLong(xml: Expression, path: Expression) extends XPathExtract { + + @transient override lazy val evaluator: XPathEvaluator = XPathLongEvaluator(pathUTF8String) + override def prettyName: String = "xpath_long" override def dataType: DataType = LongType - override def nullSafeEval(xml: Any, path: Any): Any = { - val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString) - if (ret eq null) null else ret.longValue() - } - override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): XPathLong = copy(xml = newLeft, path = newRight) } @@ -175,14 +175,12 @@ case class XPathLong(xml: Expression, path: Expression) extends XPathExtract { group = "xml_funcs") // scalastyle:on line.size.limit case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract { + + @transient override lazy val evaluator: XPathEvaluator = XPathFloatEvaluator(pathUTF8String) + override def prettyName: String = "xpath_float" override def dataType: DataType = FloatType - override def nullSafeEval(xml: Any, path: Any): Any = { - val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString) - if (ret eq null) null else ret.floatValue() - } - override protected def 
withNewChildrenInternal( newLeft: Expression, newRight: Expression): XPathFloat = copy(xml = newLeft, path = newRight) } @@ -199,15 +197,13 @@ case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract { group = "xml_funcs") // scalastyle:on line.size.limit case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { + + @transient override lazy val evaluator: XPathEvaluator = XPathDoubleEvaluator(pathUTF8String) + override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("xpath_double") override def dataType: DataType = DoubleType - override def nullSafeEval(xml: Any, path: Any): Any = { - val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString) - if (ret eq null) null else ret.doubleValue() - } - override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): XPathDouble = copy(xml = newLeft, path = newRight) } @@ -224,14 +220,12 @@ case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { group = "xml_funcs") // scalastyle:on line.size.limit case class XPathString(xml: Expression, path: Expression) extends XPathExtract { + + @transient override lazy val evaluator: XPathEvaluator = XPathStringEvaluator(pathUTF8String) + override def prettyName: String = "xpath_string" override def dataType: DataType = SQLConf.get.defaultStringType - override def nullSafeEval(xml: Any, path: Any): Any = { - val ret = xpathUtil.evalString(xml.asInstanceOf[UTF8String].toString, pathString) - UTF8String.fromString(ret) - } - override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): Expression = copy(xml = newLeft, path = newRight) } @@ -250,24 +244,12 @@ case class XPathString(xml: Expression, path: Expression) extends XPathExtract { group = "xml_funcs") // scalastyle:on line.size.limit case class XPathList(xml: Expression, path: Expression) extends XPathExtract { + + @transient override lazy val evaluator: 
XPathEvaluator = XPathListEvaluator(pathUTF8String) + override def prettyName: String = "xpath" override def dataType: DataType = ArrayType(SQLConf.get.defaultStringType) - override def nullSafeEval(xml: Any, path: Any): Any = { - val nodeList = xpathUtil.evalNodeList(xml.asInstanceOf[UTF8String].toString, pathString) - if (nodeList ne null) { - val ret = new Array[AnyRef](nodeList.getLength) - var i = 0 - while (i < nodeList.getLength) { - ret(i) = UTF8String.fromString(nodeList.item(i).getNodeValue) - i += 1 - } - new GenericArrayData(ret) - } else { - null - } - } - override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): XPathList = copy(xml = newLeft, path = newRight) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala index 6f004cbce4262..d8254f04b4d94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala @@ -126,7 +126,8 @@ case class XmlToStructs( defineCodeGen(ctx, ev, input => s"(InternalRow) $expr.nullSafeEval($input)") } - override def inputTypes: Seq[AbstractDataType] = StringTypeWithCollation :: Nil + override def inputTypes: Seq[AbstractDataType] = + StringTypeWithCollation(supportsTrimCollation = true) :: Nil override def prettyName: String = "from_xml" @@ -208,8 +209,8 @@ case class SchemaOfXml( dataType, "schemaOfXml", Seq(Literal(xmlInferSchema, xmlInferSchemaObjectType), child), - Seq(xmlInferSchemaObjectType, child.dataType) - ) + Seq(xmlInferSchemaObjectType, child.dataType), + returnNullable = false) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 13129d44fe0c2..1cd4b4cd29bcf 
100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -292,13 +292,9 @@ class JacksonParser( case _: StringType => (parser: JsonParser) => { // This must be enabled if we will retrieve the bytes directly from the raw content: - val includeSourceInLocation = JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION - val originalMask = if (includeSourceInLocation.enabledIn(parser.getFeatureMask)) { - 1 - } else { - 0 - } - parser.overrideStdFeatures(includeSourceInLocation.getMask, includeSourceInLocation.getMask) + val oldFeature = parser.getFeatureMask + val featureToAdd = JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION.getMask + parser.overrideStdFeatures(oldFeature | featureToAdd, featureToAdd) val result = parseJsonToken[UTF8String](parser, dataType) { case VALUE_STRING => UTF8String.fromString(parser.getText) @@ -343,8 +339,11 @@ class JacksonParser( UTF8String.fromBytes(writer.toByteArray) } } - // Reset back to the original configuration: - parser.overrideStdFeatures(includeSourceInLocation.getMask, originalMask) + // Reset back to the original configuration using `~0` as the mask, + // which is a bitmask with all bits set, effectively allowing all features + // to be reset. This ensures that every feature is restored to its previous + // state as defined by `oldFeature`. + parser.overrideStdFeatures(oldFeature, ~0) result } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSQLFunctionNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSQLFunctionNode.scala new file mode 100644 index 0000000000000..d9da38b4c2af4 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSQLFunctionNode.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.analysis.{SQLFunctionExpression, SQLFunctionNode, SQLScalarFunction, SQLTableFunction} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * This rule removes [[SQLScalarFunction]] and [[SQLFunctionNode]] wrapper. They are respected + * till the end of analysis stage because we want to see which part of an analyzed logical + * plan is generated from a SQL function and also perform ACL checks. + */ +object EliminateSQLFunctionNode extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + // Include subqueries when eliminating SQL function expressions otherwise we might miss + // expressions in subqueries which can be inlined by the rule `OptimizeOneRowRelationSubquery`. 
+ plan.transformWithSubqueries { + case SQLFunctionNode(_, child) => child + case f: SQLTableFunction => + throw SparkException.internalError( + s"SQL table function plan should be rewritten during analysis: $f") + case p: LogicalPlan => p.transformExpressions { + case f: SQLScalarFunction => f.child + case f: SQLFunctionExpression => + throw SparkException.internalError( + s"SQL function expression should be rewritten during analysis: $f") + } + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala index b3384c4e29566..ad1a1a99b8257 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala @@ -71,53 +71,64 @@ case class InlineCTE( * @param plan The plan to collect the CTEs from * @param cteMap A mutable map that accumulates the CTEs and their reference information by CTE * ids. - * @param collectCTERefs A function to collect CTE references so that the caller side can do some - * bookkeeping work. + * @param outerCTEId While collecting the map we use this optional CTE id to identify the + * current outer CTE. */ private def buildCTEMap( plan: LogicalPlan, cteMap: mutable.Map[Long, CTEReferenceInfo], - collectCTERefs: CTERelationRef => Unit = _ => ()): Unit = { + outerCTEId: Option[Long] = None): Unit = { plan match { case WithCTE(child, cteDefs) => - cteDefs.foreach { cteDef => - cteMap(cteDef.id) = CTEReferenceInfo( - cteDef = cteDef, - refCount = 0, - outgoingRefs = mutable.Map.empty.withDefaultValue(0), - shouldInline = true - ) - } - cteDefs.foreach { cteDef => - buildCTEMap(cteDef, cteMap, ref => { - // A CTE relation can references CTE relations defined before it in the same `WithCTE`. 
- // Here we update the out-going-ref-count for it, in case this CTE relation is not - // referenced at all and can be optimized out, and we need to decrease the ref counts - // for CTE relations that are referenced by it. - if (cteDefs.exists(_.id == ref.cteId)) { - cteMap(cteDef.id).increaseOutgoingRefCount(ref.cteId, 1) - } - // Similarly, a CTE relation can reference CTE relations defined in the outer `WithCTE`. - // Here we call the `collectCTERefs` function so that the outer CTE can also update the - // out-going-ref-count if needed. - collectCTERefs(ref) - }) + val isDuplicated = cteDefs.forall(cteDef => cteMap.contains(cteDef.id)) + if (isDuplicated) { + // If we have seen this `WithCTE` node then it must be self-contained so we can clear + // the references from containers to the definitions, and we don't need to process it + // again + + cteDefs.foreach { cteDef => + cteMap(cteDef.id).container.foreach(c => cteMap(c).outgoingRefs -= cteDef.id) + } + } else { + cteDefs.foreach { cteDef => + cteMap(cteDef.id) = CTEReferenceInfo( + cteDef = cteDef, + refCount = 0, + outgoingRefs = mutable.Map.empty.withDefaultValue(0), + shouldInline = true, + container = outerCTEId + ) + } + + cteDefs.foreach { cteDef => + buildCTEMap(cteDef, cteMap, Some(cteDef.id)) + } + buildCTEMap(child, cteMap, outerCTEId) } - buildCTEMap(child, cteMap, collectCTERefs) case ref: CTERelationRef => cteMap(ref.cteId) = cteMap(ref.cteId).withRefCountIncreased(1) - collectCTERefs(ref) + + // The `outerCTEId` CTE definition can either reference `cteId` definition if `cteId` is in + // the same or in an outer `WithCTE` node, or `outerCTEId` can contain `cteId` definition if + // `cteId` is an inner `WithCTE` node inside `outerCTEId`. + // In both cases we can track the relations in `outgoingRefs` when we see a definition the + // first time. But if we encounter a conflicting duplicated contains relation later, then we + // will remove the references of the first contains relation. 
+ outerCTEId.foreach { cteId => + cteMap(cteId).increaseOutgoingRefCount(ref.cteId, 1) + } + case _ => if (plan.containsPattern(CTE)) { plan.children.foreach { child => - buildCTEMap(child, cteMap, collectCTERefs) + buildCTEMap(child, cteMap, outerCTEId) } plan.expressions.foreach { expr => if (expr.containsAllPatterns(PLAN_EXPRESSION, CTE)) { expr.foreach { - case e: SubqueryExpression => buildCTEMap(e.plan, cteMap, collectCTERefs) + case e: SubqueryExpression => buildCTEMap(e.plan, cteMap, outerCTEId) case _ => } } @@ -225,12 +236,15 @@ case class InlineCTE( * from other CTE relations and regular places. * @param outgoingRefs A mutable map that tracks outgoing reference counts to other CTE relations. * @param shouldInline If true, this CTE relation should be inlined in the places that reference it. + * @param container The container of a CTE definition is another CTE definition in which the + * `WithCTE` node of the definition resides. */ case class CTEReferenceInfo( cteDef: CTERelationDef, refCount: Int, outgoingRefs: mutable.Map[Long, Int], - shouldInline: Boolean) { + shouldInline: Boolean, + container: Option[Long]) { def withRefCountIncreased(count: Int): CTEReferenceInfo = { copy(refCount = refCount + count) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InsertMapSortInGroupingExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InsertMapSortExpression.scala similarity index 69% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InsertMapSortInGroupingExpressions.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InsertMapSortExpression.scala index b6ced6c49a36f..9e613c54a49bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InsertMapSortInGroupingExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InsertMapSortExpression.scala @@ -20,32 +20,30 @@ package 
org.apache.spark.sql.catalyst.optimizer import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions.{Alias, ArrayTransform, CreateNamedStruct, Expression, GetStructField, If, IsNull, LambdaFunction, Literal, MapFromArrays, MapKeys, MapSort, MapValues, NamedExpression, NamedLambdaVariable} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project, RepartitionByExpression} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.trees.TreePattern +import org.apache.spark.sql.catalyst.trees.TreePattern.{AGGREGATE, REPARTITION_OPERATION} import org.apache.spark.sql.types.{ArrayType, MapType, StructType} import org.apache.spark.util.ArrayImplicits.SparkArrayOps /** - * Adds [[MapSort]] to group expressions containing map columns, as the key/value pairs need to be - * in the correct order before grouping: + * Adds [[MapSort]] to [[Aggregate]] expressions containing map columns, + * as the key/value pairs need to be in the correct order before grouping: * - * SELECT map_column, COUNT(*) FROM TABLE GROUP BY map_column => + * SELECT map_column, COUNT(*) FROM TABLE GROUP BY map_column => * SELECT _groupingmapsort as map_column, COUNT(*) FROM ( * SELECT map_sort(map_column) as _groupingmapsort FROM TABLE * ) GROUP BY _groupingmapsort */ object InsertMapSortInGroupingExpressions extends Rule[LogicalPlan] { - private def shouldAddMapSort(expr: Expression): Boolean = { - expr.dataType.existsRecursively(_.isInstanceOf[MapType]) - } + import InsertMapSortExpression._ override def apply(plan: LogicalPlan): LogicalPlan = { - if (!plan.containsPattern(TreePattern.AGGREGATE)) { + if (!plan.containsPattern(AGGREGATE)) { return plan } val shouldRewrite = plan.exists { - case agg: Aggregate if agg.groupingExpressions.exists(shouldAddMapSort) => true + case agg: Aggregate if 
agg.groupingExpressions.exists(mapTypeExistsRecursively) => true case _ => false } if (!shouldRewrite) { @@ -53,8 +51,7 @@ object InsertMapSortInGroupingExpressions extends Rule[LogicalPlan] { } plan transformUpWithNewOutput { - case agg @ Aggregate(groupingExprs, aggregateExpressions, child, _) - if agg.groupingExpressions.exists(shouldAddMapSort) => + case agg @ Aggregate(groupingExprs, aggregateExpressions, child, hint) => val exprToMapSort = new mutable.HashMap[Expression, NamedExpression] val newGroupingKeys = groupingExprs.map { expr => val inserted = insertMapSortRecursively(expr) @@ -77,15 +74,53 @@ object InsertMapSortInGroupingExpressions extends Rule[LogicalPlan] { }.asInstanceOf[NamedExpression] } val newChild = Project(child.output ++ exprToMapSort.values, child) - val newAgg = Aggregate(newGroupingKeys, newAggregateExprs, newChild) + val newAgg = Aggregate(newGroupingKeys, newAggregateExprs, newChild, hint) newAgg -> agg.output.zip(newAgg.output) } } +} + +/** + * Adds [[MapSort]] to [[RepartitionByExpression]] expressions containing map columns, + * as the key/value pairs need to be in the correct order before repartitioning: + * + * SELECT * FROM TABLE DISTRIBUTE BY map_column => + * SELECT * FROM TABLE DISTRIBUTE BY map_sort(map_column) + */ +object InsertMapSortInRepartitionExpressions extends Rule[LogicalPlan] { + import InsertMapSortExpression._ + + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.transformUpWithPruning(_.containsPattern(REPARTITION_OPERATION)) { + case rep: RepartitionByExpression + if rep.partitionExpressions.exists(mapTypeExistsRecursively) => + val exprToMapSort = new mutable.HashMap[Expression, Expression] + val newPartitionExprs = rep.partitionExpressions.map { expr => + val inserted = insertMapSortRecursively(expr) + if (expr.ne(inserted)) { + exprToMapSort.getOrElseUpdate(expr.canonicalized, inserted) + } else { + expr + } + } + rep.copy(partitionExpressions = newPartitionExprs) + } + } +} + 
+private[optimizer] object InsertMapSortExpression { /** - * Inserts MapSort recursively taking into account when it is nested inside a struct or array. + * Returns true if the expression contains a [[MapType]] in DataType tree. */ - private def insertMapSortRecursively(e: Expression): Expression = { + def mapTypeExistsRecursively(expr: Expression): Boolean = { + expr.dataType.existsRecursively(_.isInstanceOf[MapType]) + } + + /** + * Inserts [[MapSort]] recursively taking into account when it is nested inside a struct or array. + */ + def insertMapSortRecursively(e: Expression): Expression = { e.dataType match { case m: MapType => // Check if value type of MapType contains MapType (possibly nested) @@ -122,5 +157,4 @@ object InsertMapSortInGroupingExpressions extends Rule[LogicalPlan] { case _ => e } } - } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 29216523fefc5..9d269f37e58b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -73,6 +73,21 @@ abstract class Optimizer(catalogManager: CatalogManager) conf.optimizerMaxIterations, maxIterationsSetting = SQLConf.OPTIMIZER_MAX_ITERATIONS.key) + /** + * A helper method that takes as input a Seq of Batch or Seq[Batch], and flattens it out. + */ + def flattenBatches(nestedBatchSequence: Seq[Any]): Seq[Batch] = { + assert(nestedBatchSequence.forall { + case _: Batch => true + case s: Seq[_] => s.forall(_.isInstanceOf[Batch]) + case _ => false + }) + nestedBatchSequence.flatMap { + case batches: Seq[Batch @unchecked] => batches + case batch: Batch => Seq(batch) + } + } + /** * Defines the default rule batches in the Optimizer. 
* @@ -143,39 +158,38 @@ abstract class Optimizer(catalogManager: CatalogManager) PushdownPredicatesAndPruneColumnsForCTEDef) ++ extendedOperatorOptimizationRules - val operatorOptimizationBatch: Seq[Batch] = { + val operatorOptimizationBatch: Seq[Batch] = Seq( Batch("Operator Optimization before Inferring Filters", fixedPoint, - operatorOptimizationRuleSet: _*) :: + operatorOptimizationRuleSet: _*), Batch("Infer Filters", Once, InferFiltersFromGenerate, - InferFiltersFromConstraints) :: + InferFiltersFromConstraints), Batch("Operator Optimization after Inferring Filters", fixedPoint, - operatorOptimizationRuleSet: _*) :: + operatorOptimizationRuleSet: _*), Batch("Push extra predicate through join", fixedPoint, PushExtraPredicateThroughJoin, - PushDownPredicates) :: Nil - } + PushDownPredicates)) - val batches = ( - Batch("Finish Analysis", FixedPoint(1), FinishAnalysis) :: + val batches: Seq[Batch] = flattenBatches(Seq( + Batch("Finish Analysis", FixedPoint(1), FinishAnalysis), // We must run this batch after `ReplaceExpressions`, as `RuntimeReplaceable` expression // may produce `With` expressions that need to be rewritten. - Batch("Rewrite With expression", Once, RewriteWithExpression) :: + Batch("Rewrite With expression", fixedPoint, RewriteWithExpression), ////////////////////////////////////////////////////////////////////////////////////////// // Optimizer rules start here ////////////////////////////////////////////////////////////////////////////////////////// - Batch("Eliminate Distinct", Once, EliminateDistinct) :: + Batch("Eliminate Distinct", Once, EliminateDistinct), // - Do the first call of CombineUnions before starting the major Optimizer rules, // since it can reduce the number of iteration and the other rules could add/move // extra operators between two adjacent Union operators. // - Call CombineUnions again in Batch("Operator Optimizations"), // since the other rules might make two separate Unions operators adjacent. 
Batch("Inline CTE", Once, - InlineCTE()) :: + InlineCTE()), Batch("Union", fixedPoint, RemoveNoopOperators, CombineUnions, - RemoveNoopUnion) :: + RemoveNoopUnion), // Run this once earlier. This might simplify the plan and reduce cost of optimizer. // For example, a query such as Filter(LocalRelation) would go through all the heavy // optimizer rules that are triggered when there is a filter @@ -186,16 +200,16 @@ abstract class Optimizer(catalogManager: CatalogManager) PropagateEmptyRelation, // PropagateEmptyRelation can change the nullability of an attribute from nullable to // non-nullable when an empty relation child of a Union is removed - UpdateAttributeNullability) :: + UpdateAttributeNullability), Batch("Pullup Correlated Expressions", Once, OptimizeOneRowRelationSubquery, PullOutNestedDataOuterRefExpressions, - PullupCorrelatedPredicates) :: + PullupCorrelatedPredicates), // Subquery batch applies the optimizer rules recursively. Therefore, it makes no sense // to enforce idempotence on it and we change this batch from Once to FixedPoint(1). Batch("Subquery", FixedPoint(1), OptimizeSubqueries, - OptimizeOneRowRelationSubquery) :: + OptimizeOneRowRelationSubquery), Batch("Replace Operators", fixedPoint, RewriteExceptAll, RewriteIntersectAll, @@ -203,48 +217,48 @@ abstract class Optimizer(catalogManager: CatalogManager) ReplaceExceptWithFilter, ReplaceExceptWithAntiJoin, ReplaceDistinctWithAggregate, - ReplaceDeduplicateWithAggregate) :: + ReplaceDeduplicateWithAggregate), Batch("Aggregate", fixedPoint, RemoveLiteralFromGroupExpressions, - RemoveRepetitionFromGroupExpressions) :: Nil ++ - operatorOptimizationBatch) :+ - Batch("Clean Up Temporary CTE Info", Once, CleanUpTempCTEInfo) :+ + RemoveRepetitionFromGroupExpressions), + operatorOptimizationBatch, + Batch("Clean Up Temporary CTE Info", Once, CleanUpTempCTEInfo), // This batch rewrites plans after the operator optimization and // before any batches that depend on stats. 
- Batch("Pre CBO Rules", Once, preCBORules: _*) :+ + Batch("Pre CBO Rules", Once, preCBORules: _*), // This batch pushes filters and projections into scan nodes. Before this batch, the logical // plan may contain nodes that do not report stats. Anything that uses stats must run after // this batch. - Batch("Early Filter and Projection Push-Down", Once, earlyScanPushDownRules: _*) :+ - Batch("Update CTE Relation Stats", Once, UpdateCTERelationStats) :+ + Batch("Early Filter and Projection Push-Down", Once, earlyScanPushDownRules: _*), + Batch("Update CTE Relation Stats", Once, UpdateCTERelationStats), // Since join costs in AQP can change between multiple runs, there is no reason that we have an // idempotence enforcement on this batch. We thus make it FixedPoint(1) instead of Once. Batch("Join Reorder", FixedPoint(1), - CostBasedJoinReorder) :+ + CostBasedJoinReorder), Batch("Eliminate Sorts", Once, EliminateSorts, - RemoveRedundantSorts) :+ + RemoveRedundantSorts), Batch("Decimal Optimizations", fixedPoint, - DecimalAggregates) :+ + DecimalAggregates), // This batch must run after "Decimal Optimizations", as that one may change the // aggregate distinct column Batch("Distinct Aggregate Rewrite", Once, - RewriteDistinctAggregates) :+ + RewriteDistinctAggregates), Batch("Object Expressions Optimization", fixedPoint, EliminateMapObjects, CombineTypedFilters, ObjectSerializerPruning, - ReassignLambdaVariableID) :+ + ReassignLambdaVariableID), Batch("LocalRelation", fixedPoint, ConvertToLocalRelation, PropagateEmptyRelation, // PropagateEmptyRelation can change the nullability of an attribute from nullable to // non-nullable when an empty relation child of a Union is removed - UpdateAttributeNullability) :+ - Batch("Optimize One Row Plan", fixedPoint, OptimizeOneRowPlan) :+ + UpdateAttributeNullability), + Batch("Optimize One Row Plan", fixedPoint, OptimizeOneRowPlan), // The following batch should be executed after batch "Join Reorder" and "LocalRelation". 
Batch("Check Cartesian Products", Once, - CheckCartesianProducts) :+ + CheckCartesianProducts), Batch("RewriteSubquery", Once, RewritePredicateSubquery, PushPredicateThroughJoin, @@ -252,10 +266,10 @@ abstract class Optimizer(catalogManager: CatalogManager) ColumnPruning, CollapseProject, RemoveRedundantAliases, - RemoveNoopOperators) :+ + RemoveNoopOperators), // This batch must be executed after the `RewriteSubquery` batch, which creates joins. - Batch("NormalizeFloatingNumbers", Once, NormalizeFloatingNumbers) :+ - Batch("ReplaceUpdateFieldsExpression", Once, ReplaceUpdateFieldsExpression) + Batch("NormalizeFloatingNumbers", Once, NormalizeFloatingNumbers), + Batch("ReplaceUpdateFieldsExpression", Once, ReplaceUpdateFieldsExpression))) // remove any batches with no rules. this may happen when subclasses do not add optional rules. batches.filter(_.rules.nonEmpty) @@ -270,22 +284,23 @@ abstract class Optimizer(catalogManager: CatalogManager) * (defaultBatches - (excludedRules - nonExcludableRules)). 
*/ def nonExcludableRules: Seq[String] = - FinishAnalysis.ruleName :: - RewriteDistinctAggregates.ruleName :: - ReplaceDeduplicateWithAggregate.ruleName :: - ReplaceIntersectWithSemiJoin.ruleName :: - ReplaceExceptWithFilter.ruleName :: - ReplaceExceptWithAntiJoin.ruleName :: - RewriteExceptAll.ruleName :: - RewriteIntersectAll.ruleName :: - ReplaceDistinctWithAggregate.ruleName :: - PullupCorrelatedPredicates.ruleName :: - RewriteCorrelatedScalarSubquery.ruleName :: - RewritePredicateSubquery.ruleName :: - NormalizeFloatingNumbers.ruleName :: - ReplaceUpdateFieldsExpression.ruleName :: - RewriteLateralSubquery.ruleName :: - OptimizeSubqueries.ruleName :: Nil + Seq( + FinishAnalysis.ruleName, + RewriteDistinctAggregates.ruleName, + ReplaceDeduplicateWithAggregate.ruleName, + ReplaceIntersectWithSemiJoin.ruleName, + ReplaceExceptWithFilter.ruleName, + ReplaceExceptWithAntiJoin.ruleName, + RewriteExceptAll.ruleName, + RewriteIntersectAll.ruleName, + ReplaceDistinctWithAggregate.ruleName, + PullupCorrelatedPredicates.ruleName, + RewriteCorrelatedScalarSubquery.ruleName, + RewritePredicateSubquery.ruleName, + NormalizeFloatingNumbers.ruleName, + ReplaceUpdateFieldsExpression.ruleName, + RewriteLateralSubquery.ruleName, + OptimizeSubqueries.ruleName) /** * Apply finish-analysis rules for the entire plan including all subqueries. @@ -298,7 +313,9 @@ abstract class Optimizer(catalogManager: CatalogManager) private val rules = Seq( EliminateResolvedHint, EliminateSubqueryAliases, + EliminatePipeOperators, EliminateView, + EliminateSQLFunctionNode, ReplaceExpressions, RewriteNonCorrelatedExists, PullOutGroupingExpressions, @@ -306,6 +323,7 @@ abstract class Optimizer(catalogManager: CatalogManager) // so the grouping keys can only be attribute and literal which makes // `InsertMapSortInGroupingExpressions` easy to insert `MapSort`. 
InsertMapSortInGroupingExpressions, + InsertMapSortInRepartitionExpressions, ComputeCurrentTime, ReplaceCurrentLike(catalogManager), SpecialDatetimeValues, @@ -346,7 +364,7 @@ abstract class Optimizer(catalogManager: CatalogManager) case d: DynamicPruningSubquery => d case s @ ScalarSubquery( PhysicalOperation(projections, predicates, a @ Aggregate(group, _, child, _)), - _, _, _, _, mayHaveCountBug, _, _) + _, _, _, _, mayHaveCountBug, _) if conf.getConf(SQLConf.DECORRELATE_SUBQUERY_PREVENT_CONSTANT_FOLDING_FOR_COUNT_BUG) && mayHaveCountBug.nonEmpty && mayHaveCountBug.get => // This is a subquery with an aggregate that may suffer from a COUNT bug. @@ -1031,6 +1049,9 @@ object ColumnPruning extends Rule[LogicalPlan] { // Can't prune the columns on LeafNode case p @ Project(_, _: LeafNode) => p + // Can't prune the columns on UpdateEventTimeWatermarkColumn + case p @ Project(_, _: UpdateEventTimeWatermarkColumn) => p + case NestedColumnAliasing(rewrittenPlan) => rewrittenPlan // for all other logical plans that inherits the output from it's children diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala index aa13e6a67c510..59b3d83c55162 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala @@ -122,7 +122,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] { private def pushdownPredicatesAndAttributes( plan: LogicalPlan, cteMap: CTEMap): LogicalPlan = plan.transformWithSubqueries { - case cteDef @ CTERelationDef(child, id, originalPlanWithPredicates, _) => + case cteDef @ CTERelationDef(child, id, originalPlanWithPredicates, _, _) => val (_, _, 
newPreds, newAttrSet) = cteMap(id) val originalPlan = originalPlanWithPredicates.map(_._1).getOrElse(child) val preds = originalPlanWithPredicates.map(_._2).getOrElse(Seq.empty) @@ -141,7 +141,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] { cteDef } - case cteRef @ CTERelationRef(cteId, _, output, _, _) => + case cteRef @ CTERelationRef(cteId, _, output, _, _, _) => val (cteDef, _, _, newAttrSet) = cteMap(cteId) if (needsPruning(cteDef.child, newAttrSet)) { val indices = newAttrSet.toSeq.map(cteDef.output.indexOf) @@ -170,7 +170,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] { object CleanUpTempCTEInfo extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(_.containsPattern(CTE)) { - case cteDef @ CTERelationDef(_, _, Some(_), _) => + case cteDef @ CTERelationDef(_, _, Some(_), _, _) => cteDef.copy(originalPlanWithPredicates = None) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala index 393a66f7c1e4f..5d85e89e1eabe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Plan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{COMMON_EXPR_REF, WITH_EXPRESSION} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.Utils /** * Rewrites the `With` expressions by adding a `Project` to pre-evaluate the common expressions, or @@ -66,11 +67,25 @@ object RewriteWithExpression extends Rule[LogicalPlan] { } private def applyInternal(p: LogicalPlan): LogicalPlan = { - val inputPlans 
= p.children.toArray + val inputPlans = p.children + val commonExprIdSet = p.expressions + .flatMap(_.collect { case r: CommonExpressionRef => r.id }) + .groupBy(identity) + .transform((_, v) => v.size) + .filter(_._2 > 1) + .keySet + val commonExprsPerChild = Array.fill(inputPlans.length)(mutable.ListBuffer.empty[(Alias, Long)]) var newPlan: LogicalPlan = p.mapExpressions { expr => - rewriteWithExprAndInputPlans(expr, inputPlans) + rewriteWithExprAndInputPlans(expr, inputPlans, commonExprsPerChild, commonExprIdSet) } - newPlan = newPlan.withNewChildren(inputPlans.toIndexedSeq) + val newChildren = inputPlans.zip(commonExprsPerChild).map { case (inputPlan, commonExprs) => + if (commonExprs.isEmpty) { + inputPlan + } else { + Project(inputPlan.output ++ commonExprs.map(_._1), inputPlan) + } + } + newPlan = newPlan.withNewChildren(newChildren) // Since we add extra Projects with extra columns to pre-evaluate the common expressions, // the current operator may have extra columns if it inherits the output columns from its // child, and we need to project away the extra columns to keep the plan schema unchanged. @@ -85,33 +100,34 @@ object RewriteWithExpression extends Rule[LogicalPlan] { private def rewriteWithExprAndInputPlans( e: Expression, - inputPlans: Array[LogicalPlan]): Expression = { + inputPlans: Seq[LogicalPlan], + commonExprsPerChild: Array[mutable.ListBuffer[(Alias, Long)]], + commonExprIdSet: Set[CommonExpressionId], + isNestedWith: Boolean = false): Expression = { if (!e.containsPattern(WITH_EXPRESSION)) return e e match { - case w: With => + // Do not handle nested With in one pass. Leave it to the next rule executor batch. 
+ case w: With if !isNestedWith => // Rewrite nested With expressions first - val child = rewriteWithExprAndInputPlans(w.child, inputPlans) - val defs = w.defs.map(rewriteWithExprAndInputPlans(_, inputPlans)) + val child = rewriteWithExprAndInputPlans( + w.child, inputPlans, commonExprsPerChild, commonExprIdSet, isNestedWith = true) + val defs = w.defs.map(rewriteWithExprAndInputPlans( + _, inputPlans, commonExprsPerChild, commonExprIdSet, isNestedWith = true)) val refToExpr = mutable.HashMap.empty[CommonExpressionId, Expression] - val childProjections = Array.fill(inputPlans.length)(mutable.ArrayBuffer.empty[Alias]) defs.zipWithIndex.foreach { case (CommonExpressionDef(child, id), index) => - if (child.containsPattern(COMMON_EXPR_REF)) { - throw SparkException.internalError( - "Common expression definition cannot reference other Common expression definitions") - } if (id.canonicalized) { throw SparkException.internalError( "Cannot rewrite canonicalized Common expression definitions") } - if (CollapseProject.isCheap(child)) { + if (CollapseProject.isCheap(child) || !commonExprIdSet.contains(id)) { refToExpr(id) = child } else { - val childProjectionIndex = inputPlans.indexWhere( + val childPlanIndex = inputPlans.indexWhere( c => child.references.subsetOf(c.outputSet) ) - if (childProjectionIndex == -1) { + if (childPlanIndex == -1) { // When we cannot rewrite the common expressions, force to inline them so that the // query can still run. This can happen if the join condition contains `With` and // the common expression references columns from both join sides. @@ -122,36 +138,37 @@ object RewriteWithExpression extends Rule[LogicalPlan] { // if it's ref count is 1. 
refToExpr(id) = child } else { - val aliasName = if (SQLConf.get.getConf(SQLConf.USE_COMMON_EXPR_ID_FOR_ALIAS)) { - s"_common_expr_${id.id}" - } else { - s"_common_expr_$index" - } - val alias = Alias(child, aliasName)() - val fakeProj = Project(Seq(alias), inputPlans(childProjectionIndex)) - if (PlanHelper.specialExpressionsInUnsupportedOperator(fakeProj).nonEmpty) { - // We have to inline the common expression if it cannot be put in a Project. - refToExpr(id) = child + val commonExprs = commonExprsPerChild(childPlanIndex) + val existingCommonExpr = commonExprs.find(_._2 == id.id) + if (existingCommonExpr.isDefined) { + if (Utils.isTesting) { + assert(existingCommonExpr.get._1.child.semanticEquals(child)) + } + refToExpr(id) = existingCommonExpr.get._1.toAttribute } else { - childProjections(childProjectionIndex) += alias - refToExpr(id) = alias.toAttribute + val aliasName = if (SQLConf.get.getConf(SQLConf.USE_COMMON_EXPR_ID_FOR_ALIAS)) { + s"_common_expr_${id.id}" + } else { + s"_common_expr_$index" + } + val alias = Alias(child, aliasName)() + val fakeProj = Project(Seq(alias), inputPlans(childPlanIndex)) + if (PlanHelper.specialExpressionsInUnsupportedOperator(fakeProj).nonEmpty) { + // We have to inline the common expression if it cannot be put in a Project. + refToExpr(id) = child + } else { + commonExprs.append((alias, id.id)) + refToExpr(id) = alias.toAttribute + } } } } } - for (i <- inputPlans.indices) { - val projectList = childProjections(i) - if (projectList.nonEmpty) { - inputPlans(i) = Project(inputPlans(i).output ++ projectList, inputPlans(i)) - } - } - child.transformWithPruning(_.containsPattern(COMMON_EXPR_REF)) { - case ref: CommonExpressionRef => - if (!refToExpr.contains(ref.id)) { - throw SparkException.internalError("Undefined common expression id " + ref.id) - } + // `child` may contain nested With and we only replace `CommonExpressionRef` that + // references common expressions in the current `With`. 
+ case ref: CommonExpressionRef if refToExpr.contains(ref.id) => if (ref.id.canonicalized) { throw SparkException.internalError( "Cannot rewrite canonicalized Common expression references") @@ -161,7 +178,8 @@ object RewriteWithExpression extends Rule[LogicalPlan] { case c: ConditionalExpression => val newAlwaysEvaluatedInputs = c.alwaysEvaluatedInputs.map( - rewriteWithExprAndInputPlans(_, inputPlans)) + rewriteWithExprAndInputPlans( + _, inputPlans, commonExprsPerChild, commonExprIdSet, isNestedWith)) val newExpr = c.withNewAlwaysEvaluatedInputs(newAlwaysEvaluatedInputs) // Use transformUp to handle nested With. newExpr.transformUpWithPruning(_.containsPattern(WITH_EXPRESSION)) { @@ -174,7 +192,10 @@ object RewriteWithExpression extends Rule[LogicalPlan] { } } - case other => other.mapChildren(rewriteWithExprAndInputPlans(_, inputPlans)) + case other => other.mapChildren( + rewriteWithExprAndInputPlans( + _, inputPlans, commonExprsPerChild, commonExprIdSet, isNestedWith) + ) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 754fea85ec6d7..e867953bcf282 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -90,7 +90,7 @@ object ConstantFolding extends Rule[LogicalPlan] { } // Don't replace ScalarSubquery if its plan is an aggregate that may suffer from a COUNT bug. 
- case s @ ScalarSubquery(_, _, _, _, _, mayHaveCountBug, _, _) + case s @ ScalarSubquery(_, _, _, _, _, mayHaveCountBug, _) if conf.getConf(SQLConf.DECORRELATE_SUBQUERY_PREVENT_CONSTANT_FOLDING_FOR_COUNT_BUG) && mayHaveCountBug.nonEmpty && mayHaveCountBug.get => s diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 8c82769dbf4a3..5a4e9f37c3951 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -131,12 +131,12 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { // Filter the plan by applying left semi and left anti joins. withSubquery.foldLeft(newFilter) { - case (p, Exists(sub, _, _, conditions, subHint, _)) => + case (p, Exists(sub, _, _, conditions, subHint)) => val (joinCond, outerPlan) = rewriteExistentialExpr(conditions, p) val join = buildJoin(outerPlan, rewriteDomainJoinsIfPresent(outerPlan, sub, joinCond), LeftSemi, joinCond, subHint) Project(p.output, join) - case (p, Not(Exists(sub, _, _, conditions, subHint, _))) => + case (p, Not(Exists(sub, _, _, conditions, subHint))) => val (joinCond, outerPlan) = rewriteExistentialExpr(conditions, p) val join = buildJoin(outerPlan, rewriteDomainJoinsIfPresent(outerPlan, sub, joinCond), LeftAnti, joinCond, subHint) @@ -319,7 +319,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { val introducedAttrs = ArrayBuffer.empty[Attribute] val newExprs = exprs.map { e => e.transformDownWithPruning(_.containsAnyPattern(EXISTS_SUBQUERY, IN_SUBQUERY)) { - case Exists(sub, _, _, conditions, subHint, _) => + case Exists(sub, _, _, conditions, subHint) => val exists = AttributeReference("exists", BooleanType, nullable = false)() val existenceJoin = ExistenceJoin(exists) val newCondition = 
conditions.reduceLeftOption(And) @@ -507,7 +507,7 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper plan.transformExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) { case ScalarSubquery(sub, children, exprId, conditions, hint, - mayHaveCountBugOld, needSingleJoinOld, _) + mayHaveCountBugOld, needSingleJoinOld) if children.nonEmpty => def mayHaveCountBugAgg(a: Aggregate): Boolean = { @@ -560,7 +560,7 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper } ScalarSubquery(newPlan, children, exprId, getJoinCondition(newCond, conditions), hint, Some(mayHaveCountBug), Some(needSingleJoin)) - case Exists(sub, children, exprId, conditions, hint, _) if children.nonEmpty => + case Exists(sub, children, exprId, conditions, hint) if children.nonEmpty => val (newPlan, newCond) = if (SQLConf.get.decorrelateInnerQueryEnabledForExistsIn) { decorrelate(sub, plan, handleCountBug = true) } else { @@ -818,7 +818,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelpe val subqueryAttrMapping = ArrayBuffer[(Attribute, Attribute)]() val newChild = subqueries.foldLeft(child) { case (currentChild, ScalarSubquery(sub, _, _, conditions, subHint, mayHaveCountBug, - needSingleJoin, _)) => + needSingleJoin)) => val query = DecorrelateInnerQuery.rewriteDomainJoins(currentChild, sub, conditions) val origOutput = query.output.head // The subquery appears on the right side of the join, hence add its hint to the right @@ -1064,8 +1064,7 @@ object OptimizeOneRowRelationSubquery extends Rule[LogicalPlan] { case p: LogicalPlan => p.transformExpressionsUpWithPruning( _.containsPattern(SCALAR_SUBQUERY)) { - case s @ ScalarSubquery( - OneRowSubquery(p @ Project(_, _: OneRowRelation)), _, _, _, _, _, _, _) + case s @ ScalarSubquery(OneRowSubquery(p @ Project(_, _: OneRowRelation)), _, _, _, _, _, _) if !hasCorrelatedSubquery(s.plan) && s.joinCond.isEmpty => assert(p.projectList.size == 1) 
stripOuterReferences(p.projectList).head diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 08a8cf6bab87a..b408fcefcfb26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER import org.apache.spark.sql.catalyst.types.DataTypeUtils -import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, IntervalUtils, SparkParserUtils} +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory, DateTimeUtils, IntervalUtils, SparkParserUtils} import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTimestamp, stringToTimestampWithoutTimeZone} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces, TableCatalog, TableWritePrivilege} import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition @@ -144,14 +144,27 @@ class AstBuilder extends DataTypeAstBuilder override def visitSingleCompoundStatement(ctx: SingleCompoundStatementContext): CompoundBody = { val labelCtx = new SqlScriptingLabelContext() - visitCompoundBodyImpl(ctx.compoundBody(), None, allowVarDeclare = true, labelCtx) + val labelText = labelCtx.enterLabeledScope(None, None) + + val script = Option(ctx.compoundBody()) + .map(visitCompoundBodyImpl( + _, + Some(labelText), + allowVarDeclare = true, + labelCtx, + isScope = true + )).getOrElse(CompoundBody(Seq.empty, Some(labelText), isScope = true)) + + labelCtx.exitLabeledScope(None) + script } private def visitCompoundBodyImpl( ctx: 
CompoundBodyContext, label: Option[String], allowVarDeclare: Boolean, - labelCtx: SqlScriptingLabelContext): CompoundBody = { + labelCtx: SqlScriptingLabelContext, + isScope: Boolean): CompoundBody = { val buff = ListBuffer[CompoundPlanStatement]() ctx.compoundStatements.forEach( compoundStatement => buff += visitCompoundStatementImpl(compoundStatement, labelCtx)) @@ -183,7 +196,7 @@ class AstBuilder extends DataTypeAstBuilder case _ => } - CompoundBody(buff.toSeq, label) + CompoundBody(buff.toSeq, label, isScope) } private def visitBeginEndCompoundBlockImpl( @@ -191,12 +204,14 @@ class AstBuilder extends DataTypeAstBuilder labelCtx: SqlScriptingLabelContext): CompoundBody = { val labelText = labelCtx.enterLabeledScope(Option(ctx.beginLabel()), Option(ctx.endLabel())) - val body = visitCompoundBodyImpl( - ctx.compoundBody(), - Some(labelText), - allowVarDeclare = true, - labelCtx - ) + val body = Option(ctx.compoundBody()) + .map(visitCompoundBodyImpl( + _, + Some(labelText), + allowVarDeclare = true, + labelCtx, + isScope = true + )).getOrElse(CompoundBody(Seq.empty, Some(labelText), isScope = true)) labelCtx.exitLabeledScope(Option(ctx.beginLabel())) body } @@ -226,6 +241,8 @@ class AstBuilder extends DataTypeAstBuilder visitSearchedCaseStatementImpl(searchedCaseContext, labelCtx) case simpleCaseContext: SimpleCaseStatementContext => visitSimpleCaseStatementImpl(simpleCaseContext, labelCtx) + case forStatementContext: ForStatementContext => + visitForStatementImpl(forStatementContext, labelCtx) case stmt => visit(stmt).asInstanceOf[CompoundPlanStatement] } } else { @@ -245,10 +262,12 @@ class AstBuilder extends DataTypeAstBuilder OneRowRelation())) }), conditionalBodies = ctx.conditionalBodies.asScala.toList.map( - body => visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx) + body => + visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx, isScope = false) ), elseBody = Option(ctx.elseBody).map( - body => visitCompoundBodyImpl(body, 
None, allowVarDeclare = false, labelCtx) + body => + visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx, isScope = false) ) ) } @@ -265,7 +284,13 @@ class AstBuilder extends DataTypeAstBuilder Project( Seq(Alias(expression(boolExpr), "condition")()), OneRowRelation()))} - val body = visitCompoundBodyImpl(ctx.compoundBody(), None, allowVarDeclare = false, labelCtx) + val body = visitCompoundBodyImpl( + ctx.compoundBody(), + None, + allowVarDeclare = false, + labelCtx, + isScope = false + ) labelCtx.exitLabeledScope(Option(ctx.beginLabel())) WhileStatement(condition, body, Some(labelText)) @@ -282,7 +307,8 @@ class AstBuilder extends DataTypeAstBuilder }) val conditionalBodies = ctx.conditionalBodies.asScala.toList.map( - body => visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx) + body => + visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx, isScope = false) ) if (conditions.length != conditionalBodies.length) { @@ -295,7 +321,8 @@ class AstBuilder extends DataTypeAstBuilder conditions = conditions, conditionalBodies = conditionalBodies, elseBody = Option(ctx.elseBody).map( - body => visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx) + body => + visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx, isScope = false) )) } @@ -312,7 +339,8 @@ class AstBuilder extends DataTypeAstBuilder }) val conditionalBodies = ctx.conditionalBodies.asScala.toList.map( - body => visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx) + body => + visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx, isScope = false) ) if (conditions.length != conditionalBodies.length) { @@ -325,7 +353,8 @@ class AstBuilder extends DataTypeAstBuilder conditions = conditions, conditionalBodies = conditionalBodies, elseBody = Option(ctx.elseBody).map( - body => visitCompoundBodyImpl(body, None, allowVarDeclare = false, labelCtx) + body => + visitCompoundBodyImpl(body, None, allowVarDeclare = 
false, labelCtx, isScope = false) )) } @@ -341,34 +370,66 @@ class AstBuilder extends DataTypeAstBuilder Project( Seq(Alias(expression(boolExpr), "condition")()), OneRowRelation()))} - val body = visitCompoundBodyImpl(ctx.compoundBody(), None, allowVarDeclare = false, labelCtx) + val body = visitCompoundBodyImpl( + ctx.compoundBody(), + None, + allowVarDeclare = false, + labelCtx, + isScope = false + ) labelCtx.exitLabeledScope(Option(ctx.beginLabel())) RepeatStatement(condition, body, Some(labelText)) } + private def visitForStatementImpl( + ctx: ForStatementContext, + labelCtx: SqlScriptingLabelContext): ForStatement = { + val labelText = labelCtx.enterLabeledScope(Option(ctx.beginLabel()), Option(ctx.endLabel())) + + val queryCtx = ctx.query() + val query = withOrigin(queryCtx) { + SingleStatement(visitQuery(queryCtx)) + } + val varName = Option(ctx.multipartIdentifier()).map(_.getText) + val body = visitCompoundBodyImpl( + ctx.compoundBody(), + None, + allowVarDeclare = false, + labelCtx, + isScope = false + ) + labelCtx.exitLabeledScope(Option(ctx.beginLabel())) + + ForStatement(query, varName, body, Some(labelText)) + } + private def leaveOrIterateContextHasLabel( ctx: RuleContext, label: String, isIterate: Boolean): Boolean = { ctx match { case c: BeginEndCompoundBlockContext - if Option(c.beginLabel()).isDefined && - c.beginLabel().multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) => - if (isIterate) { + if Option(c.beginLabel()).exists { b => + b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + } => if (isIterate) { throw SqlScriptingErrors.invalidIterateLabelUsageForCompound(CurrentOrigin.get, label) } true case c: WhileStatementContext - if Option(c.beginLabel()).isDefined && - c.beginLabel().multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) - => true + if Option(c.beginLabel()).exists { b => + b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + } => true case c: 
RepeatStatementContext - if Option(c.beginLabel()).isDefined && - c.beginLabel().multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) - => true + if Option(c.beginLabel()).exists { b => + b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + } => true case c: LoopStatementContext - if Option(c.beginLabel()).isDefined && - c.beginLabel().multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) - => true + if Option(c.beginLabel()).exists { b => + b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + } => true + case c: ForStatementContext + if Option(c.beginLabel()).exists { b => + b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label) + } => true case _ => false } } @@ -410,7 +471,13 @@ class AstBuilder extends DataTypeAstBuilder labelCtx: SqlScriptingLabelContext): LoopStatement = { val labelText = labelCtx.enterLabeledScope(Option(ctx.beginLabel()), Option(ctx.endLabel())) - val body = visitCompoundBodyImpl(ctx.compoundBody(), None, allowVarDeclare = false, labelCtx) + val body = visitCompoundBodyImpl( + ctx.compoundBody(), + None, + allowVarDeclare = false, + labelCtx, + isScope = false + ) labelCtx.exitLabeledScope(Option(ctx.beginLabel())) LoopStatement(body, Some(labelText)) @@ -484,7 +551,7 @@ class AstBuilder extends DataTypeAstBuilder throw QueryParsingErrors.duplicateCteDefinitionNamesError( duplicates.map(toSQLId).mkString(", "), ctx) } - UnresolvedWith(plan, ctes.toSeq) + UnresolvedWith(plan, ctes.toSeq, ctx.RECURSIVE() != null) } /** @@ -527,7 +594,10 @@ class AstBuilder extends DataTypeAstBuilder optionalMap(body.queryOrganization)(withQueryResultClauses(_, _, forPipeOperators = false)) } // If there are multiple SELECT just UNION them together into one query. - if (selects.length == 1) { + if (selects.length == 0) { + // This is a "FROM " clause with no other syntax. 
+ from + } else if (selects.length == 1) { selects.head } else { Union(selects.toSeq) @@ -2128,7 +2198,7 @@ class AstBuilder extends DataTypeAstBuilder } val unresolvedTable = UnresolvedInlineTable(aliases, rows.toSeq) - val table = if (conf.getConf(SQLConf.EAGER_EVAL_OF_UNRESOLVED_INLINE_TABLE_ENABLED)) { + val table = if (canEagerlyEvaluateInlineTable(ctx, unresolvedTable)) { EvaluateUnresolvedInlineTable.evaluate(unresolvedTable) } else { unresolvedTable @@ -2136,6 +2206,42 @@ class AstBuilder extends DataTypeAstBuilder table.optionalMap(ctx.tableAlias.strictIdentifier)(aliasPlan) } + /** + * Determines if the inline table can be eagerly evaluated. + */ + private def canEagerlyEvaluateInlineTable( + ctx: InlineTableContext, + table: UnresolvedInlineTable): Boolean = { + if (!conf.getConf(SQLConf.EAGER_EVAL_OF_UNRESOLVED_INLINE_TABLE_ENABLED)) { + return false + } else if (!ResolveDefaultStringTypes.needsResolution(table.expressions)) { + // if there are no strings to be resolved we can always evaluate eagerly + return true + } + + val isSessionCollationSet = conf.defaultStringType != StringType + + // if either of these are true we need to resolve + // the string types first + !isSessionCollationSet && !contextInsideCreate(ctx) + } + + private def contextInsideCreate(ctx: ParserRuleContext): Boolean = { + var currentContext: RuleContext = ctx + + while (currentContext != null) { + if (currentContext.isInstanceOf[CreateTableContext] || + currentContext.isInstanceOf[ReplaceTableContext] || + currentContext.isInstanceOf[CreateViewContext]) { + return true + } + + currentContext = currentContext.parent + } + + false + } + /** * Create an alias (SubqueryAlias) for a join relation. 
This is practically the same as * visitAliasedQuery and visitNamedExpression, ANTLR4 however requires us to use 3 different @@ -2228,14 +2334,6 @@ class AstBuilder extends DataTypeAstBuilder FunctionIdentifier(ctx.function.getText, Option(ctx.db).map(_.getText)) } - /** - * Create a multi-part identifier. - */ - override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = - withOrigin(ctx) { - ctx.parts.asScala.map(_.getText).toSeq - } - /* ******************************************************************************************** * Expression parsing * ******************************************************************************************** */ @@ -2275,9 +2373,10 @@ class AstBuilder extends DataTypeAstBuilder def visitStarExcept(ctx: StarContext, target: Option[Seq[String]]): Expression = withOrigin(ctx) { val exceptCols = ctx.exceptClause .exceptCols.multipartIdentifier.asScala.map(typedVisit[Seq[String]]) - UnresolvedStarExcept( + UnresolvedStarExceptOrReplace( target, - exceptCols.toSeq) + exceptCols.toSeq, + replacements = None) } /** @@ -2647,15 +2746,16 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitCollate(ctx: CollateContext): Expression = withOrigin(ctx) { val collationName = visitCollateClause(ctx.collateClause()) - Collate(expression(ctx.primaryExpression), collationName) + + Collate(expression(ctx.primaryExpression), UnresolvedCollation(collationName)) } - override def visitCollateClause(ctx: CollateClauseContext): String = withOrigin(ctx) { - val collationName = ctx.collationName.getText - if (!SQLConf.get.trimCollationEnabled && collationName.toUpperCase().contains("TRIM")) { + override def visitCollateClause(ctx: CollateClauseContext): Seq[String] = withOrigin(ctx) { + val collationName = visitMultipartIdentifier(ctx.collationName) + if (!SQLConf.get.trimCollationEnabled && collationName.last.toUpperCase().contains("TRIM")) { throw QueryCompilationErrors.trimCollationNotEnabledError() } - 
ctx.identifier.getText + collationName } /** @@ -2663,20 +2763,6 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitCast(ctx: CastContext): Expression = withOrigin(ctx) { val rawDataType = typedVisit[DataType](ctx.dataType()) - ctx.dataType() match { - case context: PrimitiveDataTypeContext => - val typeCtx = context.`type`() - if (typeCtx.start.getType == STRING) { - typeCtx.children.asScala.toSeq match { - case Seq(_, cctx: CollateClauseContext) => - throw QueryParsingErrors.dataTypeUnsupportedError( - rawDataType.typeName, - ctx.dataType().asInstanceOf[PrimitiveDataTypeContext]) - case _ => - } - } - case _ => - } val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) ctx.name.getType match { case SqlBaseParser.CAST => @@ -2696,20 +2782,6 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitCastByColon(ctx: CastByColonContext): Expression = withOrigin(ctx) { val rawDataType = typedVisit[DataType](ctx.dataType()) - ctx.dataType() match { - case context: PrimitiveDataTypeContext => - val typeCtx = context.`type`() - if (typeCtx.start.getType == STRING) { - typeCtx.children.asScala.toSeq match { - case Seq(_, cctx: CollateClauseContext) => - throw QueryParsingErrors.dataTypeUnsupportedError( - rawDataType.typeName, - ctx.dataType().asInstanceOf[PrimitiveDataTypeContext]) - case _ => - } - } - case _ => - } val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) val cast = Cast(expression(ctx.primaryExpression), dataType) cast.setTagValue(Cast.USER_SPECIFIED_CAST, ()) @@ -3346,7 +3418,7 @@ class AstBuilder extends DataTypeAstBuilder * Create a String literal expression. 
*/ override def visitStringLiteral(ctx: StringLiteralContext): Literal = withOrigin(ctx) { - Literal.create(createString(ctx), conf.defaultStringType) + Literal.create(createString(ctx), StringType) } /** @@ -3422,7 +3494,7 @@ class AstBuilder extends DataTypeAstBuilder /** * Create an [[UnresolvedTableOrView]] from a multi-part identifier. */ - private def createUnresolvedTableOrView( + protected def createUnresolvedTableOrView( ctx: IdentifierReferenceContext, commandName: String, allowTempView: Boolean = true): LogicalPlan = withOrigin(ctx) { @@ -3797,6 +3869,19 @@ class AstBuilder extends DataTypeAstBuilder ctx.asScala.headOption.map(visitCommentSpec) } + protected def visitCollationSpecList( + ctx: java.util.List[CollationSpecContext]): Option[String] = { + ctx.asScala.headOption.map(visitCollationSpec) + } + + override def visitCollationSpec(ctx: CollationSpecContext): String = withOrigin(ctx) { + if (!SQLConf.get.objectLevelCollationsEnabled) { + throw QueryCompilationErrors.objectLevelCollationsNotEnabledError() + } + val collationName = ctx.identifier.getText + CollationFactory.fetchCollation(collationName).collationName + } + /** * Create a [[BucketSpec]]. */ @@ -3928,6 +4013,7 @@ class AstBuilder extends DataTypeAstBuilder * - options * - location * - comment + * - collation * - serde * - clusterBySpec * @@ -3936,8 +4022,8 @@ class AstBuilder extends DataTypeAstBuilder * types like `i INT`, which should be appended to the existing table schema. */ type TableClauses = ( - Seq[Transform], Seq[ColumnDefinition], Option[BucketSpec], Map[String, String], - OptionList, Option[String], Option[String], Option[SerdeInfo], Option[ClusterBySpec]) + Seq[Transform], Seq[ColumnDefinition], Option[BucketSpec], Map[String, String], OptionList, + Option[String], Option[String], Option[String], Option[SerdeInfo], Option[ClusterBySpec]) /** * Validate a create table statement and return the [[TableIdentifier]]. 
@@ -4224,6 +4310,10 @@ class AstBuilder extends DataTypeAstBuilder throw QueryParsingErrors.cannotCleanReservedTablePropertyError( PROP_EXTERNAL, ctx, "please use CREATE EXTERNAL TABLE") case (PROP_EXTERNAL, _) => false + case (PROP_COLLATION, _) if !legacyOn => + throw QueryParsingErrors.cannotCleanReservedTablePropertyError( + PROP_COLLATION, ctx, "please use the DEFAULT COLLATION clause to specify it") + case (PROP_COLLATION, _) => false // It's safe to set whatever table comment, so we don't make it a reserved table property. case (PROP_COMMENT, _) => true case (k, _) => @@ -4403,6 +4493,7 @@ class AstBuilder extends DataTypeAstBuilder checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) + checkDuplicateClauses(ctx.collationSpec(), "DEFAULT COLLATION", ctx) checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) checkDuplicateClauses(ctx.clusterBySpec(), "CLUSTER BY", ctx) checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) @@ -4421,6 +4512,7 @@ class AstBuilder extends DataTypeAstBuilder val location = visitLocationSpecList(ctx.locationSpec()) val (cleanedOptions, newLocation) = cleanTableOptions(ctx, options, location) val comment = visitCommentSpecList(ctx.commentSpec()) + val collation = visitCollationSpecList(ctx.collationSpec()) val serdeInfo = getSerdeInfo(ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx) val clusterBySpec = ctx.clusterBySpec().asScala.headOption.map(visitClusterBySpec) @@ -4435,7 +4527,7 @@ class AstBuilder extends DataTypeAstBuilder } (partTransforms, partCols, bucketSpec, cleanedProperties, cleanedOptions, newLocation, comment, - serdeInfo, clusterBySpec) + collation, serdeInfo, clusterBySpec) } protected def getSerdeInfo( @@ -4495,6 +4587,7 @@ class AstBuilder extends DataTypeAstBuilder * ] * [LOCATION path] * [COMMENT table_comment] + * [DEFAULT COLLATION collation_name] * 
[TBLPROPERTIES (property_name=property_value, ...)] * * partition_fields: @@ -4508,8 +4601,8 @@ class AstBuilder extends DataTypeAstBuilder val columns = Option(ctx.colDefinitionList()).map(visitColDefinitionList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) - val (partTransforms, partCols, bucketSpec, properties, options, location, - comment, serdeInfo, clusterBySpec) = visitCreateTableClauses(ctx.createTableClauses()) + val (partTransforms, partCols, bucketSpec, properties, options, location, comment, + collation, serdeInfo, clusterBySpec) = visitCreateTableClauses(ctx.createTableClauses()) if (provider.isDefined && serdeInfo.isDefined) { invalidStatement(s"CREATE TABLE ... USING ... ${serdeInfo.get.describe}", ctx) @@ -4527,7 +4620,7 @@ class AstBuilder extends DataTypeAstBuilder clusterBySpec.map(_.asTransform) val tableSpec = UnresolvedTableSpec(properties, provider, options, location, comment, - serdeInfo, external) + collation, serdeInfo, external) Option(ctx.query).map(plan) match { case Some(_) if columns.nonEmpty => @@ -4576,6 +4669,7 @@ class AstBuilder extends DataTypeAstBuilder * ] * [LOCATION path] * [COMMENT table_comment] + * [DEFAULT COLLATION collation_name] * [TBLPROPERTIES (property_name=property_value, ...)] * * partition_fields: @@ -4585,8 +4679,8 @@ class AstBuilder extends DataTypeAstBuilder */ override def visitReplaceTable(ctx: ReplaceTableContext): LogicalPlan = withOrigin(ctx) { val orCreate = ctx.replaceTableHeader().CREATE() != null - val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo, - clusterBySpec) = visitCreateTableClauses(ctx.createTableClauses()) + val (partTransforms, partCols, bucketSpec, properties, options, location, comment, collation, + serdeInfo, clusterBySpec) = visitCreateTableClauses(ctx.createTableClauses()) val columns = Option(ctx.colDefinitionList()).map(visitColDefinitionList).getOrElse(Nil) val provider = 
Option(ctx.tableProvider).map(_.multipartIdentifier.getText) @@ -4600,7 +4694,7 @@ class AstBuilder extends DataTypeAstBuilder clusterBySpec.map(_.asTransform) val tableSpec = UnresolvedTableSpec(properties, provider, options, location, comment, - serdeInfo, external = false) + collation, serdeInfo, external = false) Option(ctx.query).map(plan) match { case Some(_) if columns.nonEmpty => @@ -5006,6 +5100,21 @@ class AstBuilder extends DataTypeAstBuilder } } + /** + * Parse a [[AlterTableCollation]] command. + * + * For example: + * {{{ + * ALTER TABLE table1 DEFAULT COLLATION name + * }}} + */ + override def visitAlterTableCollation(ctx: AlterTableCollationContext): LogicalPlan = + withOrigin(ctx) { + val table = createUnresolvedTable( + ctx.identifierReference, "ALTER TABLE ... DEFAULT COLLATION") + AlterTableCollation(table, visitCollationSpec(ctx.collationSpec())) + } + /** * Parse [[SetViewProperties]] or [[SetTableProperties]] commands. * @@ -5089,36 +5198,6 @@ class AstBuilder extends DataTypeAstBuilder visitLocationSpec(ctx.locationSpec)) } - /** - * Create a [[DescribeColumn]] or [[DescribeRelation]] commands. - */ - override def visitDescribeRelation(ctx: DescribeRelationContext): LogicalPlan = withOrigin(ctx) { - val isExtended = ctx.EXTENDED != null || ctx.FORMATTED != null - val relation = createUnresolvedTableOrView(ctx.identifierReference, "DESCRIBE TABLE") - if (ctx.describeColName != null) { - if (ctx.partitionSpec != null) { - throw QueryParsingErrors.descColumnForPartitionUnsupportedError(ctx) - } else { - DescribeColumn( - relation, - UnresolvedAttribute(ctx.describeColName.nameParts.asScala.map(_.getText).toSeq), - isExtended) - } - } else { - val partitionSpec = if (ctx.partitionSpec != null) { - // According to the syntax, visitPartitionSpec returns `Map[String, Option[String]]`. 
- visitPartitionSpec(ctx.partitionSpec).map { - case (key, Some(value)) => key -> value - case (key, _) => - throw QueryParsingErrors.emptyPartitionKeyError(key, ctx.partitionSpec) - } - } else { - Map.empty[String, String] - } - DescribeRelation(relation, partitionSpec, isExtended) - } - } - /** * Create an [[AnalyzeTable]], or an [[AnalyzeColumn]]. * Example SQL for analyzing a table or a set of partitions : @@ -5911,18 +5990,6 @@ class AstBuilder extends DataTypeAstBuilder if (!SQLConf.get.getConf(SQLConf.OPERATOR_PIPE_SYNTAX_ENABLED)) { operationNotAllowed("Operator pipe SQL syntax using |>", ctx) } - // This helper function adds a table subquery boundary between the new operator to be added - // (such as a filter or sort) and the input plan if one does not already exist. This helps the - // analyzer behave as if we had added the corresponding SQL clause after a table subquery - // containing the input plan. - def withSubqueryAlias(): LogicalPlan = left match { - case s: SubqueryAlias => - s - case u: UnresolvedRelation => - u - case _ => - SubqueryAlias(SubqueryAlias.generateSubqueryName(), left) - } Option(ctx.selectClause).map { c => withSelectQuerySpecification( ctx = ctx, @@ -5952,11 +6019,21 @@ class AstBuilder extends DataTypeAstBuilder }.get val projectList: Seq[NamedExpression] = Seq(UnresolvedStar(None)) ++ extendExpressions Project(projectList, left) + }.getOrElse(Option(ctx.SET).map { _ => + visitOperatorPipeSet(ctx, left) + }.getOrElse(Option(ctx.DROP).map { _ => + val ids: Seq[String] = visitIdentifierSeq(ctx.identifierSeq()) + val projectList: Seq[NamedExpression] = + Seq(UnresolvedStarExceptOrReplace( + target = None, excepts = ids.map(s => Seq(s)), replacements = None)) + Project(projectList, left) + }.getOrElse(Option(ctx.AS).map { _ => + SubqueryAlias(ctx.errorCapturingIdentifier().getText, left) }.getOrElse(Option(ctx.whereClause).map { c => if (ctx.windowClause() != null) { throw 
QueryParsingErrors.windowClauseInPipeOperatorWhereClauseNotAllowedError(ctx) } - withWhereClause(c, withSubqueryAlias()) + withWhereClause(c, PipeOperator(left)) }.getOrElse(Option(ctx.pivotClause()).map { c => if (ctx.unpivotClause() != null) { throw QueryParsingErrors.unpivotWithPivotInFromClauseNotAllowedError(ctx) @@ -5975,10 +6052,50 @@ class AstBuilder extends DataTypeAstBuilder val all = Option(ctx.setQuantifier()).exists(_.ALL != null) visitSetOperationImpl(left, plan(ctx.right), all, c.getType) }.getOrElse(Option(ctx.queryOrganization).map { c => - withQueryResultClauses(c, withSubqueryAlias(), forPipeOperators = true) + withQueryResultClauses(c, PipeOperator(left), forPipeOperators = true) }.getOrElse( visitOperatorPipeAggregate(ctx, left) - ))))))))) + )))))))))))) + } + + private def visitOperatorPipeSet( + ctx: OperatorPipeRightSideContext, left: LogicalPlan): LogicalPlan = { + val (setIdentifiers: Seq[String], setTargets: Seq[Expression]) = + visitOperatorPipeSetAssignmentSeq(ctx.operatorPipeSetAssignmentSeq()) + var plan = left + setIdentifiers.zip(setTargets).foreach { + case (_, _: Alias) => + operationNotAllowed( + "SQL pipe syntax |> SET operator with an alias assigned with [AS] aliasName", ctx) + case (ident, target) => + // Add an UnresolvedStarExceptOrReplace to exclude the SET expression name from the relation + // and add the new SET expression to the projection list. + // Use a PipeSelect expression to make sure it does not contain any aggregate functions. + val replacement = + Alias(PipeExpression(target, isAggregate = false, PipeOperators.setClause), ident)() + val projectList: Seq[NamedExpression] = + Seq(UnresolvedStarExceptOrReplace( + target = None, excepts = Seq(Seq(ident)), replacements = Some(Seq(replacement)))) + // Add a projection to implement the SET operator using the UnresolvedStarExceptOrReplace + // expression. 
We do this once per SET assignment to allow for multiple SET assignments with + // optional lateral references to previous ones. + plan = Project(projectList, plan) + } + plan + } + + override def visitOperatorPipeSetAssignmentSeq( + ctx: OperatorPipeSetAssignmentSeqContext): (Seq[String], Seq[Expression]) = { + withOrigin(ctx) { + if (!ctx.DOT.isEmpty) { + operationNotAllowed( + s"SQL pipe syntax |> SET operator with multi-part assignment key " + + s"(only single-part keys are allowed)", ctx) + } + val setIdentifiers: Seq[String] = ctx.errorCapturingIdentifier().asScala.map(_.getText).toSeq + val setTargets: Seq[Expression] = ctx.expression().asScala.map(typedVisit[Expression]).toSeq + (setIdentifiers, setTargets) + } } private def visitOperatorPipeAggregate( @@ -5989,7 +6106,7 @@ class AstBuilder extends DataTypeAstBuilder "The AGGREGATE clause requires a list of aggregate expressions " + "or a list of grouping expressions, or both", ctx) } - // Visit each aggregate expression, and add a PipeAggregate expression on top of it to generate + // Visit each aggregate expression, and add a [[PipeExpression]] on top of it to generate // clear error messages if the expression does not contain at least one aggregate function. 
val aggregateExpressions: Seq[NamedExpression] = Option(ctx.namedExpressionSeq()).map { n: NamedExpressionSeqContext => @@ -6025,7 +6142,8 @@ class AstBuilder extends DataTypeAstBuilder Seq("GROUPING", "GROUPING_ID").foreach { name => if (f.nameParts.head.equalsIgnoreCase(name)) error(name) } - case _: WindowSpec => error("window functions") + case _: WindowSpec => error("window functions; please update the query to move " + + "the window functions to a subsequent |> SELECT operator instead") case _ => } e.children.foreach(visit) @@ -6034,12 +6152,28 @@ class AstBuilder extends DataTypeAstBuilder a.aggregateExpressions.foreach(visit) // Prepend grouping keys to the list of aggregate functions, since operator pipe AGGREGATE // clause returns the GROUP BY expressions followed by the list of aggregate functions. - val namedGroupingExpressions: Seq[NamedExpression] = - a.groupingExpressions.map { - case n: NamedExpression => n - case e: Expression => UnresolvedAlias(e, None) - } - a.copy(aggregateExpressions = namedGroupingExpressions ++ a.aggregateExpressions) + val newGroupingExpressions = ArrayBuffer.empty[Expression] + val newAggregateExpressions = ArrayBuffer.empty[NamedExpression] + a.groupingExpressions.foreach { + case n: NamedExpression => + newGroupingExpressions += n + newAggregateExpressions += n + // If the grouping expression is an integer literal, create [[UnresolvedOrdinal]] and + // [[UnresolvedPipeAggregateOrdinal]] expressions to represent it in the final grouping + // and aggregate expressions, respectively. This will let the + // [[ResolveOrdinalInOrderByAndGroupBy]] rule detect the ordinal in the aggregate list + // and replace it with the corresponding attribute from the child operator. 
+ case Literal(v: Int, IntegerType) if conf.groupByOrdinal => + newGroupingExpressions += UnresolvedOrdinal(newAggregateExpressions.length + 1) + newAggregateExpressions += UnresolvedAlias(UnresolvedPipeAggregateOrdinal(v), None) + case e: Expression => + newGroupingExpressions += e + newAggregateExpressions += UnresolvedAlias(e, None) + } + newAggregateExpressions.appendAll(a.aggregateExpressions) + a.copy( + groupingExpressions = newGroupingExpressions.toSeq, + aggregateExpressions = newAggregateExpressions.toSeq) } }.getOrElse { // This is a table aggregation with no grouping expressions. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 8471c9f9dff13..1bc4f95f95daf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -163,6 +163,14 @@ class SqlScriptingLabelContext { bl.multipartIdentifier().getText, el.multipartIdentifier().getText) } + case (Some(bl: BeginLabelContext), _) + if bl.multipartIdentifier().parts.size() > 1 => + withOrigin(bl) { + throw SqlScriptingErrors.labelCannotBeQualified( + CurrentOrigin.get, + bl.multipartIdentifier().getText.toLowerCase(Locale.ROOT) + ) + } case (None, Some(el: EndLabelContext)) => withOrigin(el) { throw SqlScriptingErrors.endLabelWithoutBeginLabel( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala index 99d2ea7751959..a6ec6f5736300 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala @@ -21,13 +21,19 @@ import 
org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.types.{MetadataBuilder, StringType} private[sql] object DescribeCommandSchema { - def describeTableAttributes(): Seq[AttributeReference] = Seq( - AttributeReference("col_name", StringType, nullable = false, - new MetadataBuilder().putString("comment", "name of the column").build())(), - AttributeReference("data_type", StringType, nullable = false, - new MetadataBuilder().putString("comment", "data type of the column").build())(), - AttributeReference("comment", StringType, nullable = true, - new MetadataBuilder().putString("comment", "comment of the column").build())()) + def describeJsonTableAttributes(): Seq[AttributeReference] = + Seq( + AttributeReference("json_metadata", StringType, nullable = false, + new MetadataBuilder().putString("comment", "JSON metadata of the table").build())() + ) + def describeTableAttributes(): Seq[AttributeReference] = { + Seq(AttributeReference("col_name", StringType, nullable = false, + new MetadataBuilder().putString("comment", "name of the column").build())(), + AttributeReference("data_type", StringType, nullable = false, + new MetadataBuilder().putString("comment", "data type of the column").build())(), + AttributeReference("comment", StringType, nullable = true, + new MetadataBuilder().putString("comment", "comment of the column").build())()) + } def describeColumnAttributes(): Seq[AttributeReference] = Seq( AttributeReference("info_name", StringType, nullable = false, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/NormalizePlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/NormalizePlan.scala index 3b691f4f87778..13df749c6d584 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/NormalizePlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/NormalizePlan.scala @@ -17,14 +17,42 @@ package org.apache.spark.sql.catalyst.plans +import 
java.util.HashMap + import org.apache.spark.sql.catalyst.analysis.GetViewColumnByNameAndOrdinal import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical._ object NormalizePlan extends PredicateHelper { - def apply(plan: LogicalPlan): LogicalPlan = - normalizePlan(normalizeExprIds(plan)) + def apply(plan: LogicalPlan): LogicalPlan = { + val withNormalizedInheritAnalysis = normalizeInheritAnalysisRules(plan) + val withNormalizedExprIds = normalizeExprIds(withNormalizedInheritAnalysis) + normalizePlan(withNormalizedExprIds) + } + + /** + * Normalize [[InheritAnalysisRules]] nodes by replacing them with their replacement expressions. + * This is necessary because fixed-point analyzer may produce non-deterministic results when + * resolving original expressions. For example, in a query like: + * + * {{{ SELECT assert_true(1) }}} + * + * Before resolution, we have [[UnresolvedFunction]] whose child is Literal(1). This child will + * first be converted to Cast(Literal(1), BooleanType) by type coercion. Because in this case + * [[Cast]] doesn't require timezone, the expression will be implicitly resolved. Because the + * child of initially unresolved function is resolved, the function can be converted to + * [[AssertTrue]], which is of type [[InheritAnalysisRules]]. However, because the only child of + * [[InheritAnalysisRules]] is the replacement expression, the original expression will be lost + * timezone will never be applied. This causes inconsistencies, because fixed-point semantic is + * to ALWAYS apply timezone, regardless of whether or not the Cast actually needs it. 
+ */ + def normalizeInheritAnalysisRules(plan: LogicalPlan): LogicalPlan = { + plan transformAllExpressions { + case inheritAnalysisRules: InheritAnalysisRules => + inheritAnalysisRules.child + } + } /** * Since attribute references are given globally unique ids during analysis, @@ -68,8 +96,13 @@ object NormalizePlan extends PredicateHelper { * etc., will all now be equivalent. * - Sample the seed will replaced by 0L. * - Join conditions will be resorted by hashCode. + * - CTERelationDef ids will be rewritten using a monitonically increasing counter from 0. + * - CTERelationRef ids will be remapped based on the new CTERelationDef IDs. This is possible, + * because WithCTE returns cteDefs as first children, and the defs will be traversed before the + * refs. */ def normalizePlan(plan: LogicalPlan): LogicalPlan = { + val cteIdNormalizer = new CteIdNormalizer plan transform { case Filter(condition: Expression, child: LogicalPlan) => Filter( @@ -105,6 +138,19 @@ object NormalizePlan extends PredicateHelper { .asInstanceOf[Seq[NamedExpression]] Project(projList, child) case c: KeepAnalyzedQuery => c.storeAnalyzedQuery() + case localRelation: LocalRelation if !localRelation.data.isEmpty => + /** + * A substitute for the [[LocalRelation.data]]. [[GenericInternalRow]] is incomparable for + * maps, because [[ArrayBasedMapData]] doesn't define [[equals]]. + */ + val unsafeProjection = UnsafeProjection.create(localRelation.schema) + localRelation.copy(data = localRelation.data.map { row => + unsafeProjection(row) + }) + case cteRelationDef: CTERelationDef => + cteIdNormalizer.normalizeDef(cteRelationDef) + case cteRelationRef: CTERelationRef => + cteIdNormalizer.normalizeRef(cteRelationRef) } } @@ -125,3 +171,25 @@ object NormalizePlan extends PredicateHelper { case _ => condition // Don't reorder. 
} } + +class CteIdNormalizer { + private var cteIdCounter: Long = 0 + private val oldToNewIdMapping = new HashMap[Long, Long] + + def normalizeDef(cteRelationDef: CTERelationDef): CTERelationDef = { + try { + oldToNewIdMapping.put(cteRelationDef.id, cteIdCounter) + cteRelationDef.copy(id = cteIdCounter) + } finally { + cteIdCounter += 1 + } + } + + def normalizeRef(cteRelationRef: CTERelationRef): CTERelationRef = { + if (oldToNewIdMapping.containsKey(cteRelationRef.cteId)) { + cteRelationRef.copy(cteId = oldToNewIdMapping.get(cteRelationRef.cteId)) + } else { + cteRelationRef + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 23813d94c5495..07341f8ca1765 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.plans +import java.lang.{Boolean => JBoolean} import java.util.IdentityHashMap import scala.collection.mutable @@ -32,7 +33,7 @@ import org.apache.spark.sql.catalyst.trees.TreePatternBits import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} -import org.apache.spark.util.TransientLazy +import org.apache.spark.util.{BestEffortLazyVal, TransientBestEffortLazyVal} import org.apache.spark.util.collection.BitSet /** @@ -54,8 +55,9 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] /** * Returns the set of attributes that are output by this node. 
*/ - @transient - lazy val outputSet: AttributeSet = AttributeSet(output) + def outputSet: AttributeSet = _outputSet() + + private val _outputSet = new TransientBestEffortLazyVal(() => AttributeSet(output)) /** * Returns the output ordering that this plan generates, although the semantics differ in logical @@ -97,16 +99,17 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] */ def references: AttributeSet = _references() - private val _references = new TransientLazy({ - AttributeSet(expressions) -- producedAttributes - }) + private val _references = new TransientBestEffortLazyVal(() => + AttributeSet(expressions) -- producedAttributes) /** * Returns true when the all the expressions in the current node as well as all of its children * are deterministic */ - lazy val deterministic: Boolean = expressions.forall(_.deterministic) && - children.forall(_.deterministic) + def deterministic: Boolean = _deterministic() + + private val _deterministic = new BestEffortLazyVal[JBoolean](() => + expressions.forall(_.deterministic) && children.forall(_.deterministic)) /** * Attributes that are referenced by expressions but not provided by this node's children. @@ -280,7 +283,9 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] } /** Returns all of the expressions present in this query plan operator. */ - final def expressions: Seq[Expression] = { + final def expressions: Seq[Expression] = _expressions() + + private val _expressions = new BestEffortLazyVal[Seq[Expression]](() => { // Recursively find all expressions from a traversable. 
def seqToExpressions(seq: Iterable[Any]): Iterable[Expression] = seq.flatMap { case e: Expression => e :: Nil @@ -294,7 +299,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] case seq: Iterable[_] => seqToExpressions(seq) case other => Nil }.toSeq - } + }) /** * A variant of `transformUp`, which takes care of the case that the rule replaces a plan node @@ -427,7 +432,10 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] } } - lazy val schema: StructType = DataTypeUtils.fromAttributes(output) + def schema: StructType = _schema() + + private val _schema = new BestEffortLazyVal[StructType](() => + DataTypeUtils.fromAttributes(output)) /** Returns the output schema in the tree format. */ def schemaString: String = schema.treeString @@ -480,11 +488,13 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] /** * All the top-level subqueries of the current plan node. Nested subqueries are not included. */ - @transient lazy val subqueries: Seq[PlanType] = { + def subqueries: Seq[PlanType] = _subqueries() + + private val _subqueries = new TransientBestEffortLazyVal(() => expressions.filter(_.containsPattern(PLAN_EXPRESSION)).flatMap(_.collect { case e: PlanExpression[_] => e.plan.asInstanceOf[PlanType] }) - } + ) /** * All the subqueries of the current plan node and all its children. Nested subqueries are also @@ -620,7 +630,9 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * Plan nodes that require special canonicalization should override [[doCanonicalize()]]. * They should remove expressions cosmetic variations themselves. */ - @transient final lazy val canonicalized: PlanType = { + def canonicalized: PlanType = _canonicalized() + + private val _canonicalized = new TransientBestEffortLazyVal(() => { var plan = doCanonicalize() // If the plan has not been changed due to canonicalization, make a copy of it so we don't // mutate the original plan's _isCanonicalizedPlan flag. 
@@ -629,7 +641,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] } plan._isCanonicalizedPlan = true plan - } + }) /** * Defines how the canonicalization should work for the current plan. @@ -724,6 +736,12 @@ object QueryPlan extends PredicateHelper { } else { ar.withExprId(ExprId(ordinal)) } + + // Top-level Alias is already handled by `QueryPlan#doCanonicalize`. For inner Alias, the id + // doesn't matter and we normalize it to 0 here. + case a: Alias => + Alias(a.child, a.name)( + ExprId(0), a.qualifier, a.explicitMetadata, a.nonInheritableMetadataKeys) }.canonicalized.asInstanceOf[T] } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala index 41bba99673a2b..9f8c62fe58408 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala @@ -159,3 +159,25 @@ sealed abstract class AsOfJoinDirection case object Forward extends AsOfJoinDirection case object Backward extends AsOfJoinDirection case object Nearest extends AsOfJoinDirection + +object LateralJoinType { + + val supported = Seq( + "inner", + "leftouter", "left", "left_outer", + "cross" + ) + + def apply(typ: String): JoinType = typ.toLowerCase(Locale.ROOT).replace("_", "") match { + case "inner" => Inner + case "leftouter" | "left" => LeftOuter + case "cross" => Cross + case _ => + throw new AnalysisException( + errorClass = "UNSUPPORTED_JOIN_TYPE", + messageParameters = Map( + "typ" -> typ, + "supported" -> supported.mkString("'", "', '", "'")) + ) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SqlScriptingLogicalPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SqlScriptingLogicalPlans.scala index e6018e5e57b9c..ad00a5216b4c9 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SqlScriptingLogicalPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SqlScriptingLogicalPlans.scala @@ -62,16 +62,19 @@ case class SingleStatement(parsedPlan: LogicalPlan) * @param label Label set to CompoundBody by user or UUID otherwise. * It can be None in case when CompoundBody is not part of BeginEndCompoundBlock * for example when CompoundBody is inside loop or conditional block. + * @param isScope Flag indicating if the CompoundBody is a labeled scope. + * Scopes are used for grouping local variables and exception handlers. */ case class CompoundBody( collection: Seq[CompoundPlanStatement], - label: Option[String]) extends Command with CompoundPlanStatement { + label: Option[String], + isScope: Boolean) extends Command with CompoundPlanStatement { override def children: Seq[LogicalPlan] = collection override protected def withNewChildrenInternal( newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = { - CompoundBody(newChildren.map(_.asInstanceOf[CompoundPlanStatement]), label) + CompoundBody(newChildren.map(_.asInstanceOf[CompoundPlanStatement]), label, isScope) } } @@ -267,3 +270,31 @@ case class LoopStatement( LoopStatement(newChildren(0).asInstanceOf[CompoundBody], label) } } + +/** + * Logical operator for FOR statement. + * @param query Query which is executed once, then it's result set is iterated on, row by row. + * @param variableName Name of variable which is used to access the current row during iteration. + * @param body Compound body is a collection of statements that are executed for each row in + * the result set of the query. + * @param label An optional label for the loop which is unique amongst all labels for statements + * within which the FOR statement is contained. + * If an end label is specified it must match the beginning label. + * The label can be used to LEAVE or ITERATE the loop. 
+ */ +case class ForStatement( + query: SingleStatement, + variableName: Option[String], + body: CompoundBody, + label: Option[String]) extends CompoundPlanStatement { + + override def output: Seq[Attribute] = Seq.empty + + override def children: Seq[LogicalPlan] = Seq(query, body) + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = newChildren match { + case IndexedSeq(query: SingleStatement, body: CompoundBody) => + ForStatement(query, variableName, body, label) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 0cb04064a6178..c1261f2b5fac5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -376,10 +376,13 @@ case class Intersect( final override val nodePatterns: Seq[TreePattern] = Seq(INTERSECT) - override def output: Seq[Attribute] = - left.output.zip(right.output).map { case (leftAttr, rightAttr) => - leftAttr.withNullability(leftAttr.nullable && rightAttr.nullable) + override def output: Seq[Attribute] = { + if (conf.getConf(SQLConf.LAZY_SET_OPERATOR_OUTPUT)) { + lazyOutput + } else { + computeOutput() } + } override def metadataOutput: Seq[Attribute] = Nil @@ -396,6 +399,14 @@ case class Intersect( override protected def withNewChildrenInternal( newLeft: LogicalPlan, newRight: LogicalPlan): Intersect = copy(left = newLeft, right = newRight) + + private lazy val lazyOutput: Seq[Attribute] = computeOutput() + + /** We don't use right.output because those rows get excluded from the set. 
*/ + private def computeOutput(): Seq[Attribute] = + left.output.zip(right.output).map { case (leftAttr, rightAttr) => + leftAttr.withNullability(leftAttr.nullable && rightAttr.nullable) + } } case class Except( @@ -403,8 +414,14 @@ case class Except( right: LogicalPlan, isAll: Boolean) extends SetOperation(left, right) { override def nodeName: String = getClass.getSimpleName + ( if ( isAll ) " All" else "" ) - /** We don't use right.output because those rows get excluded from the set. */ - override def output: Seq[Attribute] = left.output + + override def output: Seq[Attribute] = { + if (conf.getConf(SQLConf.LAZY_SET_OPERATOR_OUTPUT)) { + lazyOutput + } else { + computeOutput() + } + } override def metadataOutput: Seq[Attribute] = Nil @@ -416,6 +433,11 @@ case class Except( override protected def withNewChildrenInternal( newLeft: LogicalPlan, newRight: LogicalPlan): Except = copy(left = newLeft, right = newRight) + + private lazy val lazyOutput: Seq[Attribute] = computeOutput() + + /** We don't use right.output because those rows get excluded from the set. */ + private def computeOutput(): Seq[Attribute] = left.output } /** Factory for constructing new `Union` nodes. 
*/ @@ -423,6 +445,21 @@ object Union { def apply(left: LogicalPlan, right: LogicalPlan): Union = { Union (left :: right :: Nil) } + + // updating nullability to make all the children consistent + def mergeChildOutputs(childOutputs: Seq[Seq[Attribute]]): Seq[Attribute] = { + childOutputs.transpose.map { attrs => + val firstAttr = attrs.head + val nullable = attrs.exists(_.nullable) + val newDt = attrs.map(_.dataType).reduce(StructType.unionLikeMerge) + if (firstAttr.dataType == newDt) { + firstAttr.withNullability(nullable) + } else { + AttributeReference(firstAttr.name, newDt, nullable, firstAttr.metadata)( + firstAttr.exprId, firstAttr.qualifier) + } + } + } } /** @@ -479,18 +516,11 @@ case class Union( AttributeSet.fromAttributeSets(children.map(_.outputSet)).size } - // updating nullability to make all the children consistent override def output: Seq[Attribute] = { - children.map(_.output).transpose.map { attrs => - val firstAttr = attrs.head - val nullable = attrs.exists(_.nullable) - val newDt = attrs.map(_.dataType).reduce(StructType.unionLikeMerge) - if (firstAttr.dataType == newDt) { - firstAttr.withNullability(nullable) - } else { - AttributeReference(firstAttr.name, newDt, nullable, firstAttr.metadata)( - firstAttr.exprId, firstAttr.qualifier) - } + if (conf.getConf(SQLConf.LAZY_SET_OPERATOR_OUTPUT)) { + lazyOutput + } else { + computeOutput() } } @@ -509,6 +539,10 @@ case class Union( children.length > 1 && !(byName || allowMissingCol) && childrenResolved && allChildrenCompatible } + private lazy val lazyOutput: Seq[Attribute] = computeOutput() + + private def computeOutput(): Seq[Attribute] = Union.mergeChildOutputs(children.map(_.output)) + /** * Maps the constraints containing a given (original) sequence of attributes to those with a * given (reference) sequence of attributes. Given the nature of union, we expect that the @@ -801,10 +835,12 @@ object View { * @param child The final query of this CTE. 
* @param cteRelations A sequence of pair (alias, the CTE definition) that this CTE defined * Each CTE can see the base tables and the previously defined CTEs only. + * @param allowRecursion A boolean flag if recursion is allowed. */ case class UnresolvedWith( child: LogicalPlan, - cteRelations: Seq[(String, SubqueryAlias)]) extends UnaryNode { + cteRelations: Seq[(String, SubqueryAlias)], + allowRecursion: Boolean = false) extends UnaryNode { final override val nodePatterns: Seq[TreePattern] = Seq(UNRESOLVED_WITH) override def output: Seq[Attribute] = child.output @@ -830,12 +866,17 @@ case class UnresolvedWith( * pushdown to help ensure rule idempotency. * @param underSubquery If true, it means we don't need to add a shuffle for this CTE relation as * subquery reuse will be applied to reuse CTE relation output. + * @param recursionAnchor A helper plan node that temporary stores the anchor term of recursive + * definitions. In the beginning of recursive resolution the `ResolveWithCTE` + * rule updates this parameter and once it is resolved the same rule resolves + * the recursive [[CTERelationRef]] references and removes this parameter. */ case class CTERelationDef( child: LogicalPlan, id: Long = CTERelationDef.newId, originalPlanWithPredicates: Option[(LogicalPlan, Seq[Expression])] = None, - underSubquery: Boolean = false) extends UnaryNode { + underSubquery: Boolean = false, + recursionAnchor: Option[LogicalPlan] = None) extends UnaryNode { final override val nodePatterns: Seq[TreePattern] = Seq(CTE) @@ -843,6 +884,13 @@ case class CTERelationDef( copy(child = newChild) override def output: Seq[Attribute] = if (resolved) child.output else Nil + + lazy val recursive: Boolean = child.exists{ + // if the reference is found inside the child, referencing to this CTE definition, + // and already marked as recursive, then this CTE definition is recursive. 
+ case CTERelationRef(this.id, _, _, _, _, true) => true + case _ => false + } } object CTERelationDef { @@ -859,13 +907,15 @@ object CTERelationDef { * de-duplication. * @param statsOpt The optional statistics inferred from the corresponding CTE * definition. + * @param recursive If this is a recursive reference. */ case class CTERelationRef( cteId: Long, _resolved: Boolean, override val output: Seq[Attribute], override val isStreaming: Boolean, - statsOpt: Option[Statistics] = None) extends LeafNode with MultiInstanceRelation { + statsOpt: Option[Statistics] = None, + recursive: Boolean = false) extends LeafNode with MultiInstanceRelation { final override val nodePatterns: Seq[TreePattern] = Seq(CTE) @@ -2014,6 +2064,9 @@ case class Deduplicate( } case class DeduplicateWithinWatermark(keys: Seq[Attribute], child: LogicalPlan) extends UnaryNode { + // Ensure that references include event time columns so they are not pruned away. + override def references: AttributeSet = AttributeSet(keys) ++ + AttributeSet(child.output.filter(_.metadata.contains(EventTimeWatermark.delayKey))) override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output final override val nodePatterns: Seq[TreePattern] = Seq(DISTINCT_LIKE) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala index 2f5d4b9c86e25..dbd2c0ba8e420 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala @@ -261,3 +261,15 @@ case class AlterTableClusterBy( protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild) } + +/** + * The logical plan of the ALTER TABLE ... DEFAULT COLLATION name command. 
+ */ +case class AlterTableCollation( + table: LogicalPlan, collation: String) extends AlterTableCommand { + override def changes: Seq[TableChange] = { + Seq(TableChange.setProperty(TableCatalog.PROP_COLLATION, collation)) + } + + protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = copy(table = newChild) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index b465e0e11612f..58c62a90225aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -21,8 +21,8 @@ import org.apache.spark.{SparkIllegalArgumentException, SparkUnsupportedOperatio import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, AssignmentUtils, EliminateSubqueryAliases, FieldName, NamedRelation, PartitionSpec, ResolvedIdentifier, ResolvedProcedure, TypeCheckResult, UnresolvedException, UnresolvedProcedure, ViewSchemaMode} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} +import org.apache.spark.sql.catalyst.catalog.{FunctionResource, RoutineLanguage} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.catalog.FunctionResource import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, MetadataAttribute, NamedExpression, UnaryExpression, Unevaluable, V2ExpressionUtils} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.trees.BinaryLike @@ -459,6 +459,12 @@ trait V2CreateTableAsSelectPlan newQuery: LogicalPlan): V2CreateTableAsSelectPlan } +/** + * A trait used for logical plan nodes that create V1 table definitions, + * and so that rules 
from the catalyst module can identify them. + */ +trait V1CreateTablePlan extends LogicalPlan + /** A trait used for logical plan nodes that create or replace V2 table definitions. */ trait V2CreateTablePlan extends LogicalPlan { def name: LogicalPlan @@ -1066,6 +1072,26 @@ case class CreateFunction( copy(child = newChild) } +/** + * The logical plan of the CREATE FUNCTION command for SQL Functions. + */ +case class CreateUserDefinedFunction( + child: LogicalPlan, + inputParamText: Option[String], + returnTypeText: String, + exprText: Option[String], + queryText: Option[String], + comment: Option[String], + isDeterministic: Option[Boolean], + containsSQL: Option[Boolean], + language: RoutineLanguage, + isTableFunc: Boolean, + ignoreIfExists: Boolean, + replace: Boolean) extends UnaryCommand { + override protected def withNewChildInternal(newChild: LogicalPlan): CreateUserDefinedFunction = + copy(child = newChild) +} + /** * The logical plan of the DROP FUNCTION command. */ @@ -1332,6 +1358,7 @@ case class CreateView( child: LogicalPlan, userSpecifiedColumns: Seq[(String, Option[String])], comment: Option[String], + collation: Option[String], properties: Map[String, String], originalText: Option[String], query: LogicalPlan, @@ -1480,6 +1507,7 @@ trait TableSpecBase { def provider: Option[String] def location: Option[String] def comment: Option[String] + def collation: Option[String] def serde: Option[SerdeInfo] def external: Boolean } @@ -1490,6 +1518,7 @@ case class UnresolvedTableSpec( optionExpression: OptionList, location: Option[String], comment: Option[String], + collation: Option[String], serde: Option[SerdeInfo], external: Boolean) extends UnaryExpression with Unevaluable with TableSpecBase { @@ -1535,10 +1564,11 @@ case class TableSpec( options: Map[String, String], location: Option[String], comment: Option[String], + collation: Option[String], serde: Option[SerdeInfo], external: Boolean) extends TableSpecBase { def withNewLocation(newLocation: 
Option[String]): TableSpec = { - TableSpec(properties, provider, options, newLocation, comment, serde, external) + TableSpec(properties, provider, options, newLocation, comment, collation, serde, external) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 30e223c3c3c87..6e19a1d6bbc8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -176,6 +176,13 @@ case class OrderedDistribution(ordering: Seq[SortOrder]) extends Distribution { override def createPartitioning(numPartitions: Int): Partitioning = { RangePartitioning(ordering, numPartitions) } + + def areAllClusterKeysMatched(expressions: Seq[Expression]): Boolean = { + expressions.length == ordering.length && + expressions.zip(ordering).forall { + case (x, o) => x.semanticEquals(o.child) + } + } } /** @@ -394,6 +401,9 @@ case class KeyGroupedPartitioning( } } + case o @ OrderedDistribution(_) if SQLConf.get.v2BucketingAllowSorting => + o.areAllClusterKeysMatched(expressions) + case _ => false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala index 76d36fab2096a..bdbf698db2e01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala @@ -22,7 +22,8 @@ import org.apache.spark.internal.{Logging, MessageWithContext} import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.QueryPlanningTracker -import org.apache.spark.sql.catalyst.trees.TreeNode +import 
org.apache.spark.sql.catalyst.rules.RuleExecutor.getForceIterationValue +import org.apache.spark.sql.catalyst.trees.{TreeNode, TreeNodeTag} import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.errors.QueryExecutionErrors @@ -30,6 +31,27 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.Utils object RuleExecutor { + + /** + * A tag used to explicitly request an additional iteration of the current batch during + * rule execution, even if the query plan remains unchanged. Increment the tag's value + * to enforce another iteration. + */ + private val FORCE_ADDITIONAL_ITERATION = TreeNodeTag[Int]("forceAdditionalIteration") + + /** + * Increments the value of the FORCE_ADDITIONAL_ITERATION tag on the given plan to + * explicitly force another iteration of the current batch during rule execution. + */ + def forceAdditionalIteration(plan: TreeNode[_]): Unit = { + val oldValue = getForceIterationValue(plan) + plan.setTagValue(FORCE_ADDITIONAL_ITERATION, oldValue + 1) + } + + private def getForceIterationValue(plan: TreeNode[_]): Int = { + plan.getTagValue(FORCE_ADDITIONAL_ITERATION).getOrElse(0) + } + protected val queryExecutionMeter = QueryExecutionMetering() /** Dump statistics about time spent running specific rules. 
*/ @@ -303,7 +325,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { continue = false } - if (curPlan.fastEquals(lastPlan)) { + if (isFixedPointReached(lastPlan, curPlan)) { logTrace( s"Fixed point reached for batch ${batch.name} after ${iteration - 1} iterations.") continue = false @@ -317,4 +339,9 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { curPlan } + + private def isFixedPointReached(oldPlan: TreeType, newPlan: TreeType): Boolean = { + oldPlan.fastEquals(newPlan) && + getForceIterationValue(newPlan) <= getForceIterationValue(oldPlan) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index 5ae2ca0d532b7..ee5245054bcca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -51,6 +51,7 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAliases" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveBinaryArithmetic" :: + "org.apache.spark.sql.catalyst.analysis.ResolveCollationName" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveDeserializer" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveEncodersInUDF" :: "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveFunctions" :: @@ -107,6 +108,8 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.analysis.UpdateOuterReferences" :: "org.apache.spark.sql.catalyst.analysis.UpdateAttributeNullability" :: "org.apache.spark.sql.catalyst.analysis.ResolveUpdateEventTimeWatermarkColumn" :: + "org.apache.spark.sql.catalyst.expressions.EliminatePipeOperators" :: + "org.apache.spark.sql.catalyst.expressions.ValidateAndStripPipeExpressions" :: // 
Catalyst Optimizer rules "org.apache.spark.sql.catalyst.optimizer.BooleanSimplification" :: "org.apache.spark.sql.catalyst.optimizer.CollapseProject" :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index 24b787054fb13..9856a26346f6a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -63,6 +63,7 @@ object TreePattern extends Enumeration { val LAMBDA_VARIABLE: Value = Value val LATERAL_COLUMN_ALIAS_REFERENCE: Value = Value val LATERAL_SUBQUERY: Value = Value + val LAZY_EXPRESSION: Value = Value val LIKE_FAMLIY: Value = Value val LIST_SUBQUERY: Value = Value val LITERAL: Value = Value @@ -78,6 +79,8 @@ object TreePattern extends Enumeration { val OUTER_REFERENCE: Value = Value val PARAMETER: Value = Value val PARAMETERIZED_QUERY: Value = Value + val PIPE_EXPRESSION: Value = Value + val PIPE_OPERATOR: Value = Value val PIVOT: Value = Value val PLAN_EXPRESSION: Value = Value val PYTHON_UDF: Value = Value @@ -89,6 +92,9 @@ object TreePattern extends Enumeration { val SCALA_UDF: Value = Value val SESSION_WINDOW: Value = Value val SORT: Value = Value + val SQL_FUNCTION_EXPRESSION: Value = Value + val SQL_SCALAR_FUNCTION: Value = Value + val SQL_TABLE_FUNCTION: Value = Value val SUBQUERY_ALIAS: Value = Value val SUM: Value = Value val TIME_WINDOW: Value = Value @@ -145,6 +151,7 @@ object TreePattern extends Enumeration { // Unresolved expression patterns (Alphabetically ordered) val UNRESOLVED_ALIAS: Value = Value val UNRESOLVED_ATTRIBUTE: Value = Value + val UNRESOLVED_COLLATION: Value = Value val UNRESOLVED_DESERIALIZER: Value = Value val UNRESOLVED_DF_STAR: Value = Value val UNRESOLVED_HAVING: Value = Value @@ -153,8 +160,7 @@ object TreePattern extends Enumeration { val UNRESOLVED_FUNCTION: Value = 
Value val UNRESOLVED_HINT: Value = Value val UNRESOLVED_WINDOW_EXPRESSION: Value = Value - val UNRESOLVED_IDENTIFIER_WITH_CTE: Value = Value - val UNRESOLVED_OUTER_REFERENCE: Value = Value + val UNRESOLVED_PLAN_ID: Value = Value // Unresolved Plan patterns (Alphabetically ordered) val UNRESOLVED_FUNC: Value = Value @@ -169,8 +175,4 @@ object TreePattern extends Enumeration { // Execution Plan patterns (alphabetically ordered) val EXCHANGE: Value = Value - - // Lazy analysis expression patterns (alphabetically ordered) - val LAZY_ANALYSIS_EXPRESSION: Value = Value - val LAZY_OUTER_REFERENCE: Value = Value } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index 628fdcebd3084..6ba7e528ea230 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -74,7 +74,7 @@ object CharVarcharUtils extends Logging with SparkCharVarcharUtils { def replaceCharVarcharWithStringForCast(dt: DataType): DataType = { if (SQLConf.get.charVarcharAsString) { replaceCharVarcharWithString(dt) - } else if (hasCharVarchar(dt)) { + } else if (hasCharVarchar(dt) && !SQLConf.get.preserveCharVarcharTypeInfo) { logWarning(log"The Spark cast operator does not support char/varchar type and simply treats" + log" them as string type. Please use string type directly to avoid confusion. 
Otherwise," + log" you can set ${MDC(CONFIG, SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key)} " + @@ -164,7 +164,11 @@ object CharVarcharUtils extends Logging with SparkCharVarcharUtils { case CharType(length) if charFuncName.isDefined => StaticInvoke( classOf[CharVarcharCodegenUtils], - StringType, + if (SQLConf.get.preserveCharVarcharTypeInfo) { + CharType(length) + } else { + StringType + }, charFuncName.get, expr :: Literal(length) :: Nil, returnNullable = false) @@ -172,7 +176,11 @@ object CharVarcharUtils extends Logging with SparkCharVarcharUtils { case VarcharType(length) if varcharFuncName.isDefined => StaticInvoke( classOf[CharVarcharCodegenUtils], - StringType, + if (SQLConf.get.preserveCharVarcharTypeInfo) { + VarcharType(length) + } else { + StringType + }, varcharFuncName.get, expr :: Literal(length) :: Nil, returnNullable = false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index c9ca3ed864c16..1f741169898e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -667,33 +667,44 @@ object DateTimeUtils extends SparkDateTimeUtils { * @param zoneId The time zone ID at which the operation is performed. * @return A timestamp value, expressed in microseconds since 1970-01-01 00:00:00Z. 
*/ - def timestampAdd(unit: String, quantity: Int, micros: Long, zoneId: ZoneId): Long = { + def timestampAdd(unit: String, quantity: Long, micros: Long, zoneId: ZoneId): Long = { try { unit.toUpperCase(Locale.ROOT) match { case "MICROSECOND" => timestampAddInterval(micros, 0, 0, quantity, zoneId) case "MILLISECOND" => timestampAddInterval(micros, 0, 0, - Math.multiplyExact(quantity.toLong, MICROS_PER_MILLIS), zoneId) + Math.multiplyExact(quantity, MICROS_PER_MILLIS), zoneId) case "SECOND" => timestampAddInterval(micros, 0, 0, - Math.multiplyExact(quantity.toLong, MICROS_PER_SECOND), zoneId) + Math.multiplyExact(quantity, MICROS_PER_SECOND), zoneId) case "MINUTE" => timestampAddInterval(micros, 0, 0, - Math.multiplyExact(quantity.toLong, MICROS_PER_MINUTE), zoneId) + Math.multiplyExact(quantity, MICROS_PER_MINUTE), zoneId) case "HOUR" => timestampAddInterval(micros, 0, 0, - Math.multiplyExact(quantity.toLong, MICROS_PER_HOUR), zoneId) + Math.multiplyExact(quantity, MICROS_PER_HOUR), zoneId) case "DAY" | "DAYOFYEAR" => - timestampAddInterval(micros, 0, quantity, 0, zoneId) + // Given that more than `Int32.MaxValue` days will cause an `ArithmeticException` due to + // overflow, we can safely cast the quantity to an `Int` here. Same follows for larger + // unites. 
+ timestampAddInterval(micros, 0, Math.toIntExact(quantity), 0, zoneId) case "WEEK" => - timestampAddInterval(micros, 0, Math.multiplyExact(quantity, DAYS_PER_WEEK), 0, zoneId) + timestampAddInterval( + micros, + 0, + Math.multiplyExact(Math.toIntExact(quantity), DAYS_PER_WEEK), + 0, + zoneId) case "MONTH" => - timestampAddMonths(micros, quantity, zoneId) + timestampAddMonths(micros, Math.toIntExact(quantity), zoneId) case "QUARTER" => - timestampAddMonths(micros, Math.multiplyExact(quantity, 3), zoneId) + timestampAddMonths(micros, Math.multiplyExact(Math.toIntExact(quantity), 3), zoneId) case "YEAR" => - timestampAddMonths(micros, Math.multiplyExact(quantity, MONTHS_PER_YEAR), zoneId) + timestampAddMonths( + micros, + Math.multiplyExact(Math.toIntExact(quantity), MONTHS_PER_YEAR), + zoneId) } } catch { case _: scala.MatchError => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala index 118dd92c3ed54..f2925314e2e2b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala @@ -206,8 +206,7 @@ object UnsafeRowUtils { */ def isBinaryStable(dataType: DataType): Boolean = !dataType.existsRecursively { case st: StringType => - val collation = CollationFactory.fetchCollation(st.collationId) - (!collation.supportsBinaryEquality) + !st.supportsBinaryEquality case _ => false } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala index 9a0528468842c..4b892da9db255 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala @@ -32,6 +32,8 @@ import 
scala.util.control.NonFatal import scala.xml.SAXException import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.hdfs.BlockMissingException +import org.apache.hadoop.security.AccessControlException import org.apache.spark.{SparkIllegalArgumentException, SparkUpgradeException} import org.apache.spark.internal.Logging @@ -655,6 +657,10 @@ class XmlTokenizer( e) case NonFatal(e) => ExceptionUtils.getRootCause(e) match { + case _: AccessControlException | _: BlockMissingException => + reader.close() + reader = null + throw e case _: RuntimeException | _: IOException if options.ignoreCorruptFiles => logWarning( "Skipping the rest of" + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala index 848e6ff45c5a2..ecde7c1715bd5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala @@ -30,6 +30,9 @@ import scala.util.control.Exception._ import scala.util.control.NonFatal import scala.xml.SAXException +import org.apache.hadoop.hdfs.BlockMissingException +import org.apache.hadoop.security.AccessControlException + import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD @@ -157,6 +160,7 @@ class XmlInferSchema(options: XmlOptions, caseSensitive: Boolean) logWarning("Skipped missing file", e) Some(StructType(Nil)) case e: FileNotFoundException if !options.ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e @ (_: IOException | _: RuntimeException) if options.ignoreCorruptFiles => logWarning("Skipped the rest of the content in the corrupted file", e) Some(StructType(Nil)) diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala index db94659b1033b..9b8584604d32f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala @@ -85,9 +85,10 @@ class CatalogManager( * in the fallback configuration, spark.sql.sources.useV1SourceList */ private[sql] def v2SessionCatalog: CatalogPlugin = { - conf.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION).map { _ => - catalogs.getOrElseUpdate(SESSION_CATALOG_NAME, loadV2SessionCatalog()) - }.getOrElse(defaultSessionCatalog) + conf.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION) match { + case "builtin" => defaultSessionCatalog + case _ => catalogs.getOrElseUpdate(SESSION_CATALOG_NAME, loadV2SessionCatalog()) + } } private var _currentNamespace: Option[Array[String]] = None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index e1f114a6170a4..97cc263c56c5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -53,6 +53,7 @@ private[sql] object CatalogV2Util { */ val TABLE_RESERVED_PROPERTIES = Seq(TableCatalog.PROP_COMMENT, + TableCatalog.PROP_COLLATION, TableCatalog.PROP_LOCATION, TableCatalog.PROP_PROVIDER, TableCatalog.PROP_OWNER, @@ -459,7 +460,7 @@ private[sql] object CatalogV2Util { def convertTableProperties(t: TableSpec): Map[String, String] = { val props = convertTableProperties( t.properties, t.options, t.serde, t.location, t.comment, - t.provider, t.external) + t.collation, t.provider, t.external) withDefaultOwnership(props) } @@ -469,6 +470,7 @@ 
private[sql] object CatalogV2Util { serdeInfo: Option[SerdeInfo], location: Option[String], comment: Option[String], + collation: Option[String], provider: Option[String], external: Boolean = false): Map[String, String] = { properties ++ @@ -478,6 +480,7 @@ private[sql] object CatalogV2Util { (if (external) Some(TableCatalog.PROP_EXTERNAL -> "true") else None) ++ provider.map(TableCatalog.PROP_PROVIDER -> _) ++ comment.map(TableCatalog.PROP_COMMENT -> _) ++ + collation.map(TableCatalog.PROP_COLLATION -> _) ++ location.map(TableCatalog.PROP_LOCATION -> _) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 4a5a607e8a8ae..570ab1338dbf2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -85,6 +85,7 @@ private[sql] object V1Table { TableCatalog.OPTION_PREFIX + key -> value } ++ v1Table.provider.map(TableCatalog.PROP_PROVIDER -> _) ++ v1Table.comment.map(TableCatalog.PROP_COMMENT -> _) ++ + v1Table.collation.map(TableCatalog.PROP_COLLATION -> _) ++ v1Table.storage.locationUri.map { loc => TableCatalog.PROP_LOCATION -> CatalogUtils.URIToString(loc) } ++ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 03471ae8a3da5..afae0565133b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.{ExtendedAnalysisException, FunctionIdentif import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, FunctionAlreadyExistsException, NamespaceAlreadyExistsException, 
NoSuchFunctionException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchTableException, Star, TableAlreadyExistsException, UnresolvedRegex} import org.apache.spark.sql.catalyst.catalog.{CatalogTable, InvalidUDFClassException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, CreateMap, CreateStruct, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, AttributeSet, CreateMap, CreateStruct, Expression, GroupingID, NamedExpression, SortOrder, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} import org.apache.spark.sql.catalyst.expressions.aggregate.AnyValue import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{Assignment, InputParameter, Join, LogicalPlan, SerdeInfo, Window} @@ -351,6 +351,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat ) } + def collatedStringsInMapKeysNotSupportedError(): Throwable = { + new AnalysisException( + errorClass = "UNSUPPORTED_FEATURE.COLLATIONS_IN_MAP_KEYS", + messageParameters = Map.empty) + } + + def objectLevelCollationsNotEnabledError(): Throwable = { + new AnalysisException( + errorClass = "UNSUPPORTED_FEATURE.OBJECT_LEVEL_COLLATIONS", + messageParameters = Map.empty + ) + } + def trimCollationNotEnabledError(): Throwable = { new AnalysisException( errorClass = "UNSUPPORTED_FEATURE.TRIM_COLLATION", @@ -725,28 +738,32 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "windowExpr" -> toSQLExpr(windowExpr))) } - def distinctInverseDistributionFunctionUnsupportedError(funcName: String): Throwable = { + def distinctWithOrderingFunctionUnsupportedError(funcName: String): Throwable = { new AnalysisException( 
- errorClass = "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", - messageParameters = Map("funcName" -> toSQLId(funcName))) + errorClass = "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", + messageParameters = Map("funcName" -> toSQLId(funcName)) + ) } - def inverseDistributionFunctionMissingWithinGroupError(funcName: String): Throwable = { + def functionMissingWithinGroupError(funcName: String): Throwable = { new AnalysisException( - errorClass = "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING", - messageParameters = Map("funcName" -> toSQLId(funcName))) + errorClass = "INVALID_WITHIN_GROUP_EXPRESSION.WITHIN_GROUP_MISSING", + messageParameters = Map("funcName" -> toSQLId(funcName)) + ) } - def wrongNumOrderingsForInverseDistributionFunctionError( + def wrongNumOrderingsForFunctionError( funcName: String, validOrderingsNumber: Int, actualOrderingsNumber: Int): Throwable = { new AnalysisException( - errorClass = "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WRONG_NUM_ORDERINGS", + errorClass = "INVALID_WITHIN_GROUP_EXPRESSION.WRONG_NUM_ORDERINGS", messageParameters = Map( "funcName" -> toSQLId(funcName), "expectedNum" -> validOrderingsNumber.toString, - "actualNum" -> actualOrderingsNumber.toString)) + "actualNum" -> actualOrderingsNumber.toString + ) + ) } def aliasNumberNotMatchColumnNumberError( @@ -1049,6 +1066,18 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "operation" -> operation)) } + def functionAndOrderExpressionMismatchError( + functionName: String, + functionArg: Expression, + orderExpr: Seq[SortOrder]): Throwable = { + new AnalysisException( + errorClass = "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT", + messageParameters = Map( + "funcName" -> toSQLId(functionName), + "funcArg" -> toSQLExpr(functionArg), + "orderingExpr" -> orderExpr.map(order => toSQLExpr(order.child)).mkString(", "))) + } + def wrongCommandForObjectTypeError( operation: String, requiredType: String, @@ 
-1593,6 +1622,10 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat notSupportedForV2TablesError("ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]") } + def describeAsJsonNotSupportedForV2TablesError(): Throwable = { + notSupportedForV2TablesError("DESCRIBE TABLE AS JSON") + } + def loadDataNotSupportedForV2TablesError(): Throwable = { notSupportedForV2TablesError("LOAD DATA") } @@ -2150,6 +2183,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "ability" -> ability)) } + def tableValuedArgumentsNotYetImplementedForSqlFunctions( + action: String, functionName: String): Throwable = { + new AnalysisException( + errorClass = "TABLE_VALUED_ARGUMENTS_NOT_YET_IMPLEMENTED_FOR_SQL_FUNCTIONS", + messageParameters = Map( + "action" -> action, + "functionName" -> functionName)) + } + def tableValuedFunctionTooManyTableArgumentsError(num: Int): Throwable = { new AnalysisException( errorClass = "TABLE_VALUED_FUNCTION_TOO_MANY_TABLE_ARGUMENTS", @@ -2638,12 +2680,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "comment" -> comment)) } - def invalidPartitionColumnKeyInTableError(key: String, tblName: String): Throwable = { + def invalidPartitionColumnKeyInTableError(key: String, tableName: String): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1231", + errorClass = "PARTITIONS_NOT_FOUND", messageParameters = Map( - "key" -> key, - "tblName" -> toSQLId(tblName))) + "partitionList" -> toSQLId(key), + "tableName" -> toSQLId(tableName))) } def invalidPartitionSpecError( @@ -4114,6 +4156,83 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat ) } + def unsupportedSinglePassAnalyzerFeature(feature: String): AnalysisException = { + new AnalysisException( + errorClass = "UNSUPPORTED_SINGLE_PASS_ANALYZER_FEATURE", + messageParameters = Map("feature" -> feature) + ) + } + + def ambiguousResolverExtension( + operator: LogicalPlan, + 
extensionNames: Seq[String]): AnalysisException = { + new AnalysisException( + errorClass = "AMBIGUOUS_RESOLVER_EXTENSION", + messageParameters = Map( + "operator" -> operator.getClass.getName, + "extensions" -> extensionNames.mkString(", ") + ) + ) + } + + def fixedPointFailedSinglePassSucceeded( + singlePassResult: LogicalPlan, + fixedPointException: Throwable): Throwable = { + new ExtendedAnalysisException( + new AnalysisException( + errorClass = "HYBRID_ANALYZER_EXCEPTION.FIXED_POINT_FAILED_SINGLE_PASS_SUCCEEDED", + messageParameters = Map("singlePassOutput" -> singlePassResult.toString), + cause = Some(fixedPointException) + ), + plan = singlePassResult + ) + } + + def hybridAnalyzerOutputSchemaComparisonMismatch( + fixedPointOutputSchema: StructType, + singlePassOutputSchema: StructType): Throwable = { + + def structToString(struct: StructType) = + struct.fields.map(structFieldToStringWithMetadata(_)).mkString(",") + + def structFieldToStringWithMetadata(sf: StructField) = + s"(${sf.name},${sf.dataType},${sf.nullable},${sf.metadata})" + + new AnalysisException( + errorClass = "HYBRID_ANALYZER_EXCEPTION.OUTPUT_SCHEMA_COMPARISON_MISMATCH", + messageParameters = Map( + "fixedPointOutputSchema" -> structToString(fixedPointOutputSchema), + "singlePassOutputSchema" -> structToString(singlePassOutputSchema) + ) + ) + } + + def hybridAnalyzerLogicalPlanComparisonMismatch( + fixedPointOutput: LogicalPlan, + singlePassOutput: LogicalPlan): Throwable = { + new AnalysisException( + errorClass = "HYBRID_ANALYZER_EXCEPTION.LOGICAL_PLAN_COMPARISON_MISMATCH", + messageParameters = Map( + "fixedPointOutput" -> fixedPointOutput.toString, + "singlePassOutput" -> singlePassOutput.toString + ) + ) + } + + def resolutionValidationError(cause: Throwable, plan: LogicalPlan): Throwable = { + new ExtendedAnalysisException( + new AnalysisException( + errorClass = "INTERNAL_ERROR", + cause = Some(cause), + messageParameters = Map( + "message" -> ("The analysis phase failed with an 
internal error. Reason: " + + cause.getMessage) + ) + ), + plan = plan + ) + } + def avroNotLoadedSqlFunctionsUnusable(functionName: String): Throwable = { new AnalysisException( errorClass = "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", @@ -4172,4 +4291,44 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat ) ) } + + def numColumnsMismatch( + operator: String, + firstNumColumns: Int, + invalidOrdinalNum: Int, + invalidNumColumns: Int, + origin: Origin): Throwable = { + new AnalysisException( + errorClass = "NUM_COLUMNS_MISMATCH", + messageParameters = Map( + "operator" -> toSQLStmt(operator), + "firstNumColumns" -> firstNumColumns.toString, + "invalidOrdinalNum" -> ordinalNumber(invalidOrdinalNum), + "invalidNumColumns" -> invalidNumColumns.toString + ), + origin = origin + ) + } + + def incompatibleColumnTypeError( + operator: String, + columnOrdinalNumber: Int, + tableOrdinalNumber: Int, + dataType1: DataType, + dataType2: DataType, + hint: String, + origin: Origin): Throwable = { + new AnalysisException( + errorClass = "INCOMPATIBLE_COLUMN_TYPE", + messageParameters = Map( + "operator" -> toSQLStmt(operator), + "columnOrdinalNumber" -> ordinalNumber(columnOrdinalNumber), + "tableOrdinalNumber" -> ordinalNumber(tableOrdinalNumber), + "dataType1" -> toSQLType(dataType1), + "dataType2" -> toSQLType(dataType2), + "hint" -> hint + ), + origin = origin + ) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 0852e773c87b4..1ae2e5445c0c5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -225,8 +225,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE errorClass = "INVALID_ARRAY_INDEX", messageParameters = Map( "indexValue" -> 
toSQLValue(index, IntegerType), - "arraySize" -> toSQLValue(numElements, IntegerType), - "ansiConfig" -> toSQLConf(SQLConf.ANSI_ENABLED.key)), + "arraySize" -> toSQLValue(numElements, IntegerType)), context = getQueryContext(context), summary = getSummary(context)) } @@ -239,8 +238,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE errorClass = "INVALID_ARRAY_INDEX_IN_ELEMENT_AT", messageParameters = Map( "indexValue" -> toSQLValue(index, IntegerType), - "arraySize" -> toSQLValue(numElements, IntegerType), - "ansiConfig" -> toSQLConf(SQLConf.ANSI_ENABLED.key)), + "arraySize" -> toSQLValue(numElements, IntegerType)), context = getQueryContext(context), summary = getSummary(context)) } @@ -267,12 +265,13 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE summary = "") } - def ansiDateTimeParseError(e: Exception): SparkDateTimeException = { + def ansiDateTimeParseError(e: Exception, suggestedFunc: String): SparkDateTimeException = { new SparkDateTimeException( errorClass = "CANNOT_PARSE_TIMESTAMP", messageParameters = Map( "message" -> e.getMessage, - "ansiConfig" -> toSQLConf(SQLConf.ANSI_ENABLED.key)), + "func" -> toSQLId(suggestedFunc) + ), context = Array.empty, summary = "") } @@ -2475,11 +2474,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE ) } - def timestampAddOverflowError(micros: Long, amount: Int, unit: String): ArithmeticException = { + def timestampAddOverflowError(micros: Long, amount: Long, unit: String): ArithmeticException = { new SparkArithmeticException( errorClass = "DATETIME_OVERFLOW", messageParameters = Map( - "operation" -> (s"add ${toSQLValue(amount, IntegerType)} $unit to " + + "operation" -> (s"add ${toSQLValue(amount, LongType)} $unit to " + s"${toSQLValue(DateTimeUtils.microsToInstant(micros), TimestampType)}")), context = Array.empty, summary = "") @@ -2602,6 +2601,14 @@ private[sql] object QueryExecutionErrors extends 
QueryErrorsBase with ExecutionE cause = null) } + def cannotFindBaseSnapshotCheckpoint(lineage: String): Throwable = { + new SparkException ( + errorClass = + "CANNOT_LOAD_STATE_STORE.CANNOT_FIND_BASE_SNAPSHOT_CHECKPOINT", + messageParameters = Map("lineage" -> lineage), + cause = null) + } + def unexpectedFileSize( dfsFile: Path, localFile: File, @@ -2779,6 +2786,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE Map.empty ) + def invalidFileExtensionError(functionName: String, extension: String): RuntimeException = { + new SparkIllegalArgumentException( + errorClass = "INVALID_PARAMETER_VALUE.EXTENSION", + messageParameters = Map( + "functionName" -> toSQLId(functionName), + "parameter" -> toSQLId("extension"), + "fileExtension" -> toSQLId(extension), + "acceptable" -> "Extension is limited to exactly 3 letters (e.g. csv, tsv, etc...)")) + } + def invalidCharsetError(functionName: String, charset: String): RuntimeException = { new SparkIllegalArgumentException( errorClass = "INVALID_PARAMETER_VALUE.CHARSET", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/SqlScriptingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/SqlScriptingErrors.scala index f1c07200d503b..da492cce22f2c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/SqlScriptingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/SqlScriptingErrors.scala @@ -103,6 +103,14 @@ private[sql] object SqlScriptingErrors { messageParameters = Map("invalidStatement" -> toSQLStmt(stmt))) } + def positionalParametersAreNotSupportedWithSqlScripting(): Throwable = { + new SqlScriptingException( + origin = null, + errorClass = "UNSUPPORTED_FEATURE.SQL_SCRIPTING_WITH_POSITIONAL_PARAMETERS", + cause = null, + messageParameters = Map.empty) + } + def labelDoesNotExist( origin: Origin, labelName: String, @@ -125,4 +133,14 @@ private[sql] object SqlScriptingErrors { cause = null, messageParameters = 
Map("labelName" -> toSQLStmt(labelName))) } + + def labelCannotBeQualified( + origin: Origin, + labelName: String): Throwable = { + new SqlScriptingException( + origin = origin, + errorClass = "INVALID_LABEL_USAGE.QUALIFIED_LABEL_NAME", + cause = null, + messageParameters = Map("labelName" -> toSQLStmt(labelName))) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 123759c6c8b80..727d54b6bbd2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver} import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler -import org.apache.spark.sql.catalyst.util.{CollationFactory, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.{CollationFactory, CollationNames, DateTimeUtils} import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types.{AtomicType, StringType, TimestampNTZType, TimestampType} @@ -247,6 +247,78 @@ object SQLConf { .intConf .createWithDefault(100) + val ANALYZER_SINGLE_PASS_RESOLVER_ENABLED = + buildConf("spark.sql.analyzer.singlePassResolver.enabled") + .internal() + .doc( + "When true, use the single-pass Resolver instead of the fixed-point Analyzer. " + + "This is an alternative Analyzer framework, which resolves the parsed logical plan in a " + + "single post-order traversal. It uses ExpressionResolver to resolve expressions and " + + "NameScope to control the visibility of names. 
In contrast to the current fixed-point " + + "framework, subsequent in-tree traversals are disallowed. Most of the fixed-point " + + "Analyzer code is reused in the form of specific node transformation functions " + + "(AliasResolution.resolve, FunctionResolution.resolveFunction, etc). " + + "This feature is currently under development." + ) + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + val ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER = + buildConf("spark.sql.analyzer.singlePassResolver.dualRunWithLegacy") + .internal() + .doc( + "When true, run both analyzers to check if single-pass Analyzer correctly produces " + + "the same analyzed plan as the fixed-point Analyzer for the existing set of features " + + "defined in the ResolverGuard" + ) + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + val ANALYZER_SINGLE_PASS_RESOLVER_VALIDATION_ENABLED = + buildConf("spark.sql.analyzer.singlePassResolver.validationEnabled") + .internal() + .doc( + "When true, validate the Resolver output with ResolutionValidator. " + + "The ResolutionValidator validates the resolved logical plan tree in one pass " + + "and asserts the internal contracts. It uses the ExpressionResolutionValidator " + + "internally to validate resolved expression trees in the same manner." + ) + .version("4.0.0") + .booleanConf + .createWithDefault(true) + + val ANALYZER_SINGLE_PASS_TRACK_RESOLVED_NODES_ENABLED = + buildConf("spark.sql.analyzer.singlePassResolver.trackResolvedNodes.enabled") + .internal() + .doc( + "When true, keep track of resolved nodes in order to assert that the single-pass " + + "invariant is never broken. While true, if a resolver attempts to resolve the same node " + + "twice, INTERNAL_ERROR exception is thrown. Used only for testing due to memory impact " + + "of storing each node in a HashSet." 
+ ) + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + val ANALYZER_SINGLE_PASS_RESOLVER_RELATION_BRIDGING_ENABLED = + buildConf("spark.sql.analyzer.singlePassResolver.relationBridging.enabled") + .internal() + .doc( + "When set to true, the single-pass Resolver will reuse the relation metadata that was " + + "previously resolved in fixed-point run. This makes sense only in " + + "ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER mode. In that case HybridAnalyzer " + + "enables the AnalyzerBridgeState and passes it to the single-pass Analyzer after the " + + "fixed-point run is complete. Single-pass Resolver uses this AnalyzerBridgeState to " + + "construct a special RelationMetadataProvider implementation - " + + "BridgedRelationMetadataProvider. This component simply reuses cached relation metadata " + + "and avoids any blocking calls (catalog RPCs or table metadata reads)." + ) + .version("4.0.0") + .booleanConf + .createWithDefault(Utils.isTesting) + val MULTI_COMMUTATIVE_OP_OPT_THRESHOLD = buildConf("spark.sql.analyzer.canonicalization.multiCommutativeOpMemoryOptThreshold") .internal() @@ -328,6 +400,19 @@ object SQLConf { .booleanConf .createWithDefault(Utils.isTesting) + val EXPRESSION_TREE_CHANGE_LOG_LEVEL = buildConf("spark.sql.expressionTreeChangeLog.level") + .internal() + .doc("Configures the log level for logging the change from the unresolved expression tree to " + + "the resolved expression tree in the single-pass bottom-up Resolver. The value can be " + + "'trace', 'debug', 'info', 'warn', or 'error'. The default log level is 'trace'.") + .version("4.0.0") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValue(logLevel => Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR").contains(logLevel), + "Invalid value for 'spark.sql.expressionTreeChangeLog.level'. 
Valid values are " + + "'trace', 'debug', 'info', 'warn' and 'error'.") + .createWithDefault("trace") + val LIGHTWEIGHT_PLAN_CHANGE_VALIDATION = buildConf("spark.sql.lightweightPlanChangeValidation") .internal() .doc(s"Similar to ${PLAN_CHANGE_VALIDATION.key}, this validates plan changes and runs after " + @@ -600,7 +685,7 @@ object SQLConf { val AUTO_BROADCASTJOIN_THRESHOLD = buildConf("spark.sql.autoBroadcastJoinThreshold") .doc("Configures the maximum size in bytes for a table that will be broadcast to all worker " + - "nodes when performing a join. By setting this value to -1 broadcasting can be disabled.") + "nodes when performing a join. By setting this value to -1 broadcasting can be disabled.") .version("1.1.0") .bytesConf(ByteUnit.BYTE) .createWithDefaultString("10MB") @@ -616,7 +701,7 @@ object SQLConf { val LIMIT_INITIAL_NUM_PARTITIONS = buildConf("spark.sql.limit.initialNumPartitions") .internal() .doc("Initial number of partitions to try when executing a take on a query. Higher values " + - "lead to more partitions read. Lower values might lead to longer execution times as more" + + "lead to more partitions read. Lower values might lead to longer execution times as more " + "jobs will be run") .version("3.4.0") .intConf @@ -767,25 +852,35 @@ object SQLConf { .checkValue(_ > 0, "The initial number of partitions must be positive.") .createOptional - lazy val TRIM_COLLATION_ENABLED = - buildConf("spark.sql.collation.trim.enabled") + lazy val ALLOW_COLLATIONS_IN_MAP_KEYS = + buildConf("spark.sql.collation.allowInMapKeys") + .doc("Allow for non-UTF8_BINARY collated strings inside of map's keys") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + lazy val OBJECT_LEVEL_COLLATIONS_ENABLED = + buildConf("spark.sql.collation.objectLevel.enabled") .internal() .doc( - "Trim collation feature is under development and its use should be done under this" + - "feature flag. Trim collation trims trailing whitespaces from strings." 
+ "Object level collations feature is under development and its use should be done " + + "under this feature flag. The feature allows setting default collation for all " + + "underlying columns within that object, except the ones that were previously created." ) .version("4.0.0") .booleanConf .createWithDefault(Utils.isTesting) - val ALLOW_READING_UNKNOWN_COLLATIONS = - buildConf(SqlApiConfHelper.ALLOW_READING_UNKNOWN_COLLATIONS) + lazy val TRIM_COLLATION_ENABLED = + buildConf("spark.sql.collation.trim.enabled") .internal() - .doc("Enables spark to read unknown collation name as UTF8_BINARY. If the config is " + - "not enabled, when spark encounters an unknown collation name, it will throw an error.") + .doc( + "Trim collation feature is under development and its use should be done under this" + + "feature flag. Trim collation trims trailing whitespaces from strings." + ) .version("4.0.0") .booleanConf - .createWithDefault(false) + .createWithDefault(Utils.isTesting) val DEFAULT_COLLATION = buildConf(SqlApiConfHelper.DEFAULT_COLLATION) @@ -1017,8 +1112,8 @@ object SQLConf { val FILE_COMPRESSION_FACTOR = buildConf("spark.sql.sources.fileCompressionFactor") .internal() .doc("When estimating the output data size of a table scan, multiply the file size with this " + - "factor as the estimated data size, in case the data is compressed in the file and lead to" + - " a heavily underestimated result.") + "factor as the estimated data size, in case the data is compressed in the file and lead to " + + "a heavily underestimated result.") .version("2.3.1") .doubleConf .checkValue(_ > 0, "the value of fileCompressionFactor must be greater than 0") @@ -1270,7 +1365,7 @@ object SQLConf { val ORC_COMPRESSION = buildConf("spark.sql.orc.compression.codec") .doc("Sets the compression codec used when writing ORC files. 
If either `compression` or " + "`orc.compress` is specified in the table-specific options/properties, the precedence " + - "would be `compression`, `orc.compress`, `spark.sql.orc.compression.codec`." + + "would be `compression`, `orc.compress`, `spark.sql.orc.compression.codec`. " + "Acceptable values include: none, uncompressed, snappy, zlib, lzo, zstd, lz4, brotli.") .version("2.3.0") .stringConf @@ -1441,7 +1536,7 @@ object SQLConf { "to produce the partition columns instead of table scans. It applies when all the columns " + "scanned are partition columns and the query has an aggregate operator that satisfies " + "distinct semantics. By default the optimization is disabled, and deprecated as of Spark " + - "3.0 since it may return incorrect results when the files are empty, see also SPARK-26709." + + "3.0 since it may return incorrect results when the files are empty, see also SPARK-26709. " + "It will be removed in the future releases. If you must use, use 'SparkSessionExtensions' " + "instead to inject it as a custom rule.") .version("2.1.1") @@ -1638,7 +1733,7 @@ object SQLConf { val V2_BUCKETING_SHUFFLE_ENABLED = buildConf("spark.sql.sources.v2.bucketing.shuffle.enabled") - .doc("During a storage-partitioned join, whether to allow to shuffle only one side." + + .doc("During a storage-partitioned join, whether to allow to shuffle only one side. " + "When only one side is KeyGroupedPartitioning, if the conditions are met, spark will " + "only shuffle the other side. This optimization will reduce the amount of data that " + s"needs to be shuffle. 
This config requires ${V2_BUCKETING_ENABLED.key} to be enabled") @@ -1648,9 +1743,9 @@ object SQLConf { val V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS = buildConf("spark.sql.sources.v2.bucketing.allowJoinKeysSubsetOfPartitionKeys.enabled") - .doc("Whether to allow storage-partition join in the case where join keys are" + + .doc("Whether to allow storage-partition join in the case where join keys are " + "a subset of the partition keys of the source tables. At planning time, " + - "Spark will group the partitions by only those keys that are in the join keys." + + "Spark will group the partitions by only those keys that are in the join keys. " + s"This is currently enabled only if ${REQUIRE_ALL_CLUSTER_KEYS_FOR_DISTRIBUTION.key} " + "is false." ) @@ -1681,6 +1776,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val V2_BUCKETING_SORTING_ENABLED = + buildConf("spark.sql.sources.v2.bucketing.sorting.enabled") + .doc(s"When turned on, Spark will recognize the specific distribution reported by " + + s"a V2 data source through SupportsReportPartitioning, and will try to avoid a shuffle " + + s"if possible when sorting by those columns. This config requires " + + s"${V2_BUCKETING_ENABLED.key} to be enabled.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val BUCKETING_MAX_BUCKETS = buildConf("spark.sql.sources.bucketing.maxBuckets") .doc("The maximum number of buckets allowed.") .version("2.4.0") @@ -1978,7 +2083,7 @@ object SQLConf { val WHOLESTAGE_BROADCAST_CLEANED_SOURCE_THRESHOLD = buildConf("spark.sql.codegen.broadcastCleanedSourceThreshold") .internal() - .doc("A threshold (in string length) to determine if we should make the generated code a" + + .doc("A threshold (in string length) to determine if we should make the generated code a " + "broadcast variable in whole stage codegen. To disable this, set the threshold to < 0; " + "otherwise if the size is above the threshold, it'll use broadcast variable. 
Note that " + "maximum string length allowed in Java is Integer.MAX_VALUE, so anything above it would " + @@ -2230,6 +2335,17 @@ object SQLConf { .intConf .createWithDefault(1) + val STREAMING_STATE_STORE_ENCODING_FORMAT = + buildConf("spark.sql.streaming.stateStore.encodingFormat") + .doc("The encoding format used for stateful operators to store information " + + "in the state store") + .version("4.0.0") + .stringConf + .transform(_.toLowerCase(Locale.ROOT)) + .checkValue(v => Set("unsaferow", "avro").contains(v), + "Valid values are 'unsaferow' and 'avro'") + .createWithDefault("unsaferow") + val STATE_STORE_COMPRESSION_CODEC = buildConf("spark.sql.streaming.stateStore.compression.codec") .internal() @@ -3273,6 +3389,24 @@ object SQLConf { .booleanConf .createWithDefault(false) + val PYTHON_UDF_MAX_RECORDS_PER_BATCH = + buildConf("spark.sql.execution.python.udf.maxRecordsPerBatch") + .doc("When using Python UDFs, limit the maximum number of records that can be batched " + + "for serialization/deserialization.") + .version("4.0.0") + .intConf + .checkValue(_ > 0, "The value of spark.sql.execution.python.udf.maxRecordsPerBatch " + + "must be positive.") + .createWithDefault(100) + + val PYTHON_UDF_BUFFER_SIZE = + buildConf("spark.sql.execution.python.udf.buffer.size") + .doc( + s"Same as `${BUFFER_SIZE.key}` but only applies to Python UDF executions. If it is not " + + s"set, the fallback is `${BUFFER_SIZE.key}`.") + .version("4.0.0") + .fallbackConf(BUFFER_SIZE) + val PANDAS_UDF_BUFFER_SIZE = buildConf("spark.sql.execution.pandas.udf.buffer.size") .doc( @@ -3287,7 +3421,7 @@ object SQLConf { buildConf("spark.sql.execution.pandas.structHandlingMode") .doc( "The conversion mode of struct type when creating pandas DataFrame. " + - "When \"legacy\"," + + "When \"legacy\", " + "1. when Arrow optimization is disabled, convert to Row object, " + "2. when Arrow optimization is enabled, convert to dict or raise an Exception " + "if there are duplicated nested field names. 
" + @@ -3319,6 +3453,17 @@ object SQLConf { .booleanConf .createWithDefault(false) + val PYTHON_UDF_ARROW_CONCURRENCY_LEVEL = + buildConf("spark.sql.execution.pythonUDF.arrow.concurrency.level") + .doc("The level of concurrency to execute Arrow-optimized Python UDF. " + + "This can be useful if Python UDFs use I/O intensively.") + .version("4.0.0") + .intConf + .checkValue(_ > 1, + "The value of spark.sql.execution.pythonUDF.arrow.concurrency.level" + + " must be more than one.") + .createOptional + val PYTHON_TABLE_UDF_ARROW_ENABLED = buildConf("spark.sql.execution.pythonUDTF.arrow.enabled") .doc("Enable Arrow optimization for Python UDTFs.") @@ -3364,7 +3509,7 @@ object SQLConf { buildConf("spark.sql.execution.pyspark.python") .internal() .doc("Python binary executable to use for PySpark in executors when running Python " + - "UDF, pandas UDF and pandas function APIs." + + "UDF, pandas UDF and pandas function APIs. " + "If not set, it falls back to 'spark.pyspark.python' by default.") .version("3.5.0") .stringConf @@ -3593,7 +3738,7 @@ object SQLConf { val ANSI_ENABLED = buildConf(SqlApiConfHelper.ANSI_ENABLED_KEY) .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + "For example, Spark will throw an exception at runtime instead of returning null results " + - "when the inputs to a SQL operator/function are invalid." + + "when the inputs to a SQL operator/function are invalid. " + "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " + "Spark's documentation. Some ANSI dialect features may be not from the ANSI SQL " + "standard directly, but their behaviors align with ANSI SQL's style") @@ -3684,7 +3829,7 @@ object SQLConf { .internal() .doc("When true, use the common expression ID for the alias when rewriting With " + "expressions. Otherwise, use the index of the common expression definition. 
When true " + - "this avoids duplicate alias names, but is helpful to set to false for testing to ensure" + + "this avoids duplicate alias names, but is helpful to set to false for testing to ensure " + "that alias names are consistent.") .version("4.0.0") .booleanConf @@ -3979,7 +4124,7 @@ object SQLConf { .createWithDefault(true) val ARTIFACTS_SESSION_ISOLATION_ALWAYS_APPLY_CLASSLOADER = - buildConf("spark.sql.artifact.isolation.always.apply.classloader") + buildConf("spark.sql.artifact.isolation.alwaysApplyClassloader") .internal() .doc("When enabled, the classloader holding per-session artifacts will always be applied " + "during SQL executions (useful for Spark Connect). When disabled, the classloader will " + @@ -4146,7 +4291,7 @@ object SQLConf { val LEGACY_ALLOW_UNTYPED_SCALA_UDF = buildConf("spark.sql.legacy.allowUntypedScalaUDF") .internal() - .doc("When set to true, user is allowed to use org.apache.spark.sql.functions." + + .doc("When set to true, user is allowed to use org.apache.spark.sql.functions. " + "udf(f: AnyRef, dataType: DataType). Otherwise, an exception will be thrown at runtime.") .version("3.0.0") .booleanConf @@ -4183,7 +4328,7 @@ object SQLConf { val MAX_TO_STRING_FIELDS = buildConf("spark.sql.debug.maxToStringFields") .doc("Maximum number of fields of sequence-like entries can be converted to strings " + - "in debug output. Any elements beyond the limit will be dropped and replaced by a" + + "in debug output. Any elements beyond the limit will be dropped and replaced by a " + """ "... N more fields" placeholder.""") .version("3.0.0") .intConf @@ -4289,10 +4434,16 @@ object SQLConf { s"the $SESSION_CATALOG_NAME and must be consistent with it; for example, if a table can " + s"be loaded by the $SESSION_CATALOG_NAME, this catalog must also return the table " + s"metadata. To delegate operations to the $SESSION_CATALOG_NAME, implementations can " + - "extend 'CatalogExtension'.") + "extend 'CatalogExtension'. 
The value should be either 'builtin' which represents the " + "spark's built-in V2SessionCatalog, or a fully qualified class name of the catalog " + "implementation.") .version("3.0.0") .stringConf - .createOptional + .transform { + case builtin if builtin.equalsIgnoreCase("builtin") => "builtin" + case fullClassName => fullClassName + } + .createWithDefault("builtin") object MapKeyDedupPolicy extends Enumeration { val EXCEPTION, LAST_WIN = Value @@ -4319,7 +4470,7 @@ object SQLConf { val LEGACY_CTE_PRECEDENCE_POLICY = buildConf("spark.sql.legacy.ctePrecedencePolicy") .internal() .doc("When LEGACY, outer CTE definitions takes precedence over inner definitions. If set to " + - "EXCEPTION, AnalysisException is thrown while name conflict is detected in nested CTE." + + "EXCEPTION, AnalysisException is thrown while name conflict is detected in nested CTE. " + "The default is CORRECTED, inner CTE definitions take precedence. This config " + "will be removed in future versions and CORRECTED will be the only behavior.") .version("3.0.0") @@ -4526,6 +4677,40 @@ object SQLConf { .booleanConf .createWithDefault(false) + val VARIANT_ALLOW_READING_SHREDDED = + buildConf("spark.sql.variant.allowReadingShredded") + .internal() + .doc("When true, the Parquet reader is allowed to read shredded or unshredded variant. " + + "When false, it only reads unshredded variant.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + + val PUSH_VARIANT_INTO_SCAN = + buildConf("spark.sql.variant.pushVariantIntoScan") + .internal() + .doc("When true, replace variant type in the scan schema with a struct containing " + + "requested fields.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + val VARIANT_WRITE_SHREDDING_ENABLED = + buildConf("spark.sql.variant.writeShredding.enabled") + .internal() + .doc("When true, the Parquet writer is allowed to write shredded variant. 
") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + val VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST = + buildConf("spark.sql.variant.forceShreddingSchemaForTest") + .internal() + .doc("FOR INTERNAL TESTING ONLY. Sets shredding schema for Variant.") + .version("4.0.0") + .stringConf + .createWithDefault("") + val LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK = buildConf("spark.sql.legacy.csv.enableDateTimeParsingFallback") .internal() @@ -4729,7 +4914,7 @@ object SQLConf { .doc("When true, NULL-aware anti join execution will be planed into " + "BroadcastHashJoinExec with flag isNullAwareAntiJoin enabled, " + "optimized from O(M*N) calculation into O(M) calculation " + - "using Hash lookup instead of Looping lookup." + + "using Hash lookup instead of Looping lookup. " + "Only support for singleColumn NAAJ for now.") .version("3.1.0") .booleanConf @@ -4805,6 +4990,14 @@ object SQLConf { .booleanConf .createWithDefault(false) + val PRESERVE_CHAR_VARCHAR_TYPE_INFO = buildConf("spark.sql.preserveCharVarcharTypeInfo") + .doc("When true, Spark does not replace CHAR/VARCHAR types the STRING type, which is the " + + "default behavior of Spark 3.0 and earlier versions. This means the length checks for " + + "CHAR/VARCHAR types is enforced and CHAR type is also properly padded.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val READ_SIDE_CHAR_PADDING = buildConf("spark.sql.readSideCharPadding") .doc("When true, Spark applies string padding when reading CHAR type columns/fields, " + "in addition to the write-side padding. 
This config is true by default to better enforce " + @@ -5093,7 +5286,7 @@ object SQLConf { "the sequence of steps that the query performs in a composable fashion.") .version("4.0.0") .booleanConf - .createWithDefault(Utils.isTesting) + .createWithDefault(true) val LEGACY_PERCENTILE_DISC_CALCULATION = buildConf("spark.sql.legacy.percentileDiscCalculation") .internal() @@ -5121,7 +5314,7 @@ object SQLConf { buildConf("spark.sql.legacy.raiseErrorWithoutErrorClass") .internal() .doc("When set to true, restores the legacy behavior of `raise_error` and `assert_true` to " + - "not return the `[USER_RAISED_EXCEPTION]` prefix." + + "not return the `[USER_RAISED_EXCEPTION]` prefix. " + "For example, `raise_error('error!')` returns `error!` instead of " + "`[USER_RAISED_EXCEPTION] Error!`.") .version("4.0.0") @@ -5179,7 +5372,7 @@ object SQLConf { .internal() .doc("When set to true, datetime formatter used for csv, json and xml " + "will support zone offsets that have seconds in it. e.g. LA timezone offset prior to 1883" + - "was -07:52:58. When this flag is not set we lose seconds information." ) + " was -07:52:58. When this flag is not set we lose seconds information." ) .version("4.0.0") .booleanConf .createWithDefault(true) @@ -5260,7 +5453,7 @@ object SQLConf { val LEGACY_BANG_EQUALS_NOT = buildConf("spark.sql.legacy.bangEqualsNot") .internal() .doc("When set to true, '!' is a lexical equivalent for 'NOT'. That is '!' can be used " + - "outside of the documented prefix usage in a logical expression." + + "outside of the documented prefix usage in a logical expression. " + "Examples are: `expr ! IN (1, 2)` and `expr ! BETWEEN 1 AND 2`, but also `IF ! EXISTS`." ) .version("4.0.0") @@ -5277,6 +5470,19 @@ object SQLConf { .booleanConf .createWithDefault(true) + val LAZY_SET_OPERATOR_OUTPUT = buildConf("spark.sql.lazySetOperatorOutput.enabled") + .internal() + .doc( + "When set to true, Except/Intersect/Union operator's output will be a lazy val. 
It " + + "is a performance optimization for querires with a large number of stacked set operators. " + + "This is because of rules like WidenSetOperationTypes that traverse the logical plan tree " + + "and call output on each Except/Intersect/Union node. Such traversal has quadratic " + + "complexity: O(number_of_nodes * (1 + 2 + 3 + ... + number_of_nodes))." + ) + .version("4.0.0") + .booleanConf + .createWithDefault(true) + /** * Holds information about keys that have been deprecated. * @@ -5369,7 +5575,7 @@ object SQLConf { RemovedConfig("spark.sql.legacy.compareDateTimestampInTimestamp", "3.0.0", "true", "It was removed to prevent errors like SPARK-23549 for non-default value."), RemovedConfig("spark.sql.parquet.int64AsTimestampMillis", "3.0.0", "false", - "The config was deprecated since Spark 2.3." + + "The config was deprecated since Spark 2.3. " + s"Use '${PARQUET_OUTPUT_TIMESTAMP_TYPE.key}' instead of it."), RemovedConfig("spark.sql.execution.pandas.respectSessionTimeZone", "3.0.0", "true", "The non-default behavior is considered as a bug, see SPARK-22395. 
" + @@ -5445,6 +5651,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def planChangeBatches: Option[String] = getConf(PLAN_CHANGE_LOG_BATCHES) + def expressionTreeChangeLogLevel: String = getConf(EXPRESSION_TREE_CHANGE_LOG_LEVEL) + def dynamicPartitionPruningEnabled: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_ENABLED) def dynamicPartitionPruningUseStats: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_USE_STATS) @@ -5572,18 +5780,20 @@ class SQLConf extends Serializable with Logging with SqlApiConf { } } + def allowCollationsInMapKeys: Boolean = getConf(ALLOW_COLLATIONS_IN_MAP_KEYS) + + def objectLevelCollationsEnabled: Boolean = getConf(OBJECT_LEVEL_COLLATIONS_ENABLED) + def trimCollationEnabled: Boolean = getConf(TRIM_COLLATION_ENABLED) override def defaultStringType: StringType = { - if (getConf(DEFAULT_COLLATION).toUpperCase(Locale.ROOT) == "UTF8_BINARY") { + if (getConf(DEFAULT_COLLATION).toUpperCase(Locale.ROOT) == CollationNames.UTF8_BINARY) { StringType } else { - StringType(CollationFactory.collationNameToId(getConf(DEFAULT_COLLATION))) + StringType(getConf(DEFAULT_COLLATION)) } } - override def allowReadingUnknownCollations: Boolean = getConf(ALLOW_READING_UNKNOWN_COLLATIONS) - def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED) def adaptiveExecutionLogLevel: String = getConf(ADAPTIVE_EXECUTION_LOG_LEVEL) @@ -5607,6 +5817,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def stateStoreCheckpointFormatVersion: Int = getConf(STATE_STORE_CHECKPOINT_FORMAT_VERSION) + def stateStoreEncodingFormat: String = getConf(STREAMING_STATE_STORE_ENCODING_FORMAT) + def checkpointRenamedFileCheck: Boolean = getConf(CHECKPOINT_RENAMEDFILE_CHECK_ENABLED) def parquetFilterPushDown: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_ENABLED) @@ -5852,6 +6064,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def v2BucketingAllowCompatibleTransforms: Boolean = 
getConf(SQLConf.V2_BUCKETING_ALLOW_COMPATIBLE_TRANSFORMS) + def v2BucketingAllowSorting: Boolean = + getConf(SQLConf.V2_BUCKETING_SORTING_ENABLED) + def dataFrameSelfJoinAutoResolveAmbiguity: Boolean = getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY) @@ -5995,6 +6210,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def pythonUDFWorkerFaulthandlerEnabled: Boolean = getConf(PYTHON_UDF_WORKER_FAULTHANLDER_ENABLED) + def pythonUDFArrowConcurrencyLevel: Option[Int] = getConf(PYTHON_UDF_ARROW_CONCURRENCY_LEVEL) + def pysparkPlotMaxRows: Int = getConf(PYSPARK_PLOT_MAX_ROWS) def arrowSparkREnabled: Boolean = getConf(ARROW_SPARKR_EXECUTION_ENABLED) @@ -6188,6 +6405,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def charVarcharAsString: Boolean = getConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING) + def preserveCharVarcharTypeInfo: Boolean = getConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO) + def readSideCharPadding: Boolean = getConf(SQLConf.READ_SIDE_CHAR_PADDING) def cliPrintHeader: Boolean = getConf(SQLConf.CLI_PRINT_HEADER) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala index cd17a63e5d433..a14c584fdc6a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala @@ -280,7 +280,7 @@ object StaticSQLConf { buildStaticConf("spark.sql.streaming.ui.enabledCustomMetricList") .internal() .doc("Configures a list of custom metrics on Structured Streaming UI, which are enabled. " + - "The list contains the name of the custom metrics separated by comma. In aggregation" + + "The list contains the name of the custom metrics separated by comma. In aggregation " + "only sum used. 
The list of supported custom metrics is state store provider specific " + "and it can be found out for example from query progress log entry.") .version("3.1.0") @@ -295,4 +295,14 @@ object StaticSQLConf { .version("3.1.0") .stringConf .createWithDefault("") + + val DATA_FRAME_DEBUGGING_ENABLED = + buildStaticConf("spark.python.sql.dataFrameDebugging.enabled") + .internal() + .doc( + "Enable the DataFrame debugging. This feature is enabled by default, but has a " + + "non-trivial performance overhead because of the stack trace collection.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala index 1e0bac331dc75..53b82f9e86f77 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, NamedExpression} import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, NamedTransform, Transform} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StringHelper, StringType, StructField, StructType} import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.SparkSchemaUtils @@ -304,6 +304,17 @@ private[spark] object SchemaUtils { } } + def checkNoCollationsInMapKeys(schema: DataType): Unit = schema match { + case m: MapType => + if (hasNonUTF8BinaryCollation(m.keyType)) { + throw QueryCompilationErrors.collatedStringsInMapKeysNotSupportedError() + } + checkNoCollationsInMapKeys(m.valueType) + case s: StructType => s.fields.foreach(field => 
checkNoCollationsInMapKeys(field.dataType)) + case a: ArrayType => checkNoCollationsInMapKeys(a.elementType) + case _ => + } + /** * Replaces any collated string type with non collated StringType * recursively in the given data type. @@ -317,7 +328,7 @@ private[spark] object SchemaUtils { StructType(fields.map { field => field.copy(dataType = replaceCollatedStringWithString(field.dataType)) }) - case _: StringType => StringType + case st: StringType => StringHelper.removeCollation(st) case _ => dt } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/QueryPlanningTrackerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/QueryPlanningTrackerSuite.scala index 972b98780bcca..500bbef3c89bf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/QueryPlanningTrackerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/QueryPlanningTrackerSuite.scala @@ -95,7 +95,13 @@ class QueryPlanningTrackerSuite extends SparkFunSuite { val mockCallback = mock[QueryPlanningTrackerCallback] val mockPlan1 = mock[LogicalPlan] val mockPlan2 = mock[LogicalPlan] + val mockPlan3 = mock[LogicalPlan] + val mockPlan4 = mock[LogicalPlan] val t = new QueryPlanningTracker(Some(mockCallback)) + t.setAnalysisFailed(mockPlan3) + verify(mockCallback, times(1)).analysisFailed(t, mockPlan3) + t.setAnalysisFailed(mockPlan4) + verify(mockCallback, times(1)).analysisFailed(t, mockPlan4) t.setAnalyzed(mockPlan1) verify(mockCallback, times(1)).analyzed(t, mockPlan1) t.setAnalyzed(mockPlan2) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala index 55f59f7a22574..325862127d366 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala @@ -40,7 +40,6 @@ class AnalysisExceptionPositionSuite extends AnalysisTest { } test("SPARK-34057: UnresolvedTableOrView should retain sql text position") { - verifyTableOrViewPosition("DESCRIBE TABLE unknown", "unknown") verifyTableOrPermanentViewPosition("ANALYZE TABLE unknown COMPUTE STATISTICS", "unknown") verifyTableOrViewPosition("ANALYZE TABLE unknown COMPUTE STATISTICS FOR COLUMNS col", "unknown") verifyTableOrViewPosition("ANALYZE TABLE unknown COMPUTE STATISTICS FOR ALL COLUMNS", "unknown") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 939801e3f07af..2ffe6de974c74 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -81,6 +81,24 @@ class AnalysisSuite extends AnalysisTest with Matchers { } } + test(s"do not fail if a leaf node has char/varchar type output and " + + s"${SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key} is true") { + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + val schema1 = new StructType().add("c", CharType(5)) + val schema2 = new StructType().add("c", VarcharType(5)) + val schema3 = new StructType().add("c", ArrayType(CharType(5))) + Seq(schema1, schema2, schema3).foreach { schema => + val table = new InMemoryTable("t", schema, Array.empty, Map.empty[String, String].asJava) + DataSourceV2Relation( + table, + DataTypeUtils.toAttributes(schema), + None, + None, + CaseInsensitiveStringMap.empty()).analyze + } + } + } + test("union project *") { val plan = (1 to 120) .map(_ => testRelation) @@ -777,6 +795,14 @@ class AnalysisSuite extends AnalysisTest with Matchers { PosExplode($"list"), Seq("first_pos", "first_val")), 
Seq("second_pos", "second_val")))) } + test("SPARK-50497 Non-generator function with multiple aliases") { + assertAnalysisErrorCondition(parsePlan("SELECT 'length' (a)"), + "MULTI_ALIAS_WITHOUT_GENERATOR", + Map("expr" -> "\"length\"", "names" -> "a"), + Array(ExpectedContext("SELECT 'length' (a)", 0, 18)) + ) + } + test("SPARK-24151: CURRENT_DATE, CURRENT_TIMESTAMP should be case insensitive") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { val input = Project(Seq( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 71744f4d15105..58e6cd7fe1695 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -198,6 +198,21 @@ trait AnalysisTest extends PlanTest { } } + protected def assertParseErrorClass( + parser: String => Any, + sqlCommand: String, + errorClass: String, + parameters: Map[String, String], + queryContext: Array[ExpectedContext] = Array.empty): Unit = { + val e = parseException(parser)(sqlCommand) + checkError( + exception = e, + condition = errorClass, + parameters = parameters, + queryContext = queryContext + ) + } + protected def interceptParseException(parser: String => Any)( sqlCommand: String, messages: String*)(condition: Option[String] = None): Unit = { val e = parseException(parser)(sqlCommand) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala index 8cf7d78b510be..139e89828f8e5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala @@ -1057,11 +1057,11 @@ class 
AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { ArrayType(IntegerType)) shouldCast( ArrayType(StringType), - AbstractArrayType(StringTypeWithCollation), + AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true)), ArrayType(StringType)) shouldCast( ArrayType(IntegerType), - AbstractArrayType(StringTypeWithCollation), + AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true)), ArrayType(StringType)) shouldCast( ArrayType(StringType), @@ -1075,11 +1075,11 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { ArrayType(ArrayType(IntegerType))) shouldCast( ArrayType(ArrayType(StringType)), - AbstractArrayType(AbstractArrayType(StringTypeWithCollation)), + AbstractArrayType(AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true))), ArrayType(ArrayType(StringType))) shouldCast( ArrayType(ArrayType(IntegerType)), - AbstractArrayType(AbstractArrayType(StringTypeWithCollation)), + AbstractArrayType(AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true))), ArrayType(ArrayType(StringType))) shouldCast( ArrayType(ArrayType(StringType)), @@ -1088,16 +1088,16 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase { // Invalid casts involving casting arrays into non-complex types. shouldNotCast(ArrayType(IntegerType), IntegerType) - shouldNotCast(ArrayType(StringType), StringTypeWithCollation) + shouldNotCast(ArrayType(StringType), StringTypeWithCollation(supportsTrimCollation = true)) shouldNotCast(ArrayType(StringType), IntegerType) - shouldNotCast(ArrayType(IntegerType), StringTypeWithCollation) + shouldNotCast(ArrayType(IntegerType), StringTypeWithCollation(supportsTrimCollation = true)) // Invalid casts involving casting arrays of arrays into arrays of non-complex types. 
shouldNotCast(ArrayType(ArrayType(IntegerType)), AbstractArrayType(IntegerType)) shouldNotCast(ArrayType(ArrayType(StringType)), - AbstractArrayType(StringTypeWithCollation)) + AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true))) shouldNotCast(ArrayType(ArrayType(StringType)), AbstractArrayType(IntegerType)) shouldNotCast(ArrayType(ArrayType(IntegerType)), - AbstractArrayType(StringTypeWithCollation)) + AbstractArrayType(StringTypeWithCollation(supportsTrimCollation = true))) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala index 6b034d3dbee09..133670d5fcced 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.util.ArrayImplicits._ class CreateTablePartitioningValidationSuite extends AnalysisTest { val tableSpec = - UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), None, None, None, false) + UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), None, None, None, None, false) test("CreateTableAsSelect: fail missing top-level column") { val plan = CreateTableAsSelect( UnresolvedIdentifier(Array("table_name").toImmutableArraySeq), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/LimitExpressionResolverSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/LimitExpressionResolverSuite.scala new file mode 100644 index 0000000000000..fdab4df379a71 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/LimitExpressionResolverSuite.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, Expression, Literal} +import org.apache.spark.sql.errors.QueryErrorsBase +import org.apache.spark.sql.types.IntegerType + +class LimitExpressionResolverSuite extends SparkFunSuite with QueryErrorsBase { + + private class IdentityExpressionResolver extends TreeNodeResolver[Expression, Expression] { + override def resolve(expression: Expression): Expression = expression + } + + private val expressionResolver = new IdentityExpressionResolver + private val limitExpressionResolver = new LimitExpressionResolver(expressionResolver) + + test("Basic LIMIT without errors") { + val expr = Literal(42, IntegerType) + assert(limitExpressionResolver.resolve(expr) == expr) + } + + test("Unfoldable LIMIT") { + val col = AttributeReference(name = "foo", dataType = IntegerType)() + checkError( + exception = intercept[AnalysisException] { + limitExpressionResolver.resolve(col) + }, + condition = "INVALID_LIMIT_LIKE_EXPRESSION.IS_UNFOLDABLE", + parameters = Map("name" -> "limit", "expr" -> toSQLExpr(col)) + ) + } + + 
test("LIMIT with non-integer") { + val anyNonInteger = Literal("42") + checkError( + exception = intercept[AnalysisException] { + limitExpressionResolver.resolve(anyNonInteger) + }, + condition = "INVALID_LIMIT_LIKE_EXPRESSION.DATA_TYPE", + parameters = Map( + "name" -> "limit", + "expr" -> toSQLExpr(anyNonInteger), + "dataType" -> toSQLType(anyNonInteger.dataType) + ) + ) + } + + test("LIMIT with null") { + val expr = Cast(Literal(null), IntegerType) + checkError( + exception = intercept[AnalysisException] { + limitExpressionResolver.resolve(expr) + }, + condition = "INVALID_LIMIT_LIKE_EXPRESSION.IS_NULL", + parameters = Map( + "name" -> "limit", + "expr" -> toSQLExpr(expr) + ) + ) + } + + test("LIMIT with negative integer") { + val expr = Literal(-1, IntegerType) + checkError( + exception = intercept[AnalysisException] { + limitExpressionResolver.resolve(expr) + }, + condition = "INVALID_LIMIT_LIKE_EXPRESSION.IS_NEGATIVE", + parameters = Map( + "name" -> "limit", + "expr" -> toSQLExpr(expr), + "v" -> toSQLValue(-1, IntegerType) + ) + ) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolutionValidatorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolutionValidatorSuite.scala new file mode 100644 index 0000000000000..922e94ea442b3 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/ResolutionValidatorSuite.scala @@ -0,0 +1,265 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import scala.collection.immutable +import scala.reflect.runtime.universe.typeOf + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.expressions.{ + Add, + Alias, + AttributeReference, + Cast, + GreaterThan, + Literal, + NamedExpression, + TimeAdd +} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan, Project} +import org.apache.spark.sql.types.{ + BooleanType, + DayTimeIntervalType, + DecimalType, + IntegerType, + StringType, + TimestampType +} + +class ResolutionValidatorSuite extends SparkFunSuite with SQLConfHelper { + private val resolveMethodNamesToIgnore = Seq( + // [[Resolver]] accepts [[UnresolvedInlineTable]], [[ResolvedInlineTable]] and + // [[LocalRelation]], but produces only [[ResolvedInlineTable]] and [[LocalRelation]], so + // we omit one of them here. + // See [[Resolver.resolveInlineTable]] scaladoc for more info. 
+ "resolveResolvedInlineTable" + ) + + private val colInteger = AttributeReference(name = "colInteger", dataType = IntegerType)() + private val colBoolean = AttributeReference(name = "colBoolean", dataType = BooleanType)() + private val colTimestamp = AttributeReference(name = "colTimestamp", dataType = TimestampType)() + + test("All resolve* methods must have validate* counterparts") { + val actualMethodNames = typeOf[ResolutionValidator].decls + .collect { + case decl if decl.isMethod => decl.name.toString + } + .filter(name => { + name.startsWith("validate") + }) + val actualMethodNamesSet = immutable.HashSet(actualMethodNames.toSeq: _*) + + val resolveMethodNamesToIgnoreSet = immutable.HashSet(resolveMethodNamesToIgnore: _*) + + typeOf[Resolver].decls + .collect { + case decl if decl.isMethod => decl.name.toString + } + .filter(name => { + name.startsWith("resolve") && !resolveMethodNamesToIgnoreSet.contains(name) + }) + .map(name => { + "validate" + name.stripPrefix("resolve") + }) + .foreach(name => { + assert(actualMethodNamesSet.contains(name), name) + }) + } + + test("Project") { + validate( + Project( + projectList = Seq(colInteger, colBoolean, colInteger), + child = LocalRelation(output = Seq(colInteger, colBoolean)) + ) + ) + validate( + Project( + projectList = Seq(colInteger), + child = LocalRelation(output = colBoolean) + ), + error = Some("Project list contains nonexisting attribute") + ) + } + + test("Filter") { + validate( + Project( + projectList = Seq(colBoolean), + child = Filter( + condition = colBoolean, + child = LocalRelation(output = colBoolean) + ) + ) + ) + validate( + Project( + projectList = Seq(colInteger), + child = Filter( + condition = colInteger, + child = LocalRelation(output = colInteger) + ) + ), + error = Some("Non-boolean condition") + ) + validate( + Project( + projectList = Seq(colBoolean), + child = Filter( + condition = AttributeReference(name = "colBooleanOther", dataType = BooleanType)(), + child = LocalRelation(output 
= colBoolean) + ) + ), + error = Some("Condition references nonexisting attribute") + ) + } + + test("Predicate") { + validate( + Project( + projectList = Seq(colInteger), + child = Filter( + condition = GreaterThan(colInteger, colInteger), + child = LocalRelation(output = colInteger) + ) + ) + ) + validate( + Project( + projectList = Seq(colInteger), + child = Filter( + condition = GreaterThan(colInteger, colBoolean), + child = LocalRelation(output = Seq(colInteger, colBoolean)) + ) + ), + error = Some("Input data types mismatch") + ) + } + + test("BinaryExpression") { + validate( + Project( + projectList = Seq( + Alias( + child = Add( + left = Literal(5), + right = Literal(1) + ), + "Add" + )(NamedExpression.newExprId) + ), + child = LocalRelation(output = colInteger) + ) + ) + validate( + Project( + projectList = Seq( + Alias( + child = Add( + left = Literal(5), + right = Literal("1") + ), + "AddWrongInputTypes" + )(NamedExpression.newExprId) + ), + child = LocalRelation(output = colInteger) + ), + error = Some("checkInputDataTypes mismatch") + ) + validate( + Project( + projectList = Seq( + Alias( + child = TimeAdd( + start = Cast( + child = Literal("2024-10-01"), + dataType = TimestampType, + timeZoneId = Option(conf.sessionLocalTimeZone) + ), + interval = Cast( + child = Literal(1), + dataType = DayTimeIntervalType(DayTimeIntervalType.DAY, DayTimeIntervalType.DAY), + timeZoneId = Option(conf.sessionLocalTimeZone) + ) + ), + "AddNoTimezone" + )(NamedExpression.newExprId) + ), + child = LocalRelation(output = colInteger) + ), + error = Some("TimezoneId is not set for TimeAdd") + ) + } + + test("TimeZoneAwareExpression") { + validate( + Project( + projectList = Seq( + Alias( + Cast( + child = colInteger, + dataType = DecimalType.USER_DEFAULT, + timeZoneId = Option(conf.sessionLocalTimeZone) + ), + "withTimezone" + )(NamedExpression.newExprId) + ), + child = LocalRelation(output = colInteger) + ) + ) + validate( + Project( + projectList = Seq( + Alias( + Cast( + 
child = colTimestamp, + dataType = StringType + ), + "withoutTimezone" + )(NamedExpression.newExprId) + ), + child = LocalRelation(output = colTimestamp) + ), + error = Some("TimezoneId is not set") + ) + } + + def validate(plan: LogicalPlan, error: Option[String] = None): Unit = { + def errorWrapper(error: String)(body: => Unit): Unit = { + withClue(error) { + intercept[Throwable] { + body + } + } + } + + def noopWrapper(body: => Unit) = { + body + } + + val wrapper = error + .map(error => { errorWrapper(error) _ }) + .getOrElse { noopWrapper _ } + + val validator = new ResolutionValidator + wrapper { + validator.validatePlan(plan) + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolverSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolverSuite.scala new file mode 100644 index 0000000000000..d5c5387d4b763 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TimezoneAwareExpressionResolverSuite.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.{ + AttributeReference, + Cast, + Expression, + TimeZoneAwareExpression +} +import org.apache.spark.sql.types.{IntegerType, StringType} + +class TimezoneAwareExpressionResolverSuite extends SparkFunSuite { + + class HardCodedExpressionResolver(resolvedExpression: Expression) + extends TreeNodeResolver[Expression, Expression] { + override def resolve(expression: Expression): Expression = resolvedExpression + } + + private val unresolvedChild = + AttributeReference(name = "unresolvedChild", dataType = StringType)() + private val resolvedChild = AttributeReference(name = "resolvedChild", dataType = IntegerType)() + private val castExpression = Cast(child = unresolvedChild, dataType = IntegerType) + private val expressionResolver = new HardCodedExpressionResolver(resolvedChild) + private val timezoneAwareExpressionResolver = new TimezoneAwareExpressionResolver( + expressionResolver + ) + + test("TimeZoneAwareExpression resolution") { + assert(castExpression.children.head == unresolvedChild) + assert(castExpression.timeZoneId.isEmpty) + assert(castExpression.getTagValue(Cast.USER_SPECIFIED_CAST).isEmpty) + + castExpression.setTagValue(Cast.USER_SPECIFIED_CAST, ()) + val resolvedExpression = + timezoneAwareExpressionResolver.resolve(castExpression).asInstanceOf[TimeZoneAwareExpression] + + assert(resolvedExpression.children.head == resolvedChild) + assert(resolvedExpression.timeZoneId.nonEmpty) + assert(resolvedExpression.getTagValue(Cast.USER_SPECIFIED_CAST).nonEmpty) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TypeCoercionResolverSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TypeCoercionResolverSuite.scala new file mode 100644 index 0000000000000..7e0107147c9ac --- /dev/null +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/resolver/TypeCoercionResolverSuite.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis.resolver + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion +import org.apache.spark.sql.catalyst.expressions.{Add, Cast, Expression, Literal} +import org.apache.spark.sql.types.{DoubleType, IntegerType} + +class TypeCoercionResolverSuite extends SparkFunSuite with SQLConfHelper { + + class HardCodedExpressionResolver(resolvedExpression: Expression) + extends TreeNodeResolver[Expression, Expression] { + override def resolve(expression: Expression): Expression = resolvedExpression + } + + private val integerChild = Literal(1, IntegerType) + private val doubleChild = Literal(1.1, DoubleType) + private val castIntegerChild = Cast(child = integerChild, dataType = DoubleType) + private val expressionResolver = new HardCodedExpressionResolver(castIntegerChild) + private val timezoneAwareExpressionResolver = new TimezoneAwareExpressionResolver( + expressionResolver + ) + private val 
typeCoercionRules = Seq( + AnsiTypeCoercion.ImplicitTypeCasts.transform + ) + private val typeCoercionResolver = + new TypeCoercionResolver(timezoneAwareExpressionResolver, typeCoercionRules) + + test("TypeCoercion resolution - with children reinstantiation") { + val expression = Add(left = doubleChild, right = integerChild) + val resolvedExpression = typeCoercionResolver.resolve(expression).asInstanceOf[Add] + // left child remains the same + assert(resolvedExpression.left == doubleChild) + // right first gets resolved to castIntegerChild. However, after the Cast gets + // re-resolved with timezone, it won't be equal to castIntegerChild because of re-instantiation + assert(resolvedExpression.right.isInstanceOf[Cast]) + val newRightChild = resolvedExpression.right.asInstanceOf[Cast] + assert(newRightChild != castIntegerChild) + assert(newRightChild.timeZoneId.nonEmpty) + // not a user-specified cast + assert(newRightChild.getTagValue(Cast.USER_SPECIFIED_CAST).isEmpty) + } + + test("TypeCoercion resolution - no children reinstantiation") { + val expression = Add(left = doubleChild, right = castIntegerChild) + val resolvedExpression = typeCoercionResolver.resolve(expression).asInstanceOf[Add] + assert(resolvedExpression.left == doubleChild) + // Cast that isn't a product of type coercion resolution won't be re-instantiated with timezone + assert(resolvedExpression.right == castIntegerChild) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala index 79c6d07d6d218..645b80ffaacb8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala @@ -488,4 +488,41 @@ class RowEncoderSuite extends CodegenInterpretedPlanTest { val data = Row(mutable.ArraySeq.make(Array(Row("key", "value".getBytes)))) 
val row = encoder.createSerializer()(data) } + + test("do not allow serializing too long strings into char/varchar") { + Seq(CharType(5), VarcharType(5)).foreach { typ => + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + val schema = new StructType().add("c", typ) + val encoder = ExpressionEncoder(schema).resolveAndBind() + val value = "abcdef" + checkError( + exception = intercept[SparkRuntimeException]({ + val row = toRow(encoder, Row(value)) + }), + condition = "EXCEED_LIMIT_LENGTH", + parameters = Map("limit" -> "5") + ) + } + } + } + + test("do not allow deserializing too long strings into char/varchar") { + Seq(CharType(5), VarcharType(5)).foreach { typ => + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + val fromSchema = new StructType().add("c", StringType) + val fromEncoder = ExpressionEncoder(fromSchema).resolveAndBind() + val toSchema = new StructType().add("c", typ) + val toEncoder = ExpressionEncoder(toSchema).resolveAndBind() + val value = "abcdef" + val row = toRow(fromEncoder, Row(value)) + checkError( + exception = intercept[SparkRuntimeException]({ + val value = fromRow(toEncoder, row) + }), + condition = "EXCEED_LIMIT_LENGTH", + parameters = Map("limit" -> "5") + ) + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index 7e545d3321054..e0d3a176b1a43 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -479,4 +479,17 @@ class CanonicalizeSuite extends SparkFunSuite { } } } + + test("unit test for gatherCommutative()") { + val addExpression = Add( + Literal(1), + Add( + Literal(2), + Literal(3) + ) + ) + val commutativeExpressions = addExpression.gatherCommutative(addExpression, + { 
case Add(l, r, _) => Seq(l, r)}) + assert(commutativeExpressions == Seq(Literal(1), Literal(2), Literal(3))) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index e87b54339821f..cec49a5ae1de0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -729,6 +729,8 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { assert(Cast.canUpCast(DateType, TimestampNTZType)) assert(Cast.canUpCast(TimestampType, TimestampNTZType)) assert(Cast.canUpCast(TimestampNTZType, TimestampType)) + assert(Cast.canUpCast(IntegerType, StringType("UTF8_LCASE"))) + assert(Cast.canUpCast(CalendarIntervalType, StringType("UTF8_LCASE"))) assert(!Cast.canUpCast(TimestampType, DateType)) assert(!Cast.canUpCast(TimestampNTZType, DateType)) } @@ -1013,6 +1015,13 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } + test("allow type conversions between calendar interval type and char/varchar types") { + Seq(CharType(10), VarcharType(10)) + .foreach { typ => + assert(cast(Literal.default(CalendarIntervalType), typ).checkInputDataTypes().isSuccess) + } + } + test("SPARK-35720: cast string to timestamp without timezone") { specialTs.foreach { s => val expectedTs = LocalDateTime.parse(s) @@ -1409,4 +1418,43 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { assert(!Cast(timestampLiteral, TimestampNTZType).resolved) assert(!Cast(timestampNTZLiteral, TimestampType).resolved) } + + test("Casting between TimestampType and StringType requires timezone") { + val timestampLiteral = Literal.create(1L, TimestampType) + assert(!Cast(timestampLiteral, StringType).resolved) + assert(!Cast(timestampLiteral, 
StringType("UTF8_LCASE")).resolved) + } + + test(s"Casting from char/varchar") { + Seq(CharType(10), VarcharType(10)).foreach { typ => + Seq( + IntegerType -> ("123", 123), + LongType -> ("123 ", 123L), + BooleanType -> ("true ", true), + BooleanType -> ("false", false), + DoubleType -> ("1.2", 1.2) + ).foreach { case (toType, (from, to)) => + checkEvaluation(cast(Literal.create(from, typ), toType), to) + } + } + } + + test("Casting to char/varchar") { + Seq(CharType(10), VarcharType(10)).foreach { typ => + Seq( + IntegerType -> (123, "123"), + LongType -> (123L, "123"), + BooleanType -> (true, "true"), + BooleanType -> (false, "false"), + DoubleType -> (1.2, "1.2") + ).foreach { case (fromType, (from, to)) => + val paddedTo = if (typ.isInstanceOf[CharType]) { + to.padTo(10, ' ') + } else { + to + } + checkEvaluation(cast(Literal.create(from, fromType), typ), paddedTo) + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala index 77a3d6df69221..9e9eeaf2f80d0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala @@ -17,67 +17,63 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CollationFactory, GenericArrayData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { + private val fullyQualifiedPrefix = s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}." 
+ private val UTF8_BINARY_COLLATION_NAME = ResolvedCollation("UTF8_BINARY") + private val UTF8_LCASE_COLLATION_NAME = ResolvedCollation("UTF8_LCASE") + test("validate default collation") { val collationId = CollationFactory.collationNameToId("UTF8_BINARY") assert(collationId == 0) - val collateExpr = Collate(Literal("abc"), "UTF8_BINARY") + val collateExpr = Collate(Literal("abc"), UTF8_BINARY_COLLATION_NAME) assert(collateExpr.dataType === StringType(collationId)) assert(collateExpr.dataType.asInstanceOf[StringType].collationId == 0) checkEvaluation(collateExpr, "abc") } test("collate against literal") { - val collateExpr = Collate(Literal("abc"), "UTF8_LCASE") + val collateExpr = Collate(Literal("abc"), UTF8_LCASE_COLLATION_NAME) val collationId = CollationFactory.collationNameToId("UTF8_LCASE") assert(collateExpr.dataType === StringType(collationId)) checkEvaluation(collateExpr, "abc") } test("check input types") { - val collateExpr = Collate(Literal("abc"), "UTF8_BINARY") + val collateExpr = Collate(Literal("abc"), UTF8_BINARY_COLLATION_NAME) assert(collateExpr.checkInputDataTypes().isSuccess) val collateExprExplicitDefault = - Collate(Literal.create("abc", StringType(0)), "UTF8_BINARY") + Collate(Literal.create("abc", StringType(0)), UTF8_BINARY_COLLATION_NAME) assert(collateExprExplicitDefault.checkInputDataTypes().isSuccess) val collateExprExplicitNonDefault = - Collate(Literal.create("abc", StringType(1)), "UTF8_BINARY") + Collate(Literal.create("abc", StringType(1)), UTF8_BINARY_COLLATION_NAME) assert(collateExprExplicitNonDefault.checkInputDataTypes().isSuccess) - val collateOnNull = Collate(Literal.create(null, StringType(1)), "UTF8_BINARY") + val collateOnNull = Collate(Literal.create(null, StringType(1)), UTF8_BINARY_COLLATION_NAME) assert(collateOnNull.checkInputDataTypes().isSuccess) - val collateOnInt = Collate(Literal(1), "UTF8_BINARY") + val collateOnInt = Collate(Literal(1), UTF8_BINARY_COLLATION_NAME) 
assert(collateOnInt.checkInputDataTypes().isFailure) } - test("collate on non existing collation") { - checkError( - exception = intercept[SparkException] { Collate(Literal("abc"), "UTF8_BS") }, - condition = "COLLATION_INVALID_NAME", - sqlState = "42704", - parameters = Map("collationName" -> "UTF8_BS", "proposals" -> "UTF8_LCASE")) - } - test("collation on non-explicit default collation") { - checkEvaluation(Collation(Literal("abc")), "UTF8_BINARY") + checkEvaluation(Collation(Literal("abc")), fullyQualifiedPrefix + "UTF8_BINARY") } test("collation on explicitly collated string") { checkEvaluation( Collation(Literal.create("abc", StringType(CollationFactory.UTF8_LCASE_COLLATION_ID))), - "UTF8_LCASE") + fullyQualifiedPrefix + "UTF8_LCASE") checkEvaluation( - Collation(Collate(Literal("abc"), "UTF8_LCASE")), - "UTF8_LCASE") + Collation(Collate(Literal("abc"), UTF8_LCASE_COLLATION_NAME)), + fullyQualifiedPrefix + "UTF8_LCASE") } test("Array operations on arrays of collated strings") { @@ -222,7 +218,7 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { ).foreach { case (collation, normalized) => checkEvaluation(Collation(Literal.create("abc", StringType(collation))), - normalized) + fullyQualifiedPrefix + normalized) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 5cd974838fa24..09650a0dcc022 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1893,26 +1893,26 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-38195: add a quantity of interval units to a timestamp") { // Check case-insensitivity checkEvaluation( - TimestampAdd("Hour", Literal(1), 
Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), + TimestampAdd("Hour", Literal(1L), Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), LocalDateTime.of(2022, 2, 15, 13, 57, 0)) // Check nulls as input values checkEvaluation( TimestampAdd( "MINUTE", - Literal.create(null, IntegerType), + Literal.create(null, LongType), Literal(LocalDateTime.of(2022, 2, 15, 12, 57, 0))), null) checkEvaluation( TimestampAdd( "MINUTE", - Literal(1), + Literal(1L), Literal.create(null, TimestampType)), null) // Check crossing the daylight saving time checkEvaluation( TimestampAdd( "HOUR", - Literal(6), + Literal(6L), Literal(Instant.parse("2022-03-12T23:30:00Z")), Some("America/Los_Angeles")), Instant.parse("2022-03-13T05:30:00Z")) @@ -1920,7 +1920,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation( TimestampAdd( "DAY", - Literal(2), + Literal(2L), Literal(LocalDateTime.of(2020, 2, 28, 10, 11, 12)), Some("America/Los_Angeles")), LocalDateTime.of(2020, 3, 1, 10, 11, 12)) @@ -1940,7 +1940,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { quantity, timestamp, Some(tz)), - IntegerType, tsType) + LongType, tsType) } } } @@ -1961,84 +1961,127 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // timestampadd(DAY, 1, 2011-03-12 03:00:00) = 2011-03-13 03:00:00 checkEvaluation( - TimestampAdd("DAY", Literal(1), Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)), + TimestampAdd("DAY", Literal(1L), + Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)), skippedTime) // timestampadd(HOUR, 24, 2011-03-12 03:00:00) = 2011-03-13 04:00:00 checkEvaluation( - TimestampAdd("HOUR", Literal(24), + TimestampAdd("HOUR", Literal(24L), Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)), skippedTime + MICROS_PER_HOUR) // timestampadd(HOUR, 23, 2011-03-12 03:00:00) = 2011-03-13 03:00:00 checkEvaluation( - TimestampAdd("HOUR", Literal(23), + TimestampAdd("HOUR", Literal(23L), 
Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)), skippedTime) // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 2011-03-13 04:00:00 checkEvaluation( TimestampAdd( - "SECOND", Literal(SECONDS_PER_DAY.toInt), + "SECOND", Literal(SECONDS_PER_DAY), Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)), skippedTime + MICROS_PER_HOUR) // timestampadd(SECOND, SECONDS_PER_DAY, 2011-03-12 03:00:00) = 2011-03-13 03:59:59 checkEvaluation( TimestampAdd( - "SECOND", Literal(SECONDS_PER_DAY.toInt - 1), + "SECOND", Literal(SECONDS_PER_DAY - 1), Literal(skippedTime - 23 * MICROS_PER_HOUR, TimestampType)), skippedTime + MICROS_PER_HOUR - MICROS_PER_SECOND) // timestampadd(DAY, 1, 2011-11-05 02:00:00) = 2011-11-06 02:00:00 checkEvaluation( - TimestampAdd("DAY", Literal(1), + TimestampAdd("DAY", Literal(1L), Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)), repeatedTime + MICROS_PER_HOUR) // timestampadd(DAY, 1, 2011-11-05 01:00:00) = 2011-11-06 01:00:00 (pre-transition) checkEvaluation( - TimestampAdd("DAY", Literal(1), + TimestampAdd("DAY", Literal(1L), Literal(repeatedTime - 25 * MICROS_PER_HOUR, TimestampType)), repeatedTime - MICROS_PER_HOUR) // timestampadd(DAY, -1, 2011-11-07 01:00:00) = 2011-11-06 01:00:00 (post-transition) checkEvaluation( - TimestampAdd("DAY", Literal(-1), + TimestampAdd("DAY", Literal(-1L), Literal(repeatedTime + 24 * MICROS_PER_HOUR, TimestampType)), repeatedTime) // timestampadd(MONTH, 1, 2011-10-06 01:00:00) = 2011-11-06 01:00:00 (pre-transition) checkEvaluation( TimestampAdd( - "MONTH", Literal(1), + "MONTH", Literal(1L), Literal(repeatedTime - MICROS_PER_HOUR - 31 * MICROS_PER_DAY, TimestampType)), repeatedTime - MICROS_PER_HOUR) // timestampadd(MONTH, -1, 2011-12-06 01:00:00) = 2011-11-06 01:00:00 (post-transition) checkEvaluation( TimestampAdd( - "MONTH", Literal(-1), + "MONTH", Literal(-1L), Literal(repeatedTime + 30 * MICROS_PER_DAY, TimestampType)), repeatedTime) // timestampadd(HOUR, 23, 2011-11-05 
02:00:00) = 2011-11-06 01:00:00 (pre-transition) checkEvaluation( - TimestampAdd("HOUR", Literal(23), + TimestampAdd("HOUR", Literal(23L), Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)), repeatedTime - MICROS_PER_HOUR) // timestampadd(HOUR, 24, 2011-11-05 02:00:00) = 2011-11-06 01:00:00 (post-transition) checkEvaluation( - TimestampAdd("HOUR", Literal(24), + TimestampAdd("HOUR", Literal(24L), Literal(repeatedTime - 24 * MICROS_PER_HOUR, TimestampType)), repeatedTime) } } + test("SPARK-50669: timestampadd with long types") { + // A value that is larger than Int.MaxValue. + val longValue = 10_000_000_000L + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + checkEvaluation( + TimestampAdd("MICROSECOND", Literal(longValue), Literal(0L, TimestampType)), + longValue) + checkEvaluation( + TimestampAdd("MILLISECOND", Literal(longValue), Literal(0L, TimestampType)), + longValue * MICROS_PER_MILLIS) + checkEvaluation( + TimestampAdd("SECOND", Literal(longValue), Literal(0L, TimestampType)), + longValue * MICROS_PER_SECOND) + checkEvaluation( + TimestampAdd("MINUTE", Literal(longValue), Literal(0L, TimestampType)), + longValue * MICROS_PER_MINUTE) + + // Add a smaller value so overflow doesn't happen. + val valueToAdd = 1_000L + checkEvaluation( + TimestampAdd("HOUR", Literal(valueToAdd), Literal(0L, TimestampType)), + valueToAdd * MICROS_PER_HOUR) + checkEvaluation( + TimestampAdd("DAY", Literal(valueToAdd), Literal(0L, TimestampType)), + valueToAdd * MICROS_PER_DAY) + checkEvaluation( + TimestampAdd("WEEK", Literal(valueToAdd), Literal(0L, TimestampType)), + valueToAdd * MICROS_PER_DAY * DAYS_PER_WEEK) + + // Make sure overflow are thrown for larger values. 
+ val overflowVal = Long.MaxValue + Seq("MILLISECOND", "SECOND", "MINUTE", "HOUR", "DAY", "WEEK").foreach { interval => + checkErrorInExpression[SparkArithmeticException](TimestampAdd(interval, + Literal(overflowVal), + Literal(0L, TimestampType)), + condition = "DATETIME_OVERFLOW", + parameters = Map("operation" -> + s"add ${overflowVal}L $interval to TIMESTAMP '1970-01-01 00:00:00'")) + } + } + } + test("SPARK-42635: timestampadd unit conversion overflow") { withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { checkErrorInExpression[SparkArithmeticException](TimestampAdd("DAY", - Literal(106751992), + Literal(106751992L), Literal(0L, TimestampType)), condition = "DATETIME_OVERFLOW", - parameters = Map("operation" -> "add 106751992 DAY to TIMESTAMP '1970-01-01 00:00:00'")) + parameters = Map("operation" -> "add 106751992L DAY to TIMESTAMP '1970-01-01 00:00:00'")) checkErrorInExpression[SparkArithmeticException](TimestampAdd("QUARTER", - Literal(1431655764), + Literal(1431655764L), Literal(0L, TimestampType)), condition = "DATETIME_OVERFLOW", parameters = Map("operation" -> - "add 1431655764 QUARTER to TIMESTAMP '1970-01-01 00:00:00'")) + "add 1431655764L QUARTER to TIMESTAMP '1970-01-01 00:00:00'")) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala index 92ef24bb8ec63..019c953a3b0ac 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HashExpressionsSuite.scala @@ -625,8 +625,8 @@ class HashExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val s1 = "aaa" val s2 = "AAA" - val murmur3Hash1 = Murmur3Hash(Seq(Collate(Literal(s1), collation)), 42) - val murmur3Hash2 = Murmur3Hash(Seq(Collate(Literal(s2), collation)), 42) + val murmur3Hash1 = 
Murmur3Hash(Seq(Collate(Literal(s1), ResolvedCollation(collation))), 42) + val murmur3Hash2 = Murmur3Hash(Seq(Collate(Literal(s2), ResolvedCollation(collation))), 42) // Interpreted hash values for s1 and s2 val interpretedHash1 = murmur3Hash1.eval() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 3a58cb92cecf2..0ec1a93b5cd29 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -273,8 +273,9 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with } test("json_tuple escaping") { - GenerateUnsafeProjection.generate( - JsonTuple(Literal("\"quote") :: Literal("\"quote") :: Nil) :: Nil) + checkJsonTuple( + JsonTuple(Literal("\"quote") :: Literal("\"quote") :: Nil), + InternalRow.fromSeq(Seq(null).map(UTF8String.fromString))) } test("json_tuple - hive key 1") { @@ -420,7 +421,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("from_json escaping") { val schema = StructType(StructField("\"quote", IntegerType) :: Nil) GenerateUnsafeProjection.generate( - JsonToStructs(schema, Map.empty, Literal("\"quote"), UTC_OPT).replacement :: Nil) + JsonToStructs(schema, Map.empty, Literal("\"quote"), UTC_OPT) :: Nil) } test("from_json") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index b351d69d3a0bb..5da5c6ac412cc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala 
@@ -25,13 +25,12 @@ import java.util.TimeZone import scala.collection.mutable import scala.reflect.runtime.universe.TypeTag -import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.{SparkFunSuite, SparkRuntimeException} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection} import org.apache.spark.sql.catalyst.encoders.ExamplePointUDT import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLType import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType._ @@ -91,16 +90,8 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { // ExamplePointUDT.sqlType is ArrayType(DoubleType, false). checkEvaluation(Literal.default(new ExamplePointUDT), Array()) - // DateType without default value` - List(CharType(1), VarcharType(1)).foreach(errType => { - checkError( - exception = intercept[SparkException] { - Literal.default(errType) - }, - condition = "INTERNAL_ERROR", - parameters = Map("message" -> s"No default value for type: ${toSQLType(errType)}.") - ) - }) + checkEvaluation(Literal.default(CharType(5)), " ") + checkEvaluation(Literal.default(VarcharType(5)), "") } test("boolean literals") { @@ -160,6 +151,42 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create("\u0000"), "\u0000") } + test("char literals") { + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + val typ = CharType(5) + checkEvaluation(Literal.create("", typ), " ") + checkEvaluation(Literal.create("test", typ), "test ") + checkEvaluation(Literal.create("test ", typ), "test ") + checkEvaluation(Literal.create("\u0000", typ), "\u0000 ") + + checkError( + exception = intercept[SparkRuntimeException]({ + Literal.create("123456", typ) + 
}), + condition = "EXCEED_LIMIT_LENGTH", + parameters = Map("limit" -> "5") + ) + } + } + + test("varchar literals") { + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + val typ = VarcharType(5) + checkEvaluation(Literal.create("", typ), "") + checkEvaluation(Literal.create("test", typ), "test") + checkEvaluation(Literal.create("test ", typ), "test ") + checkEvaluation(Literal.create("\u0000", typ), "\u0000") + + checkError( + exception = intercept[SparkRuntimeException]({ + Literal.create("123456", typ) + }), + condition = "EXCEED_LIMIT_LENGTH", + parameters = Map("limit" -> "5") + ) + } + } + test("sum two literals") { checkEvaluation(Add(Literal(1), Literal(1)), 2) checkEvaluation(Add(Literal.create(1), Literal.create(1)), 2) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index a0c75b703ade4..4a7bf807d1de9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -674,4 +674,14 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkInAndInSet(In(Literal(Double.NaN), Seq(Literal(Double.NaN), Literal(2d), Literal.create(null, DoubleType))), true) } + + test("In and InSet logging limits") { + assert(In(Literal(1), Seq(Literal(1), Literal(2))).simpleString(1) + === "1 IN (1,... 1 more fields)") + assert(In(Literal(1), Seq(Literal(1), Literal(2))).simpleString(2) === "1 IN (1,2)") + assert(In(Literal(1), Seq(Literal(1))).simpleString(1) === "1 IN (1)") + assert(InSet(Literal(1), Set(1, 2)).simpleString(1) === "1 INSET 1, ... 
1 more fields") + assert(InSet(Literal(1), Set(1, 2)).simpleString(2) === "1 INSET 1, 2") + assert(InSet(Literal(1), Set(1)).simpleString(1) === "1 INSET 1") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala index 2d58d9d3136aa..9e6b59b51138d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala @@ -52,7 +52,6 @@ class RandomSuite extends SparkFunSuite with ExpressionEvalHelper { testRandStr(1, "c") testRandStr(5, "ceV0P") testRandStr(10, "ceV0PXaR2I") - testRandStr(10L, "ceV0PXaR2I") def testUniform(first: Any, second: Any, result: Any): Unit = { checkEvaluationWithoutCodegen( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala index 783fba3bfc0df..2a5f76cab3619 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.UTC_OPT +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{UTF8String, VariantVal} @@ -89,6 +90,9 @@ class ToPrettyStringSuite extends SparkFunSuite with ExpressionEvalHelper { test("Char as pretty strings") { checkEvaluation(ToPrettyString(Literal.create('a', CharType(5))), "a") + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + checkEvaluation(ToPrettyString(Literal.create('a', CharType(5))), "a ") + } } 
test("Byte as pretty strings") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSparkSubmitSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSparkSubmitSuite.scala index 891e2d048b7a8..b0ed1ecabb8d6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSparkSubmitSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSparkSubmitSuite.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.scalatest.{Assertions, BeforeAndAfterEach} +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} import org.scalatest.matchers.must.Matchers +import org.scalatest.time.SpanSugar._ import org.apache.spark.{SparkIllegalArgumentException, TestUtils} import org.apache.spark.deploy.SparkSubmitTestUtils @@ -46,7 +48,10 @@ class BufferHolderSparkSubmitSuite "--conf", "spark.master.rest.enabled=false", "--conf", "spark.driver.extraJavaOptions=-ea", unusedJar.toString) - runSparkSubmit(argsForSparkSubmit) + // Given that the default timeout of runSparkSubmit is 60 seconds, try 3 times in total. 
+ eventually(timeout(210.seconds), interval(70.seconds)) { + runSparkSubmit(argsForSparkSubmit) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala index 587e22e787b87..89cdd38a3e7b4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JacksonParserSuite.scala @@ -24,6 +24,19 @@ import org.apache.spark.sql.types.StructType import org.apache.spark.unsafe.types.UTF8String class JacksonParserSuite extends SparkFunSuite { + test("feature mask should remain unchanged") { + val options = new JSONOptions(Map.empty[String, String], "GMT", "") + val parser = new JacksonParser(StructType.fromDDL("a string"), options, false, Nil) + val input = """{"a": {"b": 1}}""".getBytes + // The creating function is usually called inside `parser.parse`, but we need the JSON parser + // here for testing purpose. 
+ val jsonParser = options.buildJsonFactory().createParser(input) + val oldFeature = jsonParser.getFeatureMask + val result = parser.parse[Array[Byte]](input, (_, _) => jsonParser, UTF8String.fromBytes) + assert(result === Seq(InternalRow(UTF8String.fromString("""{"b": 1}""")))) + assert(jsonParser.getFeatureMask == oldFeature) + } + test("skipping rows using pushdown filters") { def check( input: String = """{"i":1, "s": "a"}""", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index fc2697d55f6d0..4cc2ee99284a5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -46,7 +46,7 @@ class BooleanSimplificationSuite extends PlanTest with ExpressionEvalHelper { $"e".boolean, $"f".boolean, $"g".boolean, $"h".boolean) val testRelationWithData = LocalRelation.fromExternalRows( - testRelation.output, Seq(Row(1, 2, 3, "abc")) + testRelation.output, Seq(Row(1, 2, 3, "abc", true, true, true, true)) ) val testNotNullableRelation = LocalRelation($"a".int.notNull, $"b".int.notNull, $"c".int.notNull, @@ -54,7 +54,7 @@ class BooleanSimplificationSuite extends PlanTest with ExpressionEvalHelper { $"h".boolean.notNull) val testNotNullableRelationWithData = LocalRelation.fromExternalRows( - testNotNullableRelation.output, Seq(Row(1, 2, 3, "abc")) + testNotNullableRelation.output, Seq(Row(1, 2, 3, "abc", true, true, true, true)) ) private def checkCondition(input: Expression, expected: LogicalPlan): Unit = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index 02631c4cf61c9..2dcab5cfd29c4 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.Add +import org.apache.spark.sql.catalyst.expressions.{Add, GenericInternalRow} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -189,7 +189,9 @@ class LimitPushdownSuite extends PlanTest { } test("full outer join where neither side is limited and left side has larger statistics") { - val xBig = testRelation.copy(data = Seq.fill(10)(null)).subquery("x") + val nulls = new GenericInternalRow( + Seq.fill(testRelation.output.length)(null).toArray.asInstanceOf[Array[Any]]) + val xBig = testRelation.copy(data = Seq.fill(10)(nulls)).subquery("x") assert(xBig.stats.sizeInBytes > y.stats.sizeInBytes) Seq(Some("x.a".attr === "y.b".attr), None).foreach { condition => val originalQuery = xBig.join(y, FullOuter, condition).limit(1).analyze @@ -204,7 +206,9 @@ class LimitPushdownSuite extends PlanTest { } test("full outer join where neither side is limited and right side has larger statistics") { - val yBig = testRelation.copy(data = Seq.fill(10)(null)).subquery("y") + val nulls = new GenericInternalRow( + Seq.fill(testRelation.output.length)(null).toArray.asInstanceOf[Array[Any]]) + val yBig = testRelation.copy(data = Seq.fill(10)(nulls)).subquery("y") assert(x.stats.sizeInBytes < yBig.stats.sizeInBytes) Seq(Some("x.a".attr === "y.b".attr), None).foreach { condition => val originalQuery = x.join(yBig, FullOuter, condition).limit(1).analyze diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index 7af2be2db01d1..eed06da609f8e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -292,7 +292,7 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { Seq("""{"a":1, "b":2, "c": 123, "d": "test"}""", null).foreach(v => { val row = create_row(v) - checkEvaluation(e1, replace(e2).eval(row), row) + checkEvaluation(e1, e2.eval(row), row) }) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala index ac10fbfa3a3ee..95b55797b294c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala @@ -21,6 +21,7 @@ import org.apache.logging.log4j.Level import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.InSet import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -152,4 +153,14 @@ class OptimizerLoggingSuite extends PlanTest { verifyLog(Level.INFO, Seq("Batch Has No Effect")) } } + + test("SPARK-50329: toString for InSet should be valid for unresolved plan") { + val input = LocalRelation($"a".int, $"b".string, $"c".double) + val inSetPredicate = InSet($"a", Set(1, 2)) + val query = input.select($"a", $"b").where(inSetPredicate) + val analyzed = query.analyze + + 
assert(query.toString.contains("'a INSET (values with unresolved data types)")) + assert(analyzed.toString.contains("INSET 1, 2")) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReorderAssociativeOperatorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReorderAssociativeOperatorSuite.scala index 7733e58547fe0..69c303d4773b4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReorderAssociativeOperatorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReorderAssociativeOperatorSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.Count import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.internal.SQLConf class ReorderAssociativeOperatorSuite extends PlanTest { @@ -109,15 +110,17 @@ class ReorderAssociativeOperatorSuite extends PlanTest { } test("SPARK-50380: conditional branches with error expression") { - val originalQuery1 = testRelation.select(If($"a" === 1, 1L, Literal(1).div(0) + $"b")).analyze - val optimized1 = Optimize.execute(originalQuery1) - comparePlans(optimized1, originalQuery1) - - val originalQuery2 = testRelation.select( - If($"a" === 1, 1, ($"b" + Literal(Int.MaxValue)) + 1).as("col")).analyze - val optimized2 = Optimize.execute(originalQuery2) - val correctAnswer2 = testRelation.select( - If($"a" === 1, 1, $"b" + (Literal(Int.MaxValue) + 1)).as("col")).analyze - comparePlans(optimized2, correctAnswer2) + withSQLConf(SQLConf.ANSI_ENABLED.key -> true.toString) { + val originalQuery1 = testRelation.select(If($"a" === 1, 1L, Literal(1).div(0) + $"b")).analyze + val optimized1 = Optimize.execute(originalQuery1) + comparePlans(optimized1, originalQuery1) + + val originalQuery2 = testRelation.select( + If($"a" === 
1, 1, ($"b" + Literal(Int.MaxValue)) + 1).as("col")).analyze + val optimized2 = Optimize.execute(originalQuery2) + val correctAnswer2 = testRelation.select( + If($"a" === 1, 1, $"b" + (Literal(Int.MaxValue) + 1)).as("col")).analyze + comparePlans(optimized2, correctAnswer2) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpressionSuite.scala index 0aeca961aa513..8918b58ca1b56 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpressionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.analysis.TempResolvedColumn import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -29,7 +28,7 @@ import org.apache.spark.sql.catalyst.rules.RuleExecutor class RewriteWithExpressionSuite extends PlanTest { object Optimizer extends RuleExecutor[LogicalPlan] { - val batches = Batch("Rewrite With expression", Once, + val batches = Batch("Rewrite With expression", FixedPoint(5), PullOutGroupingExpressions, RewriteWithExpression) :: Nil } @@ -84,13 +83,11 @@ class RewriteWithExpressionSuite extends PlanTest { ref * ref } - val plan = testRelation.select(outerExpr.as("col")) comparePlans( - Optimizer.execute(plan), + Optimizer.execute(testRelation.select(outerExpr.as("col"))), testRelation - .select((testRelation.output :+ (a + a).as("_common_expr_0")): _*) - .select((testRelation.output ++ Seq($"_common_expr_0", - ($"_common_expr_0" + $"_common_expr_0" + b).as("_common_expr_1"))): _*) + .select(star(), (a + a).as("_common_expr_0")) + .select(a, b, ($"_common_expr_0" + $"_common_expr_0" + b).as("_common_expr_1")) 
.select(($"_common_expr_1" * $"_common_expr_1").as("col")) .analyze ) @@ -104,42 +101,61 @@ class RewriteWithExpressionSuite extends PlanTest { val outerExpr = With(b + b) { case Seq(ref) => ref * ref + innerExpr } - - val plan = testRelation.select(outerExpr.as("col")) - val rewrittenInnerExpr = (a + a).as("_common_expr_0") - val rewrittenOuterExpr = (b + b).as("_common_expr_1") - val finalExpr = rewrittenOuterExpr.toAttribute * rewrittenOuterExpr.toAttribute + - (rewrittenInnerExpr.toAttribute + rewrittenInnerExpr.toAttribute) + val finalExpr = $"_common_expr_1" * $"_common_expr_1" + ($"_common_expr_0" + $"_common_expr_0") comparePlans( - Optimizer.execute(plan), + Optimizer.execute(testRelation.select(outerExpr.as("col"))), testRelation - .select((testRelation.output :+ rewrittenInnerExpr): _*) - .select((testRelation.output :+ rewrittenInnerExpr.toAttribute :+ rewrittenOuterExpr): _*) + .select(star(), (b + b).as("_common_expr_1")) + .select(star(), (a + a).as("_common_expr_0")) .select(finalExpr.as("col")) .analyze ) } - test("correlated nested WITH expression is not supported") { + test("correlated nested WITH expression is supported") { val Seq(a, b) = testRelation.output val outerCommonExprDef = CommonExpressionDef(b + b, CommonExpressionId(0)) val outerRef = new CommonExpressionRef(outerCommonExprDef) + val rewrittenOuterExpr = (b + b).as("_common_expr_0") // The inner expression definition references the outer expression val commonExprDef1 = CommonExpressionDef(a + a + outerRef, CommonExpressionId(1)) val ref1 = new CommonExpressionRef(commonExprDef1) val innerExpr1 = With(ref1 + ref1, Seq(commonExprDef1)) - val outerExpr1 = With(outerRef + innerExpr1, Seq(outerCommonExprDef)) - intercept[SparkException](Optimizer.execute(testRelation.select(outerExpr1.as("col")))) + comparePlans( + Optimizer.execute(testRelation.select(outerExpr1.as("col"))), + testRelation + // The first Project contains the common expression of the outer With + .select(star(), 
rewrittenOuterExpr) + // The second Project contains the common expression of the inner With, which references + // the common expression of the outer With. + .select(star(), (a + a + $"_common_expr_0").as("_common_expr_1")) + // The final Project contains the final result expression, which references both common + // expressions. + .select(($"_common_expr_0" + ($"_common_expr_1" + $"_common_expr_1")).as("col")) + .analyze + ) - val commonExprDef2 = CommonExpressionDef(a + a) + val commonExprDef2 = CommonExpressionDef(a + a, CommonExpressionId(2)) val ref2 = new CommonExpressionRef(commonExprDef2) // The inner main expression references the outer expression - val innerExpr2 = With(ref2 + outerRef, Seq(commonExprDef1)) - + val innerExpr2 = With(ref2 + ref2 + outerRef, Seq(commonExprDef2)) val outerExpr2 = With(outerRef + innerExpr2, Seq(outerCommonExprDef)) - intercept[SparkException](Optimizer.execute(testRelation.select(outerExpr2.as("col")))) + comparePlans( + Optimizer.execute(testRelation.select(outerExpr2.as("col"))), + testRelation + // The first Project contains the common expression of the outer With + .select(star(), rewrittenOuterExpr) + // The second Project contains the common expression of the inner With, which does not + // reference the common expression of the outer With. + .select(star(), (a + a).as("_common_expr_2")) + // The final Project contains the final result expression, which references both common + // expressions. 
+ .select(($"_common_expr_0" + + ($"_common_expr_2" + $"_common_expr_2" + $"_common_expr_0")).as("col")) + .analyze + ) } test("WITH expression in filter") { @@ -389,17 +405,16 @@ class RewriteWithExpressionSuite extends PlanTest { Optimizer.execute(plan), testRelation .select(a, b, (b + 2).as("_common_expr_0")) - .select(a, b, $"_common_expr_0", (b + 2).as("_common_expr_1")) .window( Seq(windowExpr(count(a), windowSpec(Seq($"_common_expr_0" * $"_common_expr_0"), Nil, frame)).as("col2")), - Seq($"_common_expr_1" * $"_common_expr_1"), + Seq($"_common_expr_0" * $"_common_expr_0"), Nil ) .select(a, b, $"col2") - .select(a, b, $"col2", (a + 1).as("_common_expr_2")) + .select(a, b, $"col2", (a + 1).as("_common_expr_1")) .window( - Seq(windowExpr(sum($"_common_expr_2" * $"_common_expr_2"), + Seq(windowExpr(sum($"_common_expr_1" * $"_common_expr_1"), windowSpec(Seq(a), Nil, frame)).as("col3")), Seq(a), Nil @@ -452,4 +467,37 @@ class RewriteWithExpressionSuite extends PlanTest { testRelation.groupBy($"b")(avg("a").as("a")).where($"a" === 1).analyze ) } + + test("SPARK-50679: duplicated common expressions in different With") { + val a = testRelation.output.head + val exprDef = CommonExpressionDef(a + a) + val exprRef = new CommonExpressionRef(exprDef) + val expr1 = With(exprRef * exprRef, Seq(exprDef)) + val expr2 = With(exprRef - exprRef, Seq(exprDef)) + val plan = testRelation.select(expr1.as("c1"), expr2.as("c2")).analyze + comparePlans( + Optimizer.execute(plan), + testRelation + .select(star(), (a + a).as("_common_expr_0")) + .select( + ($"_common_expr_0" * $"_common_expr_0").as("c1"), + ($"_common_expr_0" - $"_common_expr_0").as("c2")) + .analyze + ) + + val wrongExprDef = CommonExpressionDef(a * a, exprDef.id) + val wrongExprRef = new CommonExpressionRef(wrongExprDef) + val expr3 = With(wrongExprRef + wrongExprRef, Seq(wrongExprDef)) + val wrongPlan = testRelation.select(expr1.as("c1"), expr3.as("c3")).analyze + intercept[AssertionError](Optimizer.execute(wrongPlan)) 
+ } + + test("SPARK-50683: inline the common expression in With if used once") { + val a = testRelation.output.head + val exprDef = CommonExpressionDef(a + a) + val exprRef = new CommonExpressionRef(exprDef) + val expr = With(exprRef + 1, Seq(exprDef)) + val plan = testRelation.select(expr.as("col")) + comparePlans(Optimizer.execute(plan), testRelation.select((a + a + 1).as("col"))) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 5e871208698af..0ec2c80282fc2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2655,7 +2655,7 @@ class DDLParserSuite extends AnalysisTest { val createTableResult = CreateTable(UnresolvedIdentifier(Seq("my_tab")), columnsWithDefaultValue, Seq.empty[Transform], UnresolvedTableSpec(Map.empty[String, String], Some("parquet"), - OptionList(Seq.empty), None, None, None, false), false) + OptionList(Seq.empty), None, None, None, None, false), false) // Parse the CREATE TABLE statement twice, swapping the order of the NOT NULL and DEFAULT // options, to make sure that the parser accepts any ordering of these options. comparePlans(parsePlan( @@ -2668,7 +2668,7 @@ class DDLParserSuite extends AnalysisTest { "b STRING NOT NULL DEFAULT 'abc') USING parquet"), ReplaceTable(UnresolvedIdentifier(Seq("my_tab")), columnsWithDefaultValue, Seq.empty[Transform], UnresolvedTableSpec(Map.empty[String, String], Some("parquet"), - OptionList(Seq.empty), None, None, None, false), false)) + OptionList(Seq.empty), None, None, None, None, false), false)) // These ALTER TABLE statements should parse successfully. 
comparePlans( parsePlan("ALTER TABLE t1 ADD COLUMN x int NOT NULL DEFAULT 42"), @@ -2828,12 +2828,12 @@ class DDLParserSuite extends AnalysisTest { "CREATE TABLE my_tab(a INT, b INT NOT NULL GENERATED ALWAYS AS (a+1)) USING parquet"), CreateTable(UnresolvedIdentifier(Seq("my_tab")), columnsWithGenerationExpr, Seq.empty[Transform], UnresolvedTableSpec(Map.empty[String, String], Some("parquet"), - OptionList(Seq.empty), None, None, None, false), false)) + OptionList(Seq.empty), None, None, None, None, false), false)) comparePlans(parsePlan( "REPLACE TABLE my_tab(a INT, b INT NOT NULL GENERATED ALWAYS AS (a+1)) USING parquet"), ReplaceTable(UnresolvedIdentifier(Seq("my_tab")), columnsWithGenerationExpr, Seq.empty[Transform], UnresolvedTableSpec(Map.empty[String, String], Some("parquet"), - OptionList(Seq.empty), None, None, None, false), false)) + OptionList(Seq.empty), None, None, None, None, false), false)) // Two generation expressions checkError( exception = parseException("CREATE TABLE my_tab(a INT, " + @@ -2903,6 +2903,7 @@ class DDLParserSuite extends AnalysisTest { None, None, None, + None, false ), false @@ -2925,6 +2926,7 @@ class DDLParserSuite extends AnalysisTest { None, None, None, + None, false ), false @@ -3198,4 +3200,49 @@ class DDLParserSuite extends AnalysisTest { condition = "INTERNAL_ERROR", parameters = Map("message" -> "INSERT OVERWRITE DIRECTORY is not supported.")) } + + test("create table with bad collation name") { + checkError( + exception = internalException("CREATE TABLE t DEFAULT COLLATION XD"), + condition = "COLLATION_INVALID_NAME", + parameters = Map("proposals" -> "id, xh, af", "collationName" -> "XD") + ) + } + + private val testSuppCollations = + Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI", "UNICODE_CI_RTRIM", "sr", "sr_CI_AI") + + test("create table with default collation") { + testSuppCollations.foreach { collation => + comparePlans(parsePlan( + s"CREATE TABLE t (c STRING) USING parquet DEFAULT COLLATION 
${collation.toLowerCase()}"), + CreateTable(UnresolvedIdentifier(Seq("t")), + Seq(ColumnDefinition("c", StringType)), + Seq.empty[Transform], + UnresolvedTableSpec(Map.empty[String, String], Some("parquet"), OptionList(Seq.empty), + None, None, Some(collation), None, false), false)) + } + } + + test("replace table with default collation") { + testSuppCollations.foreach { collation => + comparePlans(parsePlan( + s"REPLACE TABLE t (c STRING) USING parquet DEFAULT COLLATION ${collation.toLowerCase()}"), + ReplaceTable(UnresolvedIdentifier(Seq("t")), + Seq(ColumnDefinition("c", StringType)), + Seq.empty[Transform], + UnresolvedTableSpec(Map.empty[String, String], Some("parquet"), OptionList(Seq.empty), + None, None, Some(collation), None, false), false)) + } + } + + test("alter table collation") { + testSuppCollations.foreach { collation => + comparePlans(parsePlan( + s"ALTER TABLE t DEFAULT COLLATION ${collation.toLowerCase()}"), + AlterTableCollation(UnresolvedTable(Seq("t"), + "ALTER TABLE ... 
DEFAULT COLLATION"), collation) + ) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index c556a92373954..9e5555c4c6c0c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -323,19 +323,9 @@ class PlanParserSuite extends AnalysisTest { assertEqual( "from db.a select b, c where d < 1", table("db", "a").where($"d" < 1).select($"b", $"c")) assertEqual("from a select distinct b, c", Distinct(table("a").select($"b", $"c"))) - - // Weird "FROM table" queries, should be invalid anyway - val sql1 = "from a" - checkError( - exception = parseException(sql1), - condition = "PARSE_SYNTAX_ERROR", - parameters = Map("error" -> "end of input", "hint" -> "")) - - val sql2 = "from (from a union all from b) c select *" - checkError( - exception = parseException(sql2), - condition = "PARSE_SYNTAX_ERROR", - parameters = Map("error" -> "'union'", "hint" -> "")) + assertEqual("from a", table("a")) + assertEqual("from (from a union all from b) c select *", + table("a").union(table("b")).subquery("c").select(star())) } test("multi select query") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala index 3bb84f603dc67..e129c6dbba052 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.{Alias, EqualTo, Expression, In, Literal, ScalarSubquery} import 
org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.catalyst.plans.logical.{CaseStatement, CompoundBody, CreateVariable, IfElseStatement, IterateStatement, LeaveStatement, LoopStatement, Project, RepeatStatement, SingleStatement, WhileStatement} +import org.apache.spark.sql.catalyst.plans.logical.{CaseStatement, CompoundBody, CreateVariable, ForStatement, IfElseStatement, IterateStatement, LeaveStatement, LoopStatement, Project, RepeatStatement, SingleStatement, WhileStatement} import org.apache.spark.sql.errors.DataTypeErrors.toSQLId import org.apache.spark.sql.exceptions.SqlScriptingException import org.apache.spark.sql.internal.SQLConf @@ -82,7 +82,7 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { } } - test("empty BEGIN END block") { + test("empty singleCompoundStatement") { val sqlScriptText = """ |BEGIN @@ -91,6 +91,20 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { assert(tree.collection.isEmpty) } + test("empty beginEndCompoundBlock") { + val sqlScriptText = + """ + |BEGIN + | BEGIN + | END; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[CompoundBody]) + val innerBody = tree.collection.head.asInstanceOf[CompoundBody] + assert(innerBody.collection.isEmpty) + } + test("multiple ; in row - should fail") { val sqlScriptText = """ @@ -439,6 +453,21 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { assert(ifStmt.conditions.head.getText == "1=1") } + test("if with empty body") { + val sqlScriptText = + """BEGIN + | IF 1 = 1 THEN + | END IF; + |END + """.stripMargin + checkError( + exception = intercept[ParseException] { + parsePlan(sqlScriptText) + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'IF'", "hint" -> "")) + } + test("if else") { val sqlScriptText = """BEGIN @@ -623,6 +652,21 @@ class SqlScriptingParserSuite extends 
SparkFunSuite with SQLHelper { assert(whileStmt.label.contains("lbl")) } + test("while with empty body") { + val sqlScriptText = + """BEGIN + | WHILE 1 = 1 DO + | END WHILE; + |END + """.stripMargin + checkError( + exception = intercept[ParseException] { + parsePlan(sqlScriptText) + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'WHILE'", "hint" -> "")) + } + test("while with complex condition") { val sqlScriptText = """ @@ -1067,6 +1111,21 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { assert(repeatStmt.label.contains("lbl")) } + test("repeat with empty body") { + val sqlScriptText = + """BEGIN + | REPEAT UNTIL 1 = 1 + | END REPEAT; + |END + """.stripMargin + checkError( + exception = intercept[ParseException] { + parsePlan(sqlScriptText) + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'1'", "hint" -> "")) + } + test("repeat with complex condition") { val sqlScriptText = """ @@ -1176,7 +1235,6 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { head.asInstanceOf[SingleStatement].getText == "SELECT 42") assert(whileStmt.label.contains("lbl")) - } test("searched case statement") { @@ -1198,6 +1256,22 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { assert(caseStmt.conditions.head.getText == "1 = 1") } + test("searched case statement with empty body") { + val sqlScriptText = + """BEGIN + | CASE + | WHEN 1 = 1 THEN + | END CASE; + |END + """.stripMargin + checkError( + exception = intercept[ParseException] { + parsePlan(sqlScriptText) + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'CASE'", "hint" -> "")) + } + test("searched case statement - multi when") { val sqlScriptText = """ @@ -1336,6 +1410,21 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { checkSimpleCaseStatementCondition(caseStmt.conditions.head, _ == Literal(1), _ == Literal(1)) } + test("simple case statement with empty body") { + val 
sqlScriptText = + """BEGIN + | CASE 1 + | WHEN 1 THEN + | END CASE; + |END + """.stripMargin + checkError( + exception = intercept[ParseException] { + parsePlan(sqlScriptText) + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'CASE'", "hint" -> "")) + } test("simple case statement - multi when") { val sqlScriptText = @@ -1483,6 +1572,21 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { assert(whileStmt.label.contains("lbl")) } + test("loop with empty body") { + val sqlScriptText = + """BEGIN + | LOOP + | END LOOP; + |END + """.stripMargin + checkError( + exception = intercept[ParseException] { + parsePlan(sqlScriptText) + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'LOOP'", "hint" -> "")) + } + test("loop with if else block") { val sqlScriptText = """BEGIN @@ -1823,6 +1927,25 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { parameters = Map("label" -> toSQLId("l_loop"))) } + test("unique label names: nested for loops") { + val sqlScriptText = + """BEGIN + |f_loop: FOR x AS SELECT 1 DO + | f_loop: FOR y AS SELECT 2 DO + | SELECT 1; + | END FOR; + |END FOR; + |END + """.stripMargin + val exception = intercept[SqlScriptingException] { + parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + } + checkError( + exception = exception, + condition = "LABEL_ALREADY_EXISTS", + parameters = Map("label" -> toSQLId("f_loop"))) + } + test("unique label names: begin-end block on the same level") { val sqlScriptText = """BEGIN @@ -1858,10 +1981,13 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { | SELECT 4; |UNTIL 1=1 |END REPEAT; + |lbl: FOR x AS SELECT 1 DO + | SELECT 5; + |END FOR; |END """.stripMargin val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] - assert(tree.collection.length == 4) + assert(tree.collection.length == 5) assert(tree.collection.head.isInstanceOf[CompoundBody]) assert(tree.collection.head.asInstanceOf[CompoundBody].label.get == 
"lbl") assert(tree.collection(1).isInstanceOf[WhileStatement]) @@ -1870,6 +1996,23 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { assert(tree.collection(2).asInstanceOf[LoopStatement].label.get == "lbl") assert(tree.collection(3).isInstanceOf[RepeatStatement]) assert(tree.collection(3).asInstanceOf[RepeatStatement].label.get == "lbl") + assert(tree.collection(4).isInstanceOf[ForStatement]) + assert(tree.collection(4).asInstanceOf[ForStatement].label.get == "lbl") + } + + test("qualified label name: label cannot be qualified") { + val sqlScriptText = + """ + |BEGIN + | part1.part2: BEGIN + | END; + |END""".stripMargin + checkError( + exception = intercept[SqlScriptingException] { + parsePlan(sqlScriptText) + }, + condition = "INVALID_LABEL_USAGE.QUALIFIED_LABEL_NAME", + parameters = Map("labelName" -> "PART1.PART2")) } test("unique label names: nested labeled scope statements") { @@ -1879,7 +2022,9 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { | lbl_1: WHILE 1=1 DO | lbl_2: LOOP | lbl_3: REPEAT - | SELECT 4; + | lbl_4: FOR x AS SELECT 1 DO + | SELECT 4; + | END FOR; | UNTIL 1=1 | END REPEAT; | END LOOP; @@ -1905,6 +2050,271 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { // Repeat statement val repeatStatement = loopStatement.body.collection.head.asInstanceOf[RepeatStatement] assert(repeatStatement.label.get == "lbl_3") + // For statement + val forStatement = repeatStatement.body.collection.head.asInstanceOf[ForStatement] + assert(forStatement.label.get == "lbl_4") + } + + test("for statement") { + val sqlScriptText = + """ + |BEGIN + | lbl: FOR x AS SELECT 5 DO + | SELECT 1; + | END FOR; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[ForStatement]) + + val forStmt = tree.collection.head.asInstanceOf[ForStatement] + assert(forStmt.query.isInstanceOf[SingleStatement]) + 
assert(forStmt.query.getText == "SELECT 5") + assert(forStmt.variableName.contains("x")) + + assert(forStmt.body.isInstanceOf[CompoundBody]) + assert(forStmt.body.collection.length == 1) + assert(forStmt.body.collection.head.isInstanceOf[SingleStatement]) + assert(forStmt.body.collection.head.asInstanceOf[SingleStatement].getText == "SELECT 1") + + assert(forStmt.label.contains("lbl")) + } + + test("for statement - empty body") { + val sqlScriptText = + """ + |BEGIN + | lbl: FOR x AS SELECT 5 DO + | END FOR; + |END""".stripMargin + checkError( + exception = intercept[ParseException] { + parsePlan(sqlScriptText) + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'FOR'", "hint" -> "")) + } + + test("for statement - no label") { + val sqlScriptText = + """ + |BEGIN + | FOR x AS SELECT 5 DO + | SELECT 1; + | END FOR; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[ForStatement]) + + val forStmt = tree.collection.head.asInstanceOf[ForStatement] + assert(forStmt.query.isInstanceOf[SingleStatement]) + assert(forStmt.query.getText == "SELECT 5") + assert(forStmt.variableName.contains("x")) + + assert(forStmt.body.isInstanceOf[CompoundBody]) + assert(forStmt.body.collection.length == 1) + assert(forStmt.body.collection.head.isInstanceOf[SingleStatement]) + assert(forStmt.body.collection.head.asInstanceOf[SingleStatement].getText == "SELECT 1") + + // when not explicitly set, label is random UUID + assert(forStmt.label.isDefined) + } + + test("for statement - with complex subquery") { + val sqlScriptText = + """ + |BEGIN + | lbl: FOR x AS SELECT c1, c2 FROM t WHERE c2 = 5 GROUP BY c1 ORDER BY c1 DO + | SELECT x.c1; + | SELECT x.c2; + | END FOR; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[ForStatement]) + + val 
forStmt = tree.collection.head.asInstanceOf[ForStatement] + assert(forStmt.query.isInstanceOf[SingleStatement]) + assert(forStmt.query.getText == "SELECT c1, c2 FROM t WHERE c2 = 5 GROUP BY c1 ORDER BY c1") + assert(forStmt.variableName.contains("x")) + + assert(forStmt.body.isInstanceOf[CompoundBody]) + assert(forStmt.body.collection.length == 2) + assert(forStmt.body.collection.head.isInstanceOf[SingleStatement]) + assert(forStmt.body.collection.head.asInstanceOf[SingleStatement].getText == "SELECT x.c1") + assert(forStmt.body.collection(1).isInstanceOf[SingleStatement]) + assert(forStmt.body.collection(1).asInstanceOf[SingleStatement].getText == "SELECT x.c2") + + assert(forStmt.label.contains("lbl")) + } + + test("for statement - nested") { + val sqlScriptText = + """ + |BEGIN + | lbl1: FOR i AS SELECT 1 DO + | lbl2: FOR j AS SELECT 2 DO + | SELECT i + j; + | END FOR lbl2; + | END FOR lbl1; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[ForStatement]) + + val forStmt = tree.collection.head.asInstanceOf[ForStatement] + assert(forStmt.query.isInstanceOf[SingleStatement]) + assert(forStmt.query.getText == "SELECT 1") + assert(forStmt.variableName.contains("i")) + assert(forStmt.label.contains("lbl1")) + + assert(forStmt.body.isInstanceOf[CompoundBody]) + assert(forStmt.body.collection.length == 1) + assert(forStmt.body.collection.head.isInstanceOf[ForStatement]) + val nestedForStmt = forStmt.body.collection.head.asInstanceOf[ForStatement] + + assert(nestedForStmt.query.isInstanceOf[SingleStatement]) + assert(nestedForStmt.query.getText == "SELECT 2") + assert(nestedForStmt.variableName.contains("j")) + assert(nestedForStmt.label.contains("lbl2")) + + assert(nestedForStmt.body.isInstanceOf[CompoundBody]) + assert(nestedForStmt.body.collection.length == 1) + assert(nestedForStmt.body.collection.head.isInstanceOf[SingleStatement]) + 
assert(nestedForStmt.body.collection. + head.asInstanceOf[SingleStatement].getText == "SELECT i + j") + } + + test("for statement - no variable") { + val sqlScriptText = + """ + |BEGIN + | lbl: FOR SELECT 5 DO + | SELECT 1; + | END FOR; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[ForStatement]) + + val forStmt = tree.collection.head.asInstanceOf[ForStatement] + assert(forStmt.query.isInstanceOf[SingleStatement]) + assert(forStmt.query.getText == "SELECT 5") + assert(forStmt.variableName.isEmpty) + + assert(forStmt.body.isInstanceOf[CompoundBody]) + assert(forStmt.body.collection.length == 1) + assert(forStmt.body.collection.head.isInstanceOf[SingleStatement]) + assert(forStmt.body.collection.head.asInstanceOf[SingleStatement].getText == "SELECT 1") + + assert(forStmt.label.contains("lbl")) + } + + test("for statement - no variable - empty body") { + val sqlScriptText = + """ + |BEGIN + | lbl: FOR SELECT 5 DO + | END FOR; + |END""".stripMargin + checkError( + exception = intercept[ParseException] { + parsePlan(sqlScriptText) + }, + condition = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'FOR'", "hint" -> "")) + } + + test("for statement - no variable - no label") { + val sqlScriptText = + """ + |BEGIN + | FOR SELECT 5 DO + | SELECT 1; + | END FOR; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[ForStatement]) + + val forStmt = tree.collection.head.asInstanceOf[ForStatement] + assert(forStmt.query.isInstanceOf[SingleStatement]) + assert(forStmt.query.getText == "SELECT 5") + assert(forStmt.variableName.isEmpty) + + assert(forStmt.body.isInstanceOf[CompoundBody]) + assert(forStmt.body.collection.length == 1) + assert(forStmt.body.collection.head.isInstanceOf[SingleStatement]) + 
assert(forStmt.body.collection.head.asInstanceOf[SingleStatement].getText == "SELECT 1") + + // when not explicitly set, label is random UUID + assert(forStmt.label.isDefined) + } + + test("for statement - no variable - with complex subquery") { + val sqlScriptText = + """ + |BEGIN + | lbl: FOR SELECT c1, c2 FROM t WHERE c2 = 5 GROUP BY c1 ORDER BY c1 DO + | SELECT 1; + | SELECT 2; + | END FOR; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[ForStatement]) + + val forStmt = tree.collection.head.asInstanceOf[ForStatement] + assert(forStmt.query.isInstanceOf[SingleStatement]) + assert(forStmt.query.getText == "SELECT c1, c2 FROM t WHERE c2 = 5 GROUP BY c1 ORDER BY c1") + assert(forStmt.variableName.isEmpty) + + assert(forStmt.body.isInstanceOf[CompoundBody]) + assert(forStmt.body.collection.length == 2) + assert(forStmt.body.collection.head.isInstanceOf[SingleStatement]) + assert(forStmt.body.collection.head.asInstanceOf[SingleStatement].getText == "SELECT 1") + assert(forStmt.body.collection(1).isInstanceOf[SingleStatement]) + assert(forStmt.body.collection(1).asInstanceOf[SingleStatement].getText == "SELECT 2") + + assert(forStmt.label.contains("lbl")) + } + + test("for statement - no variable - nested") { + val sqlScriptText = + """ + |BEGIN + | lbl1: FOR SELECT 1 DO + | lbl2: FOR SELECT 2 DO + | SELECT 3; + | END FOR lbl2; + | END FOR lbl1; + |END""".stripMargin + val tree = parsePlan(sqlScriptText).asInstanceOf[CompoundBody] + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[ForStatement]) + + val forStmt = tree.collection.head.asInstanceOf[ForStatement] + assert(forStmt.query.isInstanceOf[SingleStatement]) + assert(forStmt.query.getText == "SELECT 1") + assert(forStmt.variableName.isEmpty) + assert(forStmt.label.contains("lbl1")) + + assert(forStmt.body.isInstanceOf[CompoundBody]) + 
assert(forStmt.body.collection.length == 1) + assert(forStmt.body.collection.head.isInstanceOf[ForStatement]) + val nestedForStmt = forStmt.body.collection.head.asInstanceOf[ForStatement] + + assert(nestedForStmt.query.isInstanceOf[SingleStatement]) + assert(nestedForStmt.query.getText == "SELECT 2") + assert(nestedForStmt.variableName.isEmpty) + assert(nestedForStmt.label.contains("lbl2")) + + assert(nestedForStmt.body.isInstanceOf[CompoundBody]) + assert(nestedForStmt.body.collection.length == 1) + assert(nestedForStmt.body.collection.head.isInstanceOf[SingleStatement]) + assert(nestedForStmt.body.collection. + head.asInstanceOf[SingleStatement].getText == "SELECT 3") } // Helper methods diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/NormalizePlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/NormalizePlanSuite.scala new file mode 100644 index 0000000000000..5ff66098107c2 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/NormalizePlanSuite.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.plans + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{AssertTrue, Cast, If, Literal, TimeZoneAwareExpression} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.types.BooleanType + +class NormalizePlanSuite extends SparkFunSuite with SQLConfHelper { + + test("Normalize InheritAnalysisRules expressions") { + val castWithoutTimezone = + Cast(child = Literal(1), dataType = BooleanType, ansiEnabled = conf.ansiEnabled) + val castWithTimezone = castWithoutTimezone.withTimeZone(conf.sessionLocalTimeZone) + + val baselineExpression = AssertTrue(castWithTimezone) + val baselinePlan = LocalRelation().select(baselineExpression) + + val testExpression = AssertTrue(castWithoutTimezone) + val testPlan = LocalRelation().select(testExpression) + + // Before calling [[setTimezoneForAllExpression]], [[AssertTrue]] node will look like: + // + // AssertTrue(Cast(Literal(1)), message, If(Cast(Literal(1)), Literal(null), error)) + // + // Calling [[setTimezoneForAllExpression]] will only apply timezone to the second Cast node + // because [[InheritAnalysisRules]] only sees replacement expression as its child. This will + // cause the difference when comparing [[resolvedBaselinePlan]] and [[resolvedTestPlan]], + // therefore we need normalization. + + // Before applying timezone, no timezone is set. + testPlan.expressions.foreach { + case _ @ AssertTrue(firstCast: Cast, _, _ @ If(secondCast: Cast, _, _)) => + assert(firstCast.timeZoneId.isEmpty) + assert(secondCast.timeZoneId.isEmpty) + case _ => + } + + val resolvedBaselinePlan = setTimezoneForAllExpression(baselinePlan) + val resolvedTestPlan = setTimezoneForAllExpression(testPlan) + + // After applying timezone, only the second cast gets timezone. 
+ resolvedTestPlan.expressions.foreach { + case _ @ AssertTrue(firstCast: Cast, _, _ @ If(secondCast: Cast, _, _)) => + assert(firstCast.timeZoneId.isEmpty) + assert(secondCast.timeZoneId.isDefined) + case _ => + } + + // However, plans are still different. + assert(resolvedBaselinePlan != resolvedTestPlan) + assert(NormalizePlan(resolvedBaselinePlan) == NormalizePlan(resolvedTestPlan)) + } + + private def setTimezoneForAllExpression(plan: LogicalPlan): LogicalPlan = { + plan.transformAllExpressions { + case e: TimeZoneAwareExpression if e.timeZoneId.isEmpty => + e.withTimeZone(conf.sessionLocalTimeZone) + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala index 385850376d147..fb4053964a841 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala @@ -136,4 +136,19 @@ class StringUtilsSuite extends SparkFunSuite with SQLHelper { val expectedOutput = Seq("`c1`", "`v2.c2`", "`v1`.`c2`") assert(orderSuggestedIdentifiersBySimilarity(baseString, testStrings) === expectedOutput) } + + test("SPARK-50579: truncated string") { + assert(truncatedString(Seq.empty, ", ", -1) === "") + assert(truncatedString(Seq("a"), ", ", -1) === "... 1 more fields") + assert(truncatedString(Seq("B"), "(", ", ", ")", -1) === "(... 1 more fields)") + assert(truncatedString(Seq.empty, ", ", 0) === "") + assert(truncatedString(Seq.empty, "[", ", ", "]", 0) === "[]") + assert(truncatedString(Seq("a", "b"), ", ", 0) === "... 2 more fields") + assert(truncatedString(Seq.empty, ",", 1) === "") + assert(truncatedString(Seq("a"), ",", 1) === "a") + assert(truncatedString(Seq("a", "b"), ", ", 1) === "a, ... 
1 more fields") + assert(truncatedString(Seq("a", "b"), ", ", 2) === "a, b") + assert(truncatedString(Seq("a", "b", "c"), ", ", Int.MaxValue) === "a, b, c") + assert(truncatedString(Seq("a", "b", "c"), ", ", Int.MinValue) === "... 3 more fields") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala index 497ef848ac78f..ab17b93ad6146 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala @@ -295,7 +295,7 @@ abstract class InMemoryBaseTable( TableCapability.TRUNCATE) override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { - new InMemoryScanBuilder(schema) + new InMemoryScanBuilder(schema, options) } private def canEvaluate(filter: Filter): Boolean = { @@ -309,8 +309,10 @@ abstract class InMemoryBaseTable( } } - class InMemoryScanBuilder(tableSchema: StructType) extends ScanBuilder - with SupportsPushDownRequiredColumns with SupportsPushDownFilters { + class InMemoryScanBuilder( + tableSchema: StructType, + options: CaseInsensitiveStringMap) extends ScanBuilder + with SupportsPushDownRequiredColumns with SupportsPushDownFilters { private var schema: StructType = tableSchema private var postScanFilters: Array[Filter] = Array.empty private var evaluableFilters: Array[Filter] = Array.empty @@ -318,7 +320,7 @@ abstract class InMemoryBaseTable( override def build: Scan = { val scan = InMemoryBatchScan( - data.map(_.asInstanceOf[InputPartition]).toImmutableArraySeq, schema, tableSchema) + data.map(_.asInstanceOf[InputPartition]).toImmutableArraySeq, schema, tableSchema, options) if (evaluableFilters.nonEmpty) { scan.filter(evaluableFilters) } @@ -442,7 +444,8 @@ abstract class InMemoryBaseTable( case class InMemoryBatchScan( var _data: Seq[InputPartition], 
readSchema: StructType, - tableSchema: StructType) + tableSchema: StructType, + options: CaseInsensitiveStringMap) extends BatchScanBaseClass(_data, readSchema, tableSchema) with SupportsRuntimeFiltering { override def filterAttributes(): Array[NamedReference] = { @@ -474,17 +477,17 @@ abstract class InMemoryBaseTable( } } - abstract class InMemoryWriterBuilder() extends SupportsTruncate with SupportsDynamicOverwrite - with SupportsStreamingUpdateAsAppend { + abstract class InMemoryWriterBuilder(val info: LogicalWriteInfo) + extends SupportsTruncate with SupportsDynamicOverwrite with SupportsStreamingUpdateAsAppend { - protected var writer: BatchWrite = Append - protected var streamingWriter: StreamingWrite = StreamingAppend + protected var writer: BatchWrite = new Append(info) + protected var streamingWriter: StreamingWrite = new StreamingAppend(info) override def overwriteDynamicPartitions(): WriteBuilder = { - if (writer != Append) { + if (!writer.isInstanceOf[Append]) { throw new IllegalArgumentException(s"Unsupported writer type: $writer") } - writer = DynamicOverwrite + writer = new DynamicOverwrite(info) streamingWriter = new StreamingNotSupportedOperation("overwriteDynamicPartitions") this } @@ -529,13 +532,13 @@ abstract class InMemoryBaseTable( override def abort(messages: Array[WriterCommitMessage]): Unit = {} } - protected object Append extends TestBatchWrite { + class Append(val info: LogicalWriteInfo) extends TestBatchWrite { override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized { withData(messages.map(_.asInstanceOf[BufferedRows])) } } - private object DynamicOverwrite extends TestBatchWrite { + class DynamicOverwrite(val info: LogicalWriteInfo) extends TestBatchWrite { override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized { val newData = messages.map(_.asInstanceOf[BufferedRows]) dataMap --= newData.flatMap(_.rows.map(getKey)) @@ -543,7 +546,7 @@ abstract class InMemoryBaseTable( } } 
- protected object TruncateAndAppend extends TestBatchWrite { + class TruncateAndAppend(val info: LogicalWriteInfo) extends TestBatchWrite { override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized { dataMap.clear() withData(messages.map(_.asInstanceOf[BufferedRows])) @@ -572,7 +575,7 @@ abstract class InMemoryBaseTable( s"${operation} isn't supported for streaming query.") } - private object StreamingAppend extends TestStreamingWrite { + class StreamingAppend(val info: LogicalWriteInfo) extends TestStreamingWrite { override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { dataMap.synchronized { withData(messages.map(_.asInstanceOf[BufferedRows])) @@ -580,7 +583,7 @@ abstract class InMemoryBaseTable( } } - protected object StreamingTruncateAndAppend extends TestStreamingWrite { + class StreamingTruncateAndAppend(val info: LogicalWriteInfo) extends TestStreamingWrite { override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { dataMap.synchronized { dataMap.clear() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala index 4abe4c8b3e3fb..3a684dc57c02f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala @@ -59,7 +59,7 @@ class InMemoryRowLevelOperationTable( } override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { - new InMemoryScanBuilder(schema) { + new InMemoryScanBuilder(schema, options) { override def build: Scan = { val scan = super.build() configuredScan = scan.asInstanceOf[InMemoryBatchScan] @@ -115,7 +115,7 @@ class InMemoryRowLevelOperationTable( override def rowId(): Array[NamedReference] = Array(PK_COLUMN_REF) 
override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { - new InMemoryScanBuilder(schema) + new InMemoryScanBuilder(schema, options) } override def newWriteBuilder(info: LogicalWriteInfo): DeltaWriteBuilder = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala index af04816e6b6f0..c27b8fea059f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala @@ -84,23 +84,23 @@ class InMemoryTable( InMemoryBaseTable.maybeSimulateFailedTableWrite(new CaseInsensitiveStringMap(properties)) InMemoryBaseTable.maybeSimulateFailedTableWrite(info.options) - new InMemoryWriterBuilderWithOverWrite() + new InMemoryWriterBuilderWithOverWrite(info) } - private class InMemoryWriterBuilderWithOverWrite() extends InMemoryWriterBuilder - with SupportsOverwrite { + class InMemoryWriterBuilderWithOverWrite(override val info: LogicalWriteInfo) + extends InMemoryWriterBuilder(info) with SupportsOverwrite { override def truncate(): WriteBuilder = { - if (writer != Append) { + if (!writer.isInstanceOf[Append]) { throw new IllegalArgumentException(s"Unsupported writer type: $writer") } - writer = TruncateAndAppend - streamingWriter = StreamingTruncateAndAppend + writer = new TruncateAndAppend(info) + streamingWriter = new StreamingTruncateAndAppend(info) this } override def overwrite(filters: Array[Filter]): WriteBuilder = { - if (writer != Append) { + if (!writer.isInstanceOf[Append]) { throw new IllegalArgumentException(s"Unsupported writer type: $writer") } writer = new Overwrite(filters) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithV2Filter.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithV2Filter.scala 
index 20ada0d622bca..9b7a90774f91c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithV2Filter.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTableWithV2Filter.scala @@ -47,19 +47,22 @@ class InMemoryTableWithV2Filter( } override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { - new InMemoryV2FilterScanBuilder(schema) + new InMemoryV2FilterScanBuilder(schema, options) } - class InMemoryV2FilterScanBuilder(tableSchema: StructType) - extends InMemoryScanBuilder(tableSchema) { + class InMemoryV2FilterScanBuilder( + tableSchema: StructType, + options: CaseInsensitiveStringMap) + extends InMemoryScanBuilder(tableSchema, options) { override def build: Scan = InMemoryV2FilterBatchScan( - data.map(_.asInstanceOf[InputPartition]).toImmutableArraySeq, schema, tableSchema) + data.map(_.asInstanceOf[InputPartition]).toImmutableArraySeq, schema, tableSchema, options) } case class InMemoryV2FilterBatchScan( var _data: Seq[InputPartition], readSchema: StructType, - tableSchema: StructType) + tableSchema: StructType, + options: CaseInsensitiveStringMap) extends BatchScanBaseClass(_data, readSchema, tableSchema) with SupportsRuntimeV2Filtering { override def filterAttributes(): Array[NamedReference] = { @@ -93,21 +96,21 @@ class InMemoryTableWithV2Filter( InMemoryBaseTable.maybeSimulateFailedTableWrite(new CaseInsensitiveStringMap(properties)) InMemoryBaseTable.maybeSimulateFailedTableWrite(info.options) - new InMemoryWriterBuilderWithOverWrite() + new InMemoryWriterBuilderWithOverWrite(info) } - private class InMemoryWriterBuilderWithOverWrite() extends InMemoryWriterBuilder - with SupportsOverwriteV2 { + class InMemoryWriterBuilderWithOverWrite(override val info: LogicalWriteInfo) + extends InMemoryWriterBuilder(info) with SupportsOverwriteV2 { override def truncate(): WriteBuilder = { - assert(writer == Append) - writer = TruncateAndAppend - streamingWriter = 
StreamingTruncateAndAppend + assert(writer.isInstanceOf[Append]) + writer = new TruncateAndAppend(info) + streamingWriter = new StreamingTruncateAndAppend(info) this } override def overwrite(predicates: Array[Predicate]): WriteBuilder = { - assert(writer == Append) + assert(writer.isInstanceOf[Append]) writer = new Overwrite(predicates) streamingWriter = new StreamingNotSupportedOperation( s"overwrite (${predicates.mkString("filters(", ", ", ")")})") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/StagingInMemoryTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/StagingInMemoryTableCatalog.scala index f3c7bc98cec09..2a207901b83f5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/StagingInMemoryTableCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/StagingInMemoryTableCatalog.scala @@ -78,7 +78,7 @@ class StagingInMemoryTableCatalog extends InMemoryTableCatalog with StagingTable maybeSimulateFailedTableCreation(properties) } - private abstract class TestStagedTable( + protected abstract class TestStagedTable( ident: Identifier, delegateTable: InMemoryTable) extends StagedTable with SupportsWrite with SupportsRead { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index d5fc4d87bb6ad..397241be76eb1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -23,13 +23,11 @@ import org.json4s.jackson.JsonMethods import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.SQLHelper 
import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{CollationFactory, StringConcat} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearMonthIntervalTypes} -class DataTypeSuite extends SparkFunSuite with SQLHelper { +class DataTypeSuite extends SparkFunSuite { private val UNICODE_COLLATION_ID = CollationFactory.collationNameToId("UNICODE") @@ -368,6 +366,8 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { checkDefaultSize(TimestampType, 8) checkDefaultSize(TimestampNTZType, 8) checkDefaultSize(StringType, 20) + checkDefaultSize(CharType(20), 20) + checkDefaultSize(VarcharType(20), 20) checkDefaultSize(BinaryType, 100) checkDefaultSize(ArrayType(DoubleType, true), 8) checkDefaultSize(ArrayType(StringType, false), 20) @@ -412,6 +412,14 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { from = ArrayType(DoubleType, containsNull = false), to = ArrayType(StringType, containsNull = false), expected = false) + checkEqualsIgnoreCompatibleNullability( + from = ArrayType(CharType(5), containsNull = false), + to = ArrayType(StringType, containsNull = false), + expected = false) + checkEqualsIgnoreCompatibleNullability( + from = ArrayType(VarcharType(5), containsNull = false), + to = ArrayType(StringType, containsNull = false), + expected = false) checkEqualsIgnoreCompatibleNullability( from = MapType(StringType, DoubleType, valueContainsNull = true), @@ -425,6 +433,14 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { from = MapType(StringType, DoubleType, valueContainsNull = false), to = MapType(StringType, DoubleType, valueContainsNull = true), expected = true) + checkEqualsIgnoreCompatibleNullability( + from = MapType(CharType(5), DoubleType, valueContainsNull = false), + to = MapType(StringType, DoubleType, valueContainsNull = true), + expected = false) + checkEqualsIgnoreCompatibleNullability( + from = 
MapType(VarcharType(5), DoubleType, valueContainsNull = false), + to = MapType(StringType, DoubleType, valueContainsNull = true), + expected = false) checkEqualsIgnoreCompatibleNullability( from = MapType(StringType, DoubleType, valueContainsNull = true), to = MapType(StringType, DoubleType, valueContainsNull = false), @@ -443,10 +459,26 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { from = StructType(StructField("a", StringType, nullable = true) :: Nil), to = StructType(StructField("a", StringType, nullable = true) :: Nil), expected = true) + checkEqualsIgnoreCompatibleNullability( + from = StructType(StructField("a", CharType(5), nullable = true) :: Nil), + to = StructType(StructField("a", StringType, nullable = true) :: Nil), + expected = false) + checkEqualsIgnoreCompatibleNullability( + from = StructType(StructField("a", VarcharType(5), nullable = true) :: Nil), + to = StructType(StructField("a", StringType, nullable = true) :: Nil), + expected = false) checkEqualsIgnoreCompatibleNullability( from = StructType(StructField("a", StringType, nullable = false) :: Nil), to = StructType(StructField("a", StringType, nullable = false) :: Nil), expected = true) + checkEqualsIgnoreCompatibleNullability( + from = StructType(StructField("a", CharType(5), nullable = false) :: Nil), + to = StructType(StructField("a", StringType, nullable = false) :: Nil), + expected = false) + checkEqualsIgnoreCompatibleNullability( + from = StructType(StructField("a", VarcharType(5), nullable = false) :: Nil), + to = StructType(StructField("a", StringType, nullable = false) :: Nil), + expected = false) checkEqualsIgnoreCompatibleNullability( from = StructType(StructField("a", StringType, nullable = false) :: Nil), to = StructType(StructField("a", StringType, nullable = true) :: Nil), @@ -485,6 +517,8 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { checkCatalogString(DecimalType(10, 5)) checkCatalogString(BinaryType) checkCatalogString(StringType) + 
checkCatalogString(CharType(5)) + checkCatalogString(VarcharType(10)) checkCatalogString(DateType) checkCatalogString(TimestampType) checkCatalogString(createStruct(4)) @@ -509,8 +543,18 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { checkEqualsStructurally(BooleanType, BooleanType, true) checkEqualsStructurally(IntegerType, IntegerType, true) checkEqualsStructurally(IntegerType, LongType, false) + checkEqualsStructurally(CharType(5), CharType(5), true) + checkEqualsStructurally(CharType(5), CharType(10), false) + checkEqualsStructurally(CharType(5), VarcharType(5), false) + checkEqualsStructurally(VarcharType(5), VarcharType(5), true) + checkEqualsStructurally(VarcharType(5), VarcharType(10), false) + checkEqualsStructurally(VarcharType(5), CharType(5), false) checkEqualsStructurally(ArrayType(IntegerType, true), ArrayType(IntegerType, true), true) checkEqualsStructurally(ArrayType(IntegerType, true), ArrayType(IntegerType, false), false) + checkEqualsStructurally(ArrayType(CharType(5), true), ArrayType(CharType(5), true), true) + checkEqualsStructurally(ArrayType(CharType(5), true), ArrayType(CharType(5), false), false) + checkEqualsStructurally(ArrayType(VarcharType(5), true), ArrayType(VarcharType(5), true), true) + checkEqualsStructurally(ArrayType(VarcharType(5), true), ArrayType(VarcharType(5), false), false) checkEqualsStructurally( new StructType().add("f1", IntegerType), @@ -521,6 +565,15 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { new StructType().add("f2", IntegerType, false), false) + checkEqualsStructurally( + new StructType().add("f1", CharType(5)), + new StructType().add("f2", StringType), + false) + checkEqualsStructurally( + new StructType().add("f1", VarcharType(5)), + new StructType().add("f2", StringType), + false) + checkEqualsStructurally( new StructType().add("f1", IntegerType).add("f", new StructType().add("f2", StringType)), new StructType().add("f2", IntegerType).add("g", new StructType().add("f1", 
StringType)), @@ -540,6 +593,14 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { .add("g", new StructType().add("f1", StringType)), true, ignoreNullability = true) + checkEqualsStructurally( + new StructType().add("f1", IntegerType).add("f", new StructType().add("f2", CharType(5))), + new StructType().add("f2", IntegerType).add("g", new StructType().add("f1", StringType)), + false) + checkEqualsStructurally( + new StructType().add("f1", IntegerType).add("f", new StructType().add("f2", VarcharType(5))), + new StructType().add("f2", IntegerType).add("g", new StructType().add("f1", StringType)), + false) checkEqualsStructurally( ArrayType( @@ -581,6 +642,22 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { true, ignoreNullability = true) + checkEqualsStructurally( + ArrayType( + ArrayType(CharType(5), true), true), + ArrayType( + ArrayType(StringType, true), true), + false, + ignoreNullability = false) + + checkEqualsStructurally( + ArrayType( + ArrayType(VarcharType(5), true), true), + ArrayType( + ArrayType(StringType, true), true), + false, + ignoreNullability = false) + checkEqualsStructurally( MapType( ArrayType(IntegerType, true), ArrayType(IntegerType, true), true), @@ -629,6 +706,22 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { true, ignoreNullability = true) + checkEqualsStructurally( + MapType( + ArrayType(IntegerType, true), ArrayType(CharType(5), true), true), + MapType( + ArrayType(IntegerType, true), ArrayType(StringType, true), true), + false, + ignoreNullability = false) + + checkEqualsStructurally( + MapType( + ArrayType(IntegerType, true), ArrayType(VarcharType(5), true), true), + MapType( + ArrayType(IntegerType, true), ArrayType(StringType, true), true), + false, + ignoreNullability = false) + def checkEqualsStructurallyByName( from: DataType, to: DataType, @@ -659,6 +752,10 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { checkEqualsStructurallyByName(BooleanType, BooleanType, true) 
checkEqualsStructurallyByName(BooleanType, IntegerType, true) checkEqualsStructurallyByName(IntegerType, LongType, true) + checkEqualsStructurallyByName(StringType, CharType(5), true) + checkEqualsStructurallyByName(StringType, VarcharType(5), true) + checkEqualsStructurallyByName(CharType(5), StringType, true) + checkEqualsStructurallyByName(VarcharType(5), StringType, true) checkEqualsStructurallyByName( new StructType().add("f1", IntegerType).add("f2", IntegerType), @@ -667,6 +764,16 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { checkEqualsStructurallyByName( new StructType().add("f1", IntegerType).add("f2", IntegerType), + new StructType().add("f1", CharType(5)).add("f2", StringType), + true) + + checkEqualsStructurallyByName( + new StructType().add("f1", IntegerType).add("f2", IntegerType), + new StructType().add("f2", LongType).add("f1", StringType), + false) + + checkEqualsStructurallyByName( + new StructType().add("f1", IntegerType).add("f2", VarcharType(5)), new StructType().add("f2", LongType).add("f1", StringType), false) @@ -675,23 +782,45 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { new StructType().add("f1", LongType).add("f", new StructType().add("f2", BooleanType)), true) + checkEqualsStructurallyByName( + new StructType().add("f1", IntegerType).add("f", new StructType().add("f2", StringType)), + new StructType().add("f1", LongType).add("f", new StructType().add("f2", VarcharType(5))), + true) + checkEqualsStructurallyByName( new StructType().add("f1", IntegerType).add("f", new StructType().add("f2", StringType)), new StructType().add("f", new StructType().add("f2", BooleanType)).add("f1", LongType), false) + checkEqualsStructurallyByName( + new StructType().add("f1", IntegerType).add("f", new StructType().add("f2", StringType)), + new StructType().add("f", new StructType().add("f2", CharType(5))).add("f1", LongType), + false) + checkEqualsStructurallyByName( new StructType().add("f1", IntegerType).add("f2", 
IntegerType), new StructType().add("F1", LongType).add("F2", StringType), true, caseSensitive = false) + checkEqualsStructurallyByName( + new StructType().add("f1", IntegerType).add("f2", IntegerType), + new StructType().add("F1", LongType).add("F2", CharType(5)), + true, + caseSensitive = false) + checkEqualsStructurallyByName( new StructType().add("f1", IntegerType).add("f2", IntegerType), new StructType().add("F1", LongType).add("F2", StringType), false, caseSensitive = true) + checkEqualsStructurallyByName( + new StructType().add("f1", IntegerType).add("f2", IntegerType), + new StructType().add("F1", LongType).add("F2", VarcharType(5)), + false, + caseSensitive = true) + def checkEqualsIgnoreCompatibleCollation( from: DataType, to: DataType, @@ -705,19 +834,45 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { // Simple types. checkEqualsIgnoreCompatibleCollation(IntegerType, IntegerType, expected = true) - checkEqualsIgnoreCompatibleCollation(BooleanType, BooleanType, expected = true) - checkEqualsIgnoreCompatibleCollation(StringType, StringType, expected = true) checkEqualsIgnoreCompatibleCollation(IntegerType, BooleanType, expected = false) + checkEqualsIgnoreCompatibleCollation(IntegerType, StringType, expected = false) + checkEqualsIgnoreCompatibleCollation(IntegerType, CharType(5), expected = false) + checkEqualsIgnoreCompatibleCollation(IntegerType, VarcharType(5), expected = false) checkEqualsIgnoreCompatibleCollation(BooleanType, IntegerType, expected = false) - checkEqualsIgnoreCompatibleCollation(StringType, BooleanType, expected = false) + checkEqualsIgnoreCompatibleCollation(BooleanType, BooleanType, expected = true) checkEqualsIgnoreCompatibleCollation(BooleanType, StringType, expected = false) + checkEqualsIgnoreCompatibleCollation(BooleanType, CharType(5), expected = false) + checkEqualsIgnoreCompatibleCollation(BooleanType, VarcharType(5), expected = false) checkEqualsIgnoreCompatibleCollation(StringType, IntegerType, expected = 
false) - checkEqualsIgnoreCompatibleCollation(IntegerType, StringType, expected = false) + checkEqualsIgnoreCompatibleCollation(StringType, BooleanType, expected = false) + checkEqualsIgnoreCompatibleCollation(StringType, StringType, expected = true) + checkEqualsIgnoreCompatibleCollation(StringType, CharType(5), expected = false) + checkEqualsIgnoreCompatibleCollation(StringType, VarcharType(5), expected = false) + checkEqualsIgnoreCompatibleCollation(CharType(5), IntegerType, expected = false) + checkEqualsIgnoreCompatibleCollation(CharType(5), BooleanType, expected = false) + checkEqualsIgnoreCompatibleCollation(CharType(5), StringType, expected = false) + checkEqualsIgnoreCompatibleCollation(CharType(5), CharType(5), expected = true) + checkEqualsIgnoreCompatibleCollation(CharType(5), CharType(10), expected = false) + checkEqualsIgnoreCompatibleCollation(CharType(5), VarcharType(5), expected = false) + checkEqualsIgnoreCompatibleCollation(VarcharType(5), IntegerType, expected = false) + checkEqualsIgnoreCompatibleCollation(VarcharType(5), BooleanType, expected = false) + checkEqualsIgnoreCompatibleCollation(VarcharType(5), StringType, expected = false) + checkEqualsIgnoreCompatibleCollation(VarcharType(5), CharType(5), expected = false) + checkEqualsIgnoreCompatibleCollation(VarcharType(5), VarcharType(5), expected = true) + checkEqualsIgnoreCompatibleCollation(VarcharType(5), VarcharType(10), expected = false) // Collated `StringType`. 
checkEqualsIgnoreCompatibleCollation(StringType, StringType("UTF8_LCASE"), expected = true) checkEqualsIgnoreCompatibleCollation( - StringType("UTF8_BINARY"), StringType("UTF8_LCASE"), expected = true) + StringType("UTF8_LCASE"), StringType("UTF8_BINARY"), expected = true) + checkEqualsIgnoreCompatibleCollation( + StringType("UTF8_LCASE"), CharType(5), expected = false) + checkEqualsIgnoreCompatibleCollation( + CharType(5), StringType("UTF8_LCASE"), expected = false) + checkEqualsIgnoreCompatibleCollation( + StringType("UTF8_LCASE"), VarcharType(5), expected = false) + checkEqualsIgnoreCompatibleCollation( + VarcharType(5), StringType("UTF8_LCASE"), expected = false) // Complex types. checkEqualsIgnoreCompatibleCollation( ArrayType(StringType), @@ -734,6 +889,26 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { ArrayType(ArrayType(StringType("UTF8_LCASE"))), expected = false ) + checkEqualsIgnoreCompatibleCollation( + ArrayType(ArrayType(StringType)), + ArrayType(ArrayType(CharType(5))), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + ArrayType(ArrayType(StringType("UTF8_LCASE"))), + ArrayType(ArrayType(CharType(5))), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + ArrayType(ArrayType(StringType)), + ArrayType(ArrayType(VarcharType(5))), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + ArrayType(ArrayType(StringType("UTF8_LCASE"))), + ArrayType(ArrayType(VarcharType(5))), + expected = false + ) checkEqualsIgnoreCompatibleCollation( MapType(StringType, StringType), MapType(StringType, StringType("UTF8_LCASE")), @@ -744,11 +919,51 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { MapType(StringType, StringType), expected = false ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType("UTF8_LCASE"), StringType), + MapType(CharType(5), StringType), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType, StringType), + MapType(CharType(5), StringType), + 
expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType("UTF8_LCASE"), StringType), + MapType(VarcharType(5), StringType), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType, StringType), + MapType(VarcharType(5), StringType), + expected = false + ) checkEqualsIgnoreCompatibleCollation( MapType(StringType("UTF8_LCASE"), ArrayType(StringType)), MapType(StringType("UTF8_LCASE"), ArrayType(StringType("UTF8_LCASE"))), expected = false ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType("UTF8_LCASE"), ArrayType(StringType)), + MapType(StringType("UTF8_LCASE"), ArrayType(CharType(5))), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType("UTF8_LCASE"), ArrayType(StringType("UTF8_LCASE"))), + MapType(StringType("UTF8_LCASE"), ArrayType(CharType(5))), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType("UTF8_LCASE"), ArrayType(StringType)), + MapType(StringType("UTF8_LCASE"), ArrayType(VarcharType(5))), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType("UTF8_LCASE"), ArrayType(StringType("UTF8_LCASE"))), + MapType(StringType("UTF8_LCASE"), ArrayType(VarcharType(5))), + expected = false + ) checkEqualsIgnoreCompatibleCollation( MapType(ArrayType(StringType), IntegerType), MapType(ArrayType(StringType("UTF8_LCASE")), IntegerType), @@ -759,14 +974,74 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { MapType(ArrayType(StringType("UTF8_LCASE")), IntegerType), expected = true ) + checkEqualsIgnoreCompatibleCollation( + MapType(ArrayType(StringType), IntegerType), + MapType(ArrayType(CharType(5)), IntegerType), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(ArrayType(StringType("UTF8_LCASE")), IntegerType), + MapType(ArrayType(CharType(5)), IntegerType), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(ArrayType(StringType), IntegerType), + 
MapType(ArrayType(VarcharType(5)), IntegerType), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(ArrayType(StringType("UTF8_LCASE")), IntegerType), + MapType(ArrayType(VarcharType(5)), IntegerType), + expected = false + ) checkEqualsIgnoreCompatibleCollation( StructType(StructField("a", StringType) :: Nil), StructType(StructField("a", StringType("UTF8_LCASE")) :: Nil), expected = false ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType) :: Nil), + StructType(StructField("a", CharType(5)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType("UTF8_LCASE")) :: Nil), + StructType(StructField("a", CharType(5)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType) :: Nil), + StructType(StructField("a", VarcharType(5)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType("UTF8_LCASE")) :: Nil), + StructType(StructField("a", VarcharType(5)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", ArrayType(StringType)) :: Nil), + StructType(StructField("a", ArrayType(StringType("UTF8_LCASE"))) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", ArrayType(StringType)) :: Nil), + StructType(StructField("a", ArrayType(CharType(5))) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", ArrayType(StringType("UTF8_LCASE"))) :: Nil), + StructType(StructField("a", ArrayType(CharType(5))) :: Nil), + expected = false + ) checkEqualsIgnoreCompatibleCollation( StructType(StructField("a", ArrayType(StringType)) :: Nil), + StructType(StructField("a", ArrayType(VarcharType(5))) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( StructType(StructField("a", ArrayType(StringType("UTF8_LCASE"))) :: 
Nil), + StructType(StructField("a", ArrayType(VarcharType(5))) :: Nil), expected = false ) checkEqualsIgnoreCompatibleCollation( @@ -774,11 +1049,51 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { StructType(StructField("a", MapType(StringType("UTF8_LCASE"), IntegerType)) :: Nil), expected = false ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", MapType(StringType, IntegerType)) :: Nil), + StructType(StructField("a", MapType(CharType(5), IntegerType)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", MapType(StringType("UTF8_LCASE"), IntegerType)) :: Nil), + StructType(StructField("a", MapType(CharType(5), IntegerType)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", MapType(StringType, IntegerType)) :: Nil), + StructType(StructField("a", MapType(VarcharType(5), IntegerType)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", MapType(StringType("UTF8_LCASE"), IntegerType)) :: Nil), + StructType(StructField("a", MapType(VarcharType(5), IntegerType)) :: Nil), + expected = false + ) checkEqualsIgnoreCompatibleCollation( StructType(StructField("a", StringType) :: Nil), StructType(StructField("b", StringType("UTF8_LCASE")) :: Nil), expected = false ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType) :: Nil), + StructType(StructField("b", CharType(5)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType("UTF8_LCASE")) :: Nil), + StructType(StructField("b", CharType(5)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType) :: Nil), + StructType(StructField("b", VarcharType(5)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType("UTF8_LCASE")) :: Nil), + 
StructType(StructField("b", VarcharType(5)) :: Nil), + expected = false + ) // Null compatibility checks. checkEqualsIgnoreCompatibleCollation( ArrayType(StringType, containsNull = true), @@ -878,90 +1193,6 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { } } - test("string field with invalid collation name") { - val collationProviders = Seq("spark", "icu") - collationProviders.foreach { provider => - val json = - s""" - |{ - | "type": "struct", - | "fields": [ - | { - | "name": "c1", - | "type": "string", - | "nullable": false, - | "metadata": { - | "${DataType.COLLATIONS_METADATA_KEY}": { - | "c1": "$provider.INVALID" - | } - | } - | } - | ] - |} - |""".stripMargin - - // Check that the exception will be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME config not enabled. - checkError( - exception = intercept[SparkException] { - DataType.fromJson(json) - }, - condition = "COLLATION_INVALID_NAME", - parameters = Map( - "proposals" -> "id", - "collationName" -> "INVALID")) - - // Check that the exception will not be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME enabled, but UTF8_BINARY collation will be returned. - withSQLConf(SQLConf.ALLOW_READING_UNKNOWN_COLLATIONS.key -> "true") { - val dataType = DataType.fromJson(json) - assert(dataType === StructType( - StructField("c1", StringType(CollationFactory.UTF8_BINARY_COLLATION_ID), false) :: Nil)) - } - } - } - - test("string field with invalid collation provider") { - val json = - s""" - |{ - | "type": "struct", - | "fields": [ - | { - | "name": "c1", - | "type": "string", - | "nullable": false, - | "metadata": { - | "${DataType.COLLATIONS_METADATA_KEY}": { - | "c1": "INVALID.INVALID" - | } - | } - | } - | ] - |} - |""".stripMargin - - - // Check that the exception will be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME config not enabled. 
- checkError( - exception = intercept[SparkException] { - DataType.fromJson(json) - }, - condition = "COLLATION_INVALID_PROVIDER", - parameters = Map( - "supportedProviders" -> "spark, icu", - "provider" -> "INVALID")) - - // Check that the exception will not be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME enabled, but UTF8_BINARY collation will be returned. - withSQLConf(SQLConf.ALLOW_READING_UNKNOWN_COLLATIONS.key -> "true") { - val dataType = DataType.fromJson(json) - assert(dataType === StructType( - StructField("c1", StringType(CollationFactory.UTF8_BINARY_COLLATION_ID), false) :: Nil)) - } - } - test("non string field has collation metadata") { val json = s""" @@ -1109,42 +1340,6 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { assert(parsedWithCollations === ArrayType(StringType(unicodeCollationId))) } - test("parse array type with invalid collation metadata") { - val utf8BinaryCollationId = CollationFactory.UTF8_BINARY_COLLATION_ID - val arrayJson = - s""" - |{ - | "type": "array", - | "elementType": "string", - | "containsNull": true - |} - |""".stripMargin - - val collationsMap = Map("element" -> "INVALID") - - // Parse without collations map - assert(DataType.parseDataType(JsonMethods.parse(arrayJson)) === ArrayType(StringType)) - - // Check that the exception will be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME config not enabled. - checkError( - exception = intercept[SparkException] { - DataType.parseDataType(JsonMethods.parse(arrayJson), collationsMap = collationsMap) - }, - condition = "COLLATION_INVALID_NAME", - parameters = Map( - "proposals" -> "id", - "collationName" -> "INVALID")) - - // Check that the exception will not be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME enabled, but UTF8_BINARY collation will be returned. 
- withSQLConf(SQLConf.ALLOW_READING_UNKNOWN_COLLATIONS.key -> "true") { - val dataType = DataType.parseDataType( - JsonMethods.parse(arrayJson), collationsMap = collationsMap) - assert(dataType === ArrayType(StringType(utf8BinaryCollationId))) - } - } - test("parse map type with collation metadata") { val unicodeCollationId = CollationFactory.collationNameToId("UNICODE") val mapJson = @@ -1168,44 +1363,6 @@ class DataTypeSuite extends SparkFunSuite with SQLHelper { MapType(StringType(unicodeCollationId), StringType(unicodeCollationId))) } - test("parse map type with invalid collation metadata") { - val utf8BinaryCollationId = CollationFactory.UTF8_BINARY_COLLATION_ID - val mapJson = - s""" - |{ - | "type": "map", - | "keyType": "string", - | "valueType": "string", - | "valueContainsNull": true - |} - |""".stripMargin - - val collationsMap = Map("key" -> "INVALID", "value" -> "INVALID") - - // Parse without collations map - assert(DataType.parseDataType(JsonMethods.parse(mapJson)) === MapType(StringType, StringType)) - - // Check that the exception will be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME config not enabled. - checkError( - exception = intercept[SparkException] { - DataType.parseDataType(JsonMethods.parse(mapJson), collationsMap = collationsMap) - }, - condition = "COLLATION_INVALID_NAME", - parameters = Map( - "proposals" -> "id", - "collationName" -> "INVALID")) - - // Check that the exception will not be thrown in case of invalid collation name and - // UNKNOWN_COLLATION_NAME enabled, but UTF8_BINARY collation will be returned. 
- withSQLConf(SQLConf.ALLOW_READING_UNKNOWN_COLLATIONS.key -> "true") { - val dataType = DataType.parseDataType( - JsonMethods.parse(mapJson), collationsMap = collationsMap) - assert(dataType === MapType( - StringType(utf8BinaryCollationId), StringType(utf8BinaryCollationId))) - } - } - test("SPARK-48680: Add CharType and VarcharType to DataTypes JAVA API") { assert(DataTypes.createCharType(1) === CharType(1)) assert(DataTypes.createVarcharType(100) === VarcharType(100)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala index f07ee8b35bbb2..ba3eaf46a5597 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeWriteCompatibilitySuite.scala @@ -685,6 +685,11 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite { ) } + test("Check string types: cast allowed regardless of collation") { + assertAllowed(StringType, StringType("UTF8_LCASE"), + "date time types", "Should allow writing string to collated string") + } + // Helper functions def assertAllowed( diff --git a/sql/connect/common/src/main/protobuf/spark/connect/base.proto b/sql/connect/common/src/main/protobuf/spark/connect/base.proto index e3c84ddd5e887..1d34011b20e78 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/base.proto @@ -25,6 +25,7 @@ import "spark/connect/common.proto"; import "spark/connect/expressions.proto"; import "spark/connect/relations.proto"; import "spark/connect/types.proto"; +import "spark/connect/ml.proto"; option java_multiple_files = true; option java_package = "org.apache.spark.connect.proto"; @@ -94,6 +95,7 @@ message AnalyzePlanRequest { Persist persist = 14; Unpersist unpersist = 15; GetStorageLevel get_storage_level 
= 16; + JsonToDDL json_to_ddl = 18; } message Schema { @@ -199,6 +201,11 @@ message AnalyzePlanRequest { // (Required) The logical plan to get the storage level. Relation relation = 1; } + + message JsonToDDL { + // (Required) The JSON formatted string to be converted to DDL. + string json_string = 1; + } } // Response to performing analysis of the query. Contains relevant metadata to be able to @@ -224,6 +231,7 @@ message AnalyzePlanResponse { Persist persist = 12; Unpersist unpersist = 13; GetStorageLevel get_storage_level = 14; + JsonToDDL json_to_ddl = 16; } message Schema { @@ -275,6 +283,10 @@ message AnalyzePlanResponse { // (Required) The StorageLevel as a result of get_storage_level request. StorageLevel storage_level = 1; } + + message JsonToDDL { + string ddl_string = 1; + } } // A request to be executed by the service. @@ -384,6 +396,9 @@ message ExecutePlanResponse { // Response for command that checkpoints a DataFrame. CheckpointCommandResult checkpoint_command_result = 19; + // ML command response + MlCommandResult ml_command_result = 20; + // Support arbitrary result objects. google.protobuf.Any extension = 999; } @@ -514,6 +529,9 @@ message ConfigRequest { message Set { // (Required) The config key-value pairs to set. repeated KeyValue pairs = 1; + + // (Optional) Whether to ignore failures. + optional bool silent = 2; } message Get { @@ -913,6 +931,20 @@ message ReleaseSessionRequest { // can be used for language or version specific information and is only intended for // logging purposes and will not be interpreted by the server. optional string client_type = 3; + + // Signals the server to allow the client to reconnect to the session after it is released. + // + // By default, the server tombstones the session upon release, preventing reconnections and + // fully cleaning the session state. 
+ // + // If this flag is set to true, the server may permit the client to reconnect to the session + // post-release, even if the session state has been cleaned. This can result in missing state, + // such as Temporary Views, Temporary UDFs, or the Current Catalog, in the reconnected session. + // + // Use this option sparingly and only when the client fully understands the implications of + // reconnecting to a released session. The client must ensure that any queries executed do not + // rely on the session state prior to its release. + bool allow_reconnect = 4; } // Next ID: 3 diff --git a/sql/connect/common/src/main/protobuf/spark/connect/commands.proto b/sql/connect/common/src/main/protobuf/spark/connect/commands.proto index a01d4369a7aed..10033b6400b53 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/commands.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/commands.proto @@ -21,6 +21,7 @@ import "google/protobuf/any.proto"; import "spark/connect/common.proto"; import "spark/connect/expressions.proto"; import "spark/connect/relations.proto"; +import "spark/connect/ml.proto"; package spark.connect; @@ -48,7 +49,7 @@ message Command { CheckpointCommand checkpoint_command = 14; RemoveCachedRemoteRelationCommand remove_cached_remote_relation_command = 15; MergeIntoTableCommand merge_into_table_command = 16; - + MlCommand ml_command = 17; // This field is used to mark extensions to the protocol. When plugins generate arbitrary // Commands they can add them here. During the planning the correct resolution is done. 
google.protobuf.Any extension = 999; diff --git a/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto b/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto index 3a91371fd3b25..bbe605a47f4ff 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/expressions.proto @@ -52,6 +52,8 @@ message Expression { NamedArgumentExpression named_argument_expression = 17; MergeAction merge_action = 19; TypedAggregateExpression typed_aggregate_expression = 20; + LazyExpression lazy_expression = 21; + SubqueryExpression subquery_expression = 22; // This field is used to mark extensions to the protocol. When plugins generate arbitrary // relations they can add them here. During the planning the correct resolution is done. @@ -259,6 +261,11 @@ message Expression { // When it is not a user defined function, Connect will use the function name directly. // When it is a user defined function, Connect will parse the function name first. bool is_user_defined_function = 4; + + // (Optional) Indicate if this function is defined in the internal function registry. + // If not set, the server will try to look up the function in the internal function registry + // and decide appropriately. + optional bool is_internal = 5; } // Expression as string. @@ -451,3 +458,22 @@ message MergeAction { Expression value = 2; } } + +message LazyExpression { + // (Required) The expression to be marked as lazy. + Expression child = 1; +} + +message SubqueryExpression { + // (Required) The id of corresponding connect plan. + int64 plan_id = 1; + + // (Required) The type of the subquery. 
+ SubqueryType subquery_type = 2; + + enum SubqueryType { + SUBQUERY_TYPE_UNKNOWN = 0; + SUBQUERY_TYPE_SCALAR = 1; + SUBQUERY_TYPE_EXISTS = 2; + } +} diff --git a/sql/connect/common/src/main/protobuf/spark/connect/ml.proto b/sql/connect/common/src/main/protobuf/spark/connect/ml.proto new file mode 100644 index 0000000000000..48b04a6e14cd0 --- /dev/null +++ b/sql/connect/common/src/main/protobuf/spark/connect/ml.proto @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = 'proto3'; + +package spark.connect; + +import "spark/connect/relations.proto"; +import "spark/connect/ml_common.proto"; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; +option go_package = "internal/generated"; + +// Command for ML +message MlCommand { + oneof command { + Fit fit = 1; + Fetch fetch = 2; + Delete delete = 3; + Write write = 4; + Read read = 5; + } + + // Command for estimator.fit(dataset) + message Fit { + // Estimator information + MlOperator estimator = 1; + // parameters of the Estimator + MlParams params = 2; + // the training dataset + Relation dataset = 3; + } + + // Command to delete the cached object which could be a model + // or summary evaluated by a model + message Delete { + ObjectRef obj_ref = 1; + } + + // Command to write ML operator + message Write { + // It could be an estimator/evaluator or the cached model + oneof type { + // Estimator or evaluator + MlOperator operator = 1; + // The cached model + ObjectRef obj_ref = 2; + } + // The parameters of operator which could be estimator/evaluator or a cached model + MlParams params = 3; + // Save the ML instance to the path + string path = 4; + // Overwrites if the output path already exists. + bool should_overwrite = 5; + // The options of the writer + map options = 6; + } + + // Command to load ML operator. 
+ message Read { + // ML operator information + MlOperator operator = 1; + // Load the ML instance from the input path + string path = 2; + } +} + +// The result of MlCommand +message MlCommandResult { + oneof result_type { + // The result of the attribute + Param param = 1; + // Evaluate a Dataset in a model and return the cached ID of summary + string summary = 2; + // Operator information + MlOperatorInfo operator_info = 3; + } + + // Represents an operator info + message MlOperatorInfo { + oneof type { + // The cached object which could be a model or summary evaluated by a model + ObjectRef obj_ref = 1; + // Operator name + string name = 2; + } + string uid = 3; + MlParams params = 4; + } + +} diff --git a/sql/connect/common/src/main/protobuf/spark/connect/ml_common.proto b/sql/connect/common/src/main/protobuf/spark/connect/ml_common.proto new file mode 100644 index 0000000000000..f91c2489ed947 --- /dev/null +++ b/sql/connect/common/src/main/protobuf/spark/connect/ml_common.proto @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +syntax = 'proto3'; + +package spark.connect; + +import "spark/connect/expressions.proto"; + +option java_multiple_files = true; +option java_package = "org.apache.spark.connect.proto"; +option go_package = "internal/generated"; + +// MlParams stores param settings for ML Estimator / Transformer / Evaluator +message MlParams { + // User-supplied params + map params = 1; +} + +// Represents the parameter type of the ML instance, or the returned value +// of the attribute +message Param { + oneof param_type { + Expression.Literal literal = 1; + Vector vector = 2; + Matrix matrix = 3; + } +} + +// MLOperator represents the ML operators like (Estimator, Transformer or Evaluator) +message MlOperator { + // The qualified name of the ML operator. + string name = 1; + // Unique id of the ML operator + string uid = 2; + // Represents what the ML operator is + OperatorType type = 3; + enum OperatorType { + UNSPECIFIED = 0; + ESTIMATOR = 1; + TRANSFORMER = 2; + EVALUATOR = 3; + MODEL = 4; + } +} + +// Represents a reference to the cached object which could be a model +// or summary evaluated by a model +message ObjectRef { + // The ID is used to lookup the object on the server side. 
+ string id = 1; +} + +// See pyspark.ml.linalg.Vector +message Vector { + oneof vector_type { + Dense dense = 1; + Sparse sparse = 2; + } + // See pyspark.ml.linalg.DenseVector + message Dense { + repeated double value = 1; + } + // See pyspark.ml.linalg.SparseVector + message Sparse { + int32 size = 1; + repeated int32 index = 2; + repeated double value = 3; + } +} + +// See pyspark.ml.linalg.Matrix +message Matrix { + oneof matrix_type { + Dense dense = 1; + Sparse sparse = 2; + } + // See pyspark.ml.linalg.DenseMatrix + message Dense { + int32 num_rows = 1; + int32 num_cols = 2; + repeated double value = 3; + bool is_transposed = 4; + } + // See pyspark.ml.linalg.SparseMatrix + message Sparse { + int32 num_rows = 1; + int32 num_cols = 2; + repeated int32 colptr = 3; + repeated int32 row_index = 4; + repeated double value = 5; + bool is_transposed = 6; + } +} diff --git a/sql/connect/common/src/main/protobuf/spark/connect/relations.proto b/sql/connect/common/src/main/protobuf/spark/connect/relations.proto index a7b9137c3400a..c2cbed0dd22ba 100644 --- a/sql/connect/common/src/main/protobuf/spark/connect/relations.proto +++ b/sql/connect/common/src/main/protobuf/spark/connect/relations.proto @@ -24,6 +24,7 @@ import "spark/connect/expressions.proto"; import "spark/connect/types.proto"; import "spark/connect/catalog.proto"; import "spark/connect/common.proto"; +import "spark/connect/ml_common.proto"; option java_multiple_files = true; option java_package = "org.apache.spark.connect.proto"; @@ -78,6 +79,7 @@ message Relation { WithRelations with_relations = 41; Transpose transpose = 42; UnresolvedTableValuedFunction unresolved_table_valued_function = 43; + LateralJoin lateral_join = 44; // NA functions NAFill fill_na = 90; @@ -97,6 +99,9 @@ message Relation { // Catalog API (experimental / unstable) Catalog catalog = 200; + // ML relation + MlRelation ml_relation = 300; + // This field is used to mark extensions to the protocol. 
When plugins generate arbitrary // relations they can add them here. During the planning the correct resolution is done. google.protobuf.Any extension = 998; @@ -104,6 +109,55 @@ message Relation { } } +// Relation to represent ML world +message MlRelation { + oneof ml_type { + Transform transform = 1; + Fetch fetch = 2; + } + // Relation to represent transform(input) of the operator + // which could be a cached model or a new transformer + message Transform { + oneof operator { + // Object reference + ObjectRef obj_ref = 1; + // Could be an ML transformer like VectorAssembler + MlOperator transformer = 2; + } + // the input dataframe + Relation input = 3; + // the operator specific parameters + MlParams params = 4; + } +} + +// Message for fetching attribute from object on the server side. +// Fetch can be represented as a Relation or a ML command +// Command: model.coefficients, model.summary.weightedPrecision which +// returns the final literal result +// Relation: model.summary.roc which returns a DataFrame (Relation) +message Fetch { + // (Required) reference to the object on the server side + ObjectRef obj_ref = 1; + // (Required) the calling method chains + repeated Method methods = 2; + + // Represents a method with inclusion of method name and its arguments + message Method { + // (Required) the method name + string method = 1; + // (Optional) the arguments of the method + repeated Args args = 2; + + message Args { + oneof args_type { + Param param = 1; + Relation input = 2; + } + } + } +} + // Used for testing purposes only. message Unknown {} @@ -974,6 +1028,9 @@ message GroupMap { // (Optional) Timeout configuration for groups that do not receive data for a while. optional string timeout_conf = 9; + + // (Optional) The schema for the grouped state. + optional DataType state_schema = 10; } message CoGroupMap { @@ -1140,3 +1197,20 @@ message AsOfJoin { // (Required) Whether to search for prior, subsequent, or closest matches. 
string direction = 10; } + +// Relation of type [[LateralJoin]]. +// +// `left` and `right` must be present. +message LateralJoin { + // (Required) Left input relation for a Join. + Relation left = 1; + + // (Required) Right input relation for a Join. + Relation right = 2; + + // (Optional) The join condition. + Expression join_condition = 3; + + // (Required) The join type. + Join.JoinType join_type = 4; +} diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala index 4ec6828d885ab..9de585503a500 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala @@ -25,6 +25,16 @@ private[sql] trait CloseableIterator[E] extends Iterator[E] with AutoCloseable { override def close() = self.close() } + + override def map[B](f: E => B): CloseableIterator[B] = { + new CloseableIterator[B] { + override def next(): B = f(self.next()) + + override def hasNext: Boolean = self.hasNext + + override def close(): Unit = self.close() + } + } } private[sql] abstract class WrappedCloseableIterator[E] extends CloseableIterator[E] { diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala index 3aad90e96f8cd..959779b357c2d 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala @@ -41,8 +41,7 @@ private[sql] class SparkResult[T]( responses: CloseableIterator[proto.ExecutePlanResponse], allocator: BufferAllocator, encoder: AgnosticEncoder[T], - timeZoneId: String, - setObservationMetricsOpt: Option[(Long, Row) => Unit] = None) + 
timeZoneId: String) extends AutoCloseable { self => case class StageInfo( @@ -122,7 +121,8 @@ private[sql] class SparkResult[T]( while (!stop && responses.hasNext) { val response = responses.next() - // Collect metrics for this response + // Collect **all** metrics for this response, whether or not registered to an Observation + // object. observedMetrics ++= processObservedMetrics(response.getObservedMetricsList) // Save and validate operationId @@ -209,23 +209,7 @@ private[sql] class SparkResult[T]( private def processObservedMetrics( metrics: java.util.List[ObservedMetrics]): Iterable[(String, Row)] = { metrics.asScala.map { metric => - assert(metric.getKeysCount == metric.getValuesCount) - var schema = new StructType() - val values = mutable.ArrayBuilder.make[Any] - values.sizeHint(metric.getKeysCount) - (0 until metric.getKeysCount).foreach { i => - val key = metric.getKeys(i) - val value = LiteralValueProtoConverter.toCatalystValue(metric.getValues(i)) - schema = schema.add(key, LiteralValueProtoConverter.toDataType(value.getClass)) - values += value - } - val row = new GenericRowWithSchema(values.result(), schema) - // If the metrics is registered by an Observation object, attach them and unblock any - // blocked thread. - setObservationMetricsOpt.foreach { setObservationMetrics => - setObservationMetrics(metric.getPlanId, row) - } - metric.getName -> row + metric.getName -> SparkResult.transformObservedMetrics(metric) } } @@ -387,8 +371,23 @@ private[sql] class SparkResult[T]( } } -private object SparkResult { +private[sql] object SparkResult { private val cleaner: Cleaner = Cleaner.create() + + /** Return value is a Seq of pairs, to preserve the order of values. 
*/ + private[sql] def transformObservedMetrics(metric: ObservedMetrics): Row = { + assert(metric.getKeysCount == metric.getValuesCount) + var schema = new StructType() + val values = mutable.ArrayBuilder.make[Any] + values.sizeHint(metric.getKeysCount) + (0 until metric.getKeysCount).foreach { i => + val key = metric.getKeys(i) + val value = LiteralValueProtoConverter.toCatalystValue(metric.getValues(i)) + schema = schema.add(key, LiteralValueProtoConverter.toDataType(value.getClass)) + values += value + } + new GenericRowWithSchema(values.result(), schema) + } } private[client] class SparkResultCloseable( diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala index f3abaddb0110b..4618c7e24d4ac 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.connect.client.CloseableIterator import org.apache.spark.sql.errors.{CompilationErrors, ExecutionErrors} import org.apache.spark.sql.types.Decimal +import org.apache.spark.unsafe.types.VariantVal /** * Helper class for converting arrow batches into user objects. 
@@ -336,6 +337,34 @@ object ArrowDeserializers { } } + case (VariantEncoder, StructVectors(struct, vectors)) => + assert(vectors.exists(_.getName == "value")) + assert( + vectors.exists(field => + field.getName == "metadata" && field.getField.getMetadata + .containsKey("variant") && field.getField.getMetadata.get("variant") == "true")) + val valueDecoder = + deserializerFor( + BinaryEncoder, + vectors + .find(_.getName == "value") + .getOrElse(throw CompilationErrors.columnNotFoundError("value")), + timeZoneId) + val metadataDecoder = + deserializerFor( + BinaryEncoder, + vectors + .find(_.getName == "metadata") + .getOrElse(throw CompilationErrors.columnNotFoundError("metadata")), + timeZoneId) + new StructFieldSerializer[VariantVal](struct) { + def value(i: Int): VariantVal = { + new VariantVal( + valueDecoder.get(i).asInstanceOf[Array[Byte]], + metadataDecoder.get(i).asInstanceOf[Array[Byte]]) + } + } + case (JavaBeanEncoder(tag, fields), StructVectors(struct, vectors)) => val constructor = methodLookup.findConstructor(tag.runtimeClass, MethodType.methodType(classOf[Unit])) @@ -366,7 +395,7 @@ object ArrowDeserializers { override def get(i: Int): Any = codec.decode(deserializer.get(i)) } - case (CalendarIntervalEncoder | VariantEncoder | _: UDTEncoder[_], _) => + case (CalendarIntervalEncoder | _: UDTEncoder[_], _) => throw ExecutionErrors.unsupportedDataTypeError(encoder.dataType) case _ => diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowSerializer.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowSerializer.scala index f8a5c63ac3abe..c01390bf07857 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowSerializer.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowSerializer.scala @@ -42,6 +42,7 @@ import org.apache.spark.sql.connect.client.CloseableIterator import 
org.apache.spark.sql.errors.ExecutionErrors import org.apache.spark.sql.types.Decimal import org.apache.spark.sql.util.ArrowUtils +import org.apache.spark.unsafe.types.VariantVal /** * Helper class for converting user objects into arrow batches. @@ -433,6 +434,22 @@ object ArrowSerializer { case (RowEncoder(fields), StructVectors(struct, vectors)) => structSerializerFor(fields, struct, vectors) { (_, i) => r => r.asInstanceOf[Row].get(i) } + case (VariantEncoder, StructVectors(struct, vectors)) => + assert(vectors.exists(_.getName == "value")) + assert( + vectors.exists(field => + field.getName == "metadata" && field.getField.getMetadata + .containsKey("variant") && field.getField.getMetadata.get("variant") == "true")) + new StructSerializer( + struct, + Seq( + new StructFieldSerializer( + extractor = (v: Any) => v.asInstanceOf[VariantVal].getValue, + serializerFor(BinaryEncoder, struct.getChild("value"))), + new StructFieldSerializer( + extractor = (v: Any) => v.asInstanceOf[VariantVal].getMetadata, + serializerFor(BinaryEncoder, struct.getChild("metadata"))))) + case (JavaBeanEncoder(tag, fields), StructVectors(struct, vectors)) => structSerializerFor(fields, struct, vectors) { (field, _) => val getter = methodLookup.findVirtual( @@ -450,7 +467,7 @@ object ArrowSerializer { delegate.write(index, codec.encode(value)) } - case (CalendarIntervalEncoder | VariantEncoder | _: UDTEncoder[_], _) => + case (CalendarIntervalEncoder | _: UDTEncoder[_], _) => throw ExecutionErrors.unsupportedDataTypeError(encoder.dataType) case _ => diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala index f63692717947a..3577ca228b03e 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala +++ 
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala @@ -175,16 +175,6 @@ object DataTypeProtoConverter { proto.DataType.Decimal.newBuilder().setPrecision(precision).setScale(scale).build()) .build() - case s: StringType => - proto.DataType - .newBuilder() - .setString( - proto.DataType.String - .newBuilder() - .setCollation(CollationFactory.fetchCollation(s.collationId).collationName) - .build()) - .build() - case CharType(length) => proto.DataType .newBuilder() @@ -197,6 +187,17 @@ object DataTypeProtoConverter { .setVarChar(proto.DataType.VarChar.newBuilder().setLength(length).build()) .build() + // StringType must be matched after CharType and VarcharType + case s: StringType => + proto.DataType + .newBuilder() + .setString( + proto.DataType.String + .newBuilder() + .setCollation(CollationFactory.fetchCollation(s.collationId).collationName) + .build()) + .build() + case DateType => ProtoDataTypes.DateType case TimestampType => ProtoDataTypes.TimestampType diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/InvalidCommandInput.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/InvalidCommandInput.scala new file mode 100644 index 0000000000000..313fe7262a10b --- /dev/null +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/InvalidCommandInput.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect.common + +/** + * Error thrown when a connect command is not valid. + */ +final case class InvalidCommandInput( + private val message: String = "", + private val cause: Throwable = null) + extends Exception(message, cause) diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_collation.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_collation.explain index a6a251505652a..a6bf9ae2e71c9 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_collation.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_collation.explain @@ -1,2 +1,2 @@ -Project [UTF8_BINARY AS collation(g)#0] +Project [SYSTEM.BUILTIN.UTF8_BINARY AS collation(g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_csv.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_csv.explain index ef87c18948b23..89e03c8188232 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_csv.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_csv.explain @@ -1,2 +1,2 @@ -Project [invoke(CsvToStructsEvaluator(Map(mode -> FAILFAST),StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true)),_corrupt_record,Some(America/Los_Angeles),None).evaluate(g#0)) AS from_csv(g)#0] 
+Project [from_csv(StructField(id,LongType,true), StructField(a,IntegerType,true), StructField(b,DoubleType,true), (mode,FAILFAST), g#0, Some(America/Los_Angeles), None) AS from_csv(g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json.explain index 9bc33b3b97d2c..8d1d122d156ff 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json.explain @@ -1,2 +1,2 @@ -Project [invoke(JsonToStructsEvaluator(Map(),StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true)),_corrupt_record,Some(America/Los_Angeles),false).evaluate(g#0)) AS from_json(g)#0] +Project [from_json(StructField(id,LongType,true), StructField(a,IntegerType,true), StructField(b,DoubleType,true), g#0, Some(America/Los_Angeles), false) AS from_json(g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json_orphaned.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json_orphaned.explain index 9bc33b3b97d2c..8d1d122d156ff 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json_orphaned.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json_orphaned.explain @@ -1,2 +1,2 @@ -Project [invoke(JsonToStructsEvaluator(Map(),StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true)),_corrupt_record,Some(America/Los_Angeles),false).evaluate(g#0)) AS from_json(g)#0] +Project [from_json(StructField(id,LongType,true), StructField(a,IntegerType,true), 
StructField(b,DoubleType,true), g#0, Some(America/Los_Angeles), false) AS from_json(g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json_with_json_schema.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json_with_json_schema.explain index 9bc33b3b97d2c..8d1d122d156ff 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json_with_json_schema.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_from_json_with_json_schema.explain @@ -1,2 +1,2 @@ -Project [invoke(JsonToStructsEvaluator(Map(),StructType(StructField(id,LongType,true),StructField(a,IntegerType,true),StructField(b,DoubleType,true)),_corrupt_record,Some(America/Los_Angeles),false).evaluate(g#0)) AS from_json(g)#0] +Project [from_json(StructField(id,LongType,true), StructField(a,IntegerType,true), StructField(b,DoubleType,true), g#0, Some(America/Los_Angeles), false) AS from_json(g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_lit.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_lit.explain index 928dd0bf85cc7..4491b6166afae 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_lit.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_lit.explain @@ -1,2 +1,2 @@ -Project [id#0L, id#0L, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! 
AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997620 AS 8.997620#0, 2023-02-23 04:31:59.808 AS TIMESTAMP '2023-02-23 04:31:59.808'#0, 1969-12-31 16:00:12.345 AS TIMESTAMP '1969-12-31 16:00:12.345'#0, 2023-02-23 20:36:00 AS TIMESTAMP_NTZ '2023-02-23 20:36:00'#0, 2023-02-23 AS DATE '2023-02-23'#0, ... 3 more fields] +Project [id#0L, id#0L, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997620 AS 8.997620#0, 2023-02-23 04:31:59.808 AS TIMESTAMP '2023-02-23 04:31:59.808'#0, 1969-12-31 16:00:12.345 AS TIMESTAMP '1969-12-31 16:00:12.345'#0, 2023-02-23 20:36:00 AS TIMESTAMP_NTZ '2023-02-23 20:36:00'#0, 2023-02-23 AS DATE '2023-02-23'#0, INTERVAL '0 00:03:20' DAY TO SECOND AS INTERVAL '0 00:03:20' DAY TO SECOND#0, ... 
2 more fields] +- LocalRelation , [id#0L, a#0, b#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_json.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_json.explain index b400aeeca5af2..d75545d8766d0 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_json.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_json.explain @@ -1,2 +1,2 @@ -Project [static_invoke(JsonExpressionEvalUtils.schemaOfJson(com.fasterxml.jackson.core.JsonFactory, org.apache.spark.sql.catalyst.json.JSONOptions, org.apache.spark.sql.catalyst.json.JsonInferSchema, [{"col":01}])) AS schema_of_json([{"col":01}])#0] +Project [invoke(SchemaOfJsonEvaluator(Map()).evaluate([{"col":01}])) AS schema_of_json([{"col":01}])#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_json_with_options.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_json_with_options.explain index b400aeeca5af2..37321af1deed4 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_json_with_options.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_json_with_options.explain @@ -1,2 +1,2 @@ -Project [static_invoke(JsonExpressionEvalUtils.schemaOfJson(com.fasterxml.jackson.core.JsonFactory, org.apache.spark.sql.catalyst.json.JSONOptions, org.apache.spark.sql.catalyst.json.JsonInferSchema, [{"col":01}])) AS schema_of_json([{"col":01}])#0] +Project [invoke(SchemaOfJsonEvaluator(Map(allowNumericLeadingZeros -> true)).evaluate([{"col":01}])) AS schema_of_json([{"col":01}])#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git 
a/sql/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_add.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_add.explain index 36dde1393cdb2..4b46e8453a1c0 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_add.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_add.explain @@ -1,2 +1,2 @@ -Project [timestampadd(week, cast(x#0L as int), t#0, Some(America/Los_Angeles)) AS timestampadd(week, x, t)#0] +Project [timestampadd(week, x#0L, t#0, Some(America/Los_Angeles)) AS timestampadd(week, x, t)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_date_with_format.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_date_with_format.explain index 3557274e9de8d..51270c147549e 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_date_with_format.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_date_with_format.explain @@ -1,2 +1,2 @@ -Project [cast(gettimestamp(s#0, yyyy-MM-dd, TimestampType, Some(America/Los_Angeles), false) as date) AS to_date(s, yyyy-MM-dd)#0] +Project [cast(gettimestamp(s#0, yyyy-MM-dd, TimestampType, try_to_date, Some(America/Los_Angeles), false) as date) AS to_date(s, yyyy-MM-dd)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_ltz_with_format.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_ltz_with_format.explain index e212c8d51a62f..e66fdba89e0ff 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_ltz_with_format.explain +++ 
b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_ltz_with_format.explain @@ -1,2 +1,2 @@ -Project [gettimestamp(g#0, g#0, TimestampType, Some(America/Los_Angeles), false) AS to_timestamp_ltz(g, g)#0] +Project [gettimestamp(g#0, g#0, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false) AS to_timestamp_ltz(g, g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_ntz_with_format.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_ntz_with_format.explain index 10ca240877fe1..f133becf78237 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_ntz_with_format.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_ntz_with_format.explain @@ -1,2 +1,2 @@ -Project [gettimestamp(g#0, g#0, TimestampNTZType, Some(America/Los_Angeles), false) AS to_timestamp_ntz(g, g)#0] +Project [gettimestamp(g#0, g#0, TimestampNTZType, try_to_timestamp, Some(America/Los_Angeles), false) AS to_timestamp_ntz(g, g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_with_format.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_with_format.explain index 54e1c0348a3a9..514b6705fa8e2 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_with_format.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_to_timestamp_with_format.explain @@ -1,2 +1,2 @@ -Project [gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, Some(America/Los_Angeles), false) AS to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS)#0] +Project [gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, 
try_to_timestamp, Some(America/Los_Angeles), false) AS to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp.explain index 8074beab7db81..c4dd956e83427 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_try_to_timestamp.explain @@ -1,2 +1,2 @@ -Project [gettimestamp(g#0, g#0, TimestampType, Some(America/Los_Angeles), false) AS try_to_timestamp(g, g)#0] +Project [gettimestamp(g#0, g#0, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false) AS try_to_timestamp(g, g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain index 606bb694bad47..6d854da250fcc 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain @@ -1,2 +1,2 @@ -Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! 
AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997620 AS 8.997620#0, 2023-02-23 04:31:59.808 AS TIMESTAMP '2023-02-23 04:31:59.808'#0, 1969-12-31 16:00:12.345 AS TIMESTAMP '1969-12-31 16:00:12.345'#0, ... 19 more fields] +Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997620 AS 8.997620#0, 2023-02-23 04:31:59.808 AS TIMESTAMP '2023-02-23 04:31:59.808'#0, 1969-12-31 16:00:12.345 AS TIMESTAMP '1969-12-31 16:00:12.345'#0, 2023-02-23 20:36:00 AS TIMESTAMP_NTZ '2023-02-23 20:36:00'#0, ... 
18 more fields] +- LocalRelation , [id#0L, a#0, b#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_date.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_date.explain index a1934253d93bd..7ac1d31802baf 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_date.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_date.explain @@ -1,2 +1,2 @@ -Project [unix_date(cast(gettimestamp(s#0, yyyy-MM-dd, TimestampType, Some(America/Los_Angeles), false) as date)) AS unix_date(to_date(s, yyyy-MM-dd))#0] +Project [unix_date(cast(gettimestamp(s#0, yyyy-MM-dd, TimestampType, try_to_date, Some(America/Los_Angeles), false) as date)) AS unix_date(to_date(s, yyyy-MM-dd))#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_micros.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_micros.explain index fb5cdd36f9b70..e5337b0f6c499 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_micros.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_micros.explain @@ -1,2 +1,2 @@ -Project [unix_micros(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, Some(America/Los_Angeles), false)) AS unix_micros(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L] +Project [unix_micros(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false)) AS unix_micros(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_millis.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_millis.explain index 
3382c9ed679c5..5c852467a3507 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_millis.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_millis.explain @@ -1,2 +1,2 @@ -Project [unix_millis(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, Some(America/Los_Angeles), false)) AS unix_millis(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L] +Project [unix_millis(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false)) AS unix_millis(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_seconds.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_seconds.explain index d21c368869732..03d4386edda71 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_seconds.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_unix_seconds.explain @@ -1,2 +1,2 @@ -Project [unix_seconds(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, Some(America/Los_Angeles), false)) AS unix_seconds(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L] +Project [unix_seconds(gettimestamp(s#0, yyyy-MM-dd HH:mm:ss.SSSS, TimestampType, try_to_timestamp, Some(America/Los_Angeles), false)) AS unix_seconds(to_timestamp(s, yyyy-MM-dd HH:mm:ss.SSSS))#0L] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath.explain index d9e2e55d9b12e..4752e5218bb12 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath.explain @@ 
-1,2 +1,2 @@ -Project [xpath(s#0, a/b/text()) AS xpath(s, a/b/text())#0] +Project [invoke(XPathListEvaluator(a/b/text()).evaluate(s#0)) AS xpath(s, a/b/text())#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_boolean.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_boolean.explain index 9b75f81802467..b537366736d25 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_boolean.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_boolean.explain @@ -1,2 +1,2 @@ -Project [xpath_boolean(s#0, a/b) AS xpath_boolean(s, a/b)#0] +Project [invoke(XPathBooleanEvaluator(a/b).evaluate(s#0)) AS xpath_boolean(s, a/b)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_double.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_double.explain index 9ce47136df242..76e0b01721841 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_double.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_double.explain @@ -1,2 +1,2 @@ -Project [xpath_double(s#0, a/b) AS xpath_double(s, a/b)#0] +Project [invoke(XPathDoubleEvaluator(a/b).evaluate(s#0)) AS xpath_double(s, a/b)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_float.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_float.explain index 02b29ec4afa9c..21aebb357928f 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_float.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_float.explain @@ -1,2 
+1,2 @@ -Project [xpath_float(s#0, a/b) AS xpath_float(s, a/b)#0] +Project [invoke(XPathFloatEvaluator(a/b).evaluate(s#0)) AS xpath_float(s, a/b)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_int.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_int.explain index cdd56eaa73199..eee74472b1cff 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_int.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_int.explain @@ -1,2 +1,2 @@ -Project [xpath_int(s#0, a/b) AS xpath_int(s, a/b)#0] +Project [invoke(XPathIntEvaluator(a/b).evaluate(s#0)) AS xpath_int(s, a/b)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_long.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_long.explain index 3acefb13d0f8c..8356c2c8e18c1 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_long.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_long.explain @@ -1,2 +1,2 @@ -Project [xpath_long(s#0, a/b) AS xpath_long(s, a/b)#0L] +Project [invoke(XPathLongEvaluator(a/b).evaluate(s#0)) AS xpath_long(s, a/b)#0L] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_number.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_number.explain index 0a30685f0c6d2..bc32d4fefffb8 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_number.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_number.explain @@ -1,2 +1,2 @@ -Project [xpath_number(s#0, a/b) AS 
xpath_number(s, a/b)#0] +Project [invoke(XPathDoubleEvaluator(a/b).evaluate(s#0)) AS xpath_number(s, a/b)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_short.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_short.explain index ed440972bf490..e0ba76b3acd0e 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_short.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_short.explain @@ -1,2 +1,2 @@ -Project [xpath_short(s#0, a/b) AS xpath_short(s, a/b)#0] +Project [invoke(XPathShortEvaluator(a/b).evaluate(s#0)) AS xpath_short(s, a/b)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_string.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_string.explain index f4103f68c3bc3..80f2600e6cdd4 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_string.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_xpath_string.explain @@ -1,2 +1,2 @@ -Project [xpath_string(s#0, a/b) AS xpath_string(s, a/b)#0] +Project [invoke(XPathStringEvaluator(a/b).evaluate(s#0)) AS xpath_string(s, a/b)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_add.json b/sql/connect/common/src/test/resources/query-tests/queries/column_add.json index cfa40fac8c6f9..3b8219884aa0b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_add.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_add.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/column_add.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_add.proto.bin index 10b410b5b08b5..9b9889cf775fe 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_add.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_add.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_and.json b/sql/connect/common/src/test/resources/query-tests/queries/column_and.json index d3f8cd0e73cbc..2fa4c654cce1d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_and.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_and.json @@ -25,7 +25,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -38,9 +39,11 @@ "literal": { "double": 0.5 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_and.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_and.proto.bin index 241f1a9303b2c..99111eba7191c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_and.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_and.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_between.json b/sql/connect/common/src/test/resources/query-tests/queries/column_between.json index 20927b93d8438..de970b1cdf343 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_between.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_between.json @@ -25,7 +25,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -38,9 +39,11 @@ "literal": { "integer": 20 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } 
}] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_between.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_between.proto.bin index d03dd02a2f36a..324e26b35750a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_between.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_between.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseAND.json b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseAND.json index bd3ac671fca33..71f6d6b3ec3ab 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseAND.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseAND.json @@ -22,7 +22,8 @@ "literal": { "integer": 255 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseAND.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseAND.proto.bin index 4815bc7dd1a20..13e798120873a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseAND.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseAND.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseOR.json b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseOR.json index eaa27ffa46164..e2771dc543b12 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseOR.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseOR.json @@ -22,7 +22,8 @@ "literal": { "integer": 7 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseOR.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseOR.proto.bin index 
9cf110da4ad61..f823efd7551a1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseOR.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseOR.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseXOR.json b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseXOR.json index c51eb3140c339..108d66745d34d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseXOR.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseXOR.json @@ -22,7 +22,8 @@ "literal": { "integer": 78 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseXOR.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseXOR.proto.bin index 70c61f9620576..994283d7a9431 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseXOR.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_bitwiseXOR.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_contains.json b/sql/connect/common/src/test/resources/query-tests/queries/column_contains.json index 05d6ccf38b367..d8aef66bdf546 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_contains.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_contains.json @@ -22,7 +22,8 @@ "literal": { "string": "baz" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_contains.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_contains.proto.bin index 9c796f9470c31..1126c759b0332 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_contains.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/column_contains.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_divide.json b/sql/connect/common/src/test/resources/query-tests/queries/column_divide.json index 8d71061b151ca..be7a5f3c851d1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_divide.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_divide.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_divide.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_divide.proto.bin index 49b5d8d2590dd..22b010a97dd58 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_divide.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_divide.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_endsWith.json b/sql/connect/common/src/test/resources/query-tests/queries/column_endsWith.json index f4171c2792fbd..5ee6cfe40b1ec 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_endsWith.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_endsWith.json @@ -22,7 +22,8 @@ "literal": { "string": "suffix_" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_endsWith.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_endsWith.proto.bin index 03f41a339f00c..3f3db0c90bc19 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_endsWith.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_endsWith.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_eqNullSafe.json 
b/sql/connect/common/src/test/resources/query-tests/queries/column_eqNullSafe.json index eea1da49bc59e..44e11ad2b8942 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_eqNullSafe.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_eqNullSafe.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_eqNullSafe.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_eqNullSafe.proto.bin index 22de941ad44b0..0614560048a9b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_eqNullSafe.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_eqNullSafe.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_equals.json b/sql/connect/common/src/test/resources/query-tests/queries/column_equals.json index 7397f4fb46acd..1f05b249eb00b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_equals.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_equals.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_equals.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_equals.proto.bin index e226de59ddcd4..cad0e9b14a814 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_equals.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_equals.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_geq.json b/sql/connect/common/src/test/resources/query-tests/queries/column_geq.json index 9f24bc251739f..4c7f5339409f0 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/column_geq.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_geq.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_geq.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_geq.proto.bin index 1c4af866109ab..a68ee6cc8b6a7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_geq.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_geq.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_gt.json b/sql/connect/common/src/test/resources/query-tests/queries/column_gt.json index 4bb8fb41f249d..74be85e709ed5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_gt.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_gt.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_gt.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_gt.proto.bin index 44ca37fbb4048..e85f5a3e23552 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_gt.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_gt.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_ilike.json b/sql/connect/common/src/test/resources/query-tests/queries/column_ilike.json index 47c1b63abe319..60a62c4595ac8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_ilike.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_ilike.json @@ -22,7 +22,8 @@ "literal": { "string": "%fOb%" } - }] + }], + "isInternal": false } }] 
} diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_ilike.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_ilike.proto.bin index 285400db7daf5..368bebd9ea48d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_ilike.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_ilike.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_isNaN.json b/sql/connect/common/src/test/resources/query-tests/queries/column_isNaN.json index f594918ed930a..12d3d19d7797a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_isNaN.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_isNaN.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_isNaN.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_isNaN.proto.bin index 1030abda5b8c2..8c2fad75be346 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_isNaN.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_isNaN.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_isNotNull.json b/sql/connect/common/src/test/resources/query-tests/queries/column_isNotNull.json index f34d3f4eac552..6af0e5bfdb4e3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_isNotNull.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_isNotNull.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_isNotNull.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_isNotNull.proto.bin index 
e8cccdf024934..cdc382e44ee22 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_isNotNull.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_isNotNull.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_isNull.json b/sql/connect/common/src/test/resources/query-tests/queries/column_isNull.json index 74e990622a3a7..a6ac6534ecd55 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_isNull.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_isNull.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_isNull.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_isNull.proto.bin index 8fc24a9e21b38..0ea4d6f2ffe89 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_isNull.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_isNull.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_isin.json b/sql/connect/common/src/test/resources/query-tests/queries/column_isin.json index d8811a4e780b5..b34abc941cce8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_isin.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_isin.json @@ -30,7 +30,8 @@ "literal": { "string": "foo" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_isin.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_isin.proto.bin index 365e07f35bb48..cde6686dd4064 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_isin.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_isin.proto.bin differ 
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_leq.json b/sql/connect/common/src/test/resources/query-tests/queries/column_leq.json index cda8694c0439e..55388f667c448 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_leq.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_leq.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_leq.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_leq.proto.bin index e8463292e4040..692ccad0aa9c3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_leq.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_leq.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_like.json b/sql/connect/common/src/test/resources/query-tests/queries/column_like.json index 1390451af55ab..be7b4e8dfeb34 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_like.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_like.json @@ -22,7 +22,8 @@ "literal": { "string": "%bob%" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_like.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_like.proto.bin index 07382ec1643cb..a1856511eb1b2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_like.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_like.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_lt.json b/sql/connect/common/src/test/resources/query-tests/queries/column_lt.json index c927e75de181b..1264a0e43a54a 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/column_lt.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_lt.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_lt.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_lt.proto.bin index f4c3a110b126b..083c8d46611d1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_lt.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_lt.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_modulo.json b/sql/connect/common/src/test/resources/query-tests/queries/column_modulo.json index 0c5a78eea2dff..d11494f159a5a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_modulo.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_modulo.json @@ -22,7 +22,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_modulo.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_modulo.proto.bin index 55bfeba04ed66..a86b5e5de63e4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_modulo.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_modulo.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_multiply.json b/sql/connect/common/src/test/resources/query-tests/queries/column_multiply.json index 8c17581c67d1c..7b12a8850d5f2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_multiply.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_multiply.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] 
+ }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_multiply.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_multiply.proto.bin index 8fd1b3941d1f7..b4958d84c5a05 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_multiply.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_multiply.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_not.json b/sql/connect/common/src/test/resources/query-tests/queries/column_not.json index 2f873196ba1d0..3fa58e874d75d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_not.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_not.json @@ -18,7 +18,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_not.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_not.proto.bin index 19609b6ee85a5..51ea1c4d20bc7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_not.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_not.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_not_equals.json b/sql/connect/common/src/test/resources/query-tests/queries/column_not_equals.json index 589d57a18b94b..093770f4563be 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_not_equals.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_not_equals.json @@ -25,9 +25,11 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_not_equals.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/column_not_equals.proto.bin index cdf0b4290e61e..cc13a11b48013 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_not_equals.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_not_equals.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_or.json b/sql/connect/common/src/test/resources/query-tests/queries/column_or.json index ae1424f763feb..324bfc850d2dc 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_or.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_or.json @@ -25,7 +25,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -38,9 +39,11 @@ "literal": { "double": 0.5 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_or.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_or.proto.bin index 69f219e938a4e..a52ba0707a755 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_or.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_or.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_rlike.json b/sql/connect/common/src/test/resources/query-tests/queries/column_rlike.json index e53403db41cd0..6ff88b1ea6560 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_rlike.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_rlike.json @@ -22,7 +22,8 @@ "literal": { "string": "^[0-9]*$" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_rlike.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_rlike.proto.bin index 
7dd56baf04213..b4cd080c61aad 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_rlike.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_rlike.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_startsWith.json b/sql/connect/common/src/test/resources/query-tests/queries/column_startsWith.json index 431e13d818639..9a9036b3cf963 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_startsWith.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_startsWith.json @@ -22,7 +22,8 @@ "literal": { "string": "prefix_" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_startsWith.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_startsWith.proto.bin index fa1132c73de7b..366011b3c3968 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_startsWith.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_startsWith.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_substr.json b/sql/connect/common/src/test/resources/query-tests/queries/column_substr.json index 3b02117cc6e5b..5beaf7c4c3711 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_substr.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_substr.json @@ -26,7 +26,8 @@ "literal": { "integer": 3 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_substr.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_substr.proto.bin index 636a46a480626..5eedae63ea95c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_substr.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/column_substr.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_subtract.json b/sql/connect/common/src/test/resources/query-tests/queries/column_subtract.json index d15c2941ee1bd..68faab9cbb05a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_subtract.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_subtract.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_subtract.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_subtract.proto.bin index f5716427588ed..2a341fb5201ed 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_subtract.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_subtract.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_unary_minus.json b/sql/connect/common/src/test/resources/query-tests/queries/column_unary_minus.json index 0db558e49e38c..b4c76e2c6719b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_unary_minus.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_unary_minus.json @@ -18,7 +18,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_unary_minus.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_unary_minus.proto.bin index 66343bea4e29b..53277e9dd2452 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_unary_minus.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_unary_minus.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/column_when_otherwise.json b/sql/connect/common/src/test/resources/query-tests/queries/column_when_otherwise.json index db2ceccfd22ab..f2223c20e569c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/column_when_otherwise.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/column_when_otherwise.json @@ -25,7 +25,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }, { "literal": { @@ -42,7 +43,8 @@ "literal": { "integer": 20 } - }] + }], + "isInternal": false } }, { "literal": { @@ -52,7 +54,8 @@ "literal": { "string": "high" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/column_when_otherwise.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/column_when_otherwise.proto.bin index 031c3683c5e6d..e22f469ea2490 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/column_when_otherwise.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/column_when_otherwise.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/cube_column.json b/sql/connect/common/src/test/resources/query-tests/queries/cube_column.json index 5b9709ff06576..b4d884568354b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/cube_column.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/cube_column.json @@ -30,7 +30,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }, "name": ["count"] diff --git a/sql/connect/common/src/test/resources/query-tests/queries/cube_column.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/cube_column.proto.bin index d46e40b39dcfe..99a704c0c7f07 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/cube_column.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/cube_column.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/cube_string.json b/sql/connect/common/src/test/resources/query-tests/queries/cube_string.json index 03625861d88f2..1c9fb7b264664 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/cube_string.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/cube_string.json @@ -32,7 +32,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }, "name": ["count"] diff --git a/sql/connect/common/src/test/resources/query-tests/queries/cube_string.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/cube_string.proto.bin index 59c7a55571201..2e092aa640278 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/cube_string.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/cube_string.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/filter.json b/sql/connect/common/src/test/resources/query-tests/queries/filter.json index 1046e1262150e..d40f8031884a5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/filter.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/filter.json @@ -22,7 +22,8 @@ "literal": { "long": "10" } - }] + }], + "isInternal": false } } } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/filter.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/filter.proto.bin index 069171ead3233..36bb753fff234 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/filter.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/filter.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/from_avro_with_options.json b/sql/connect/common/src/test/resources/query-tests/queries/from_avro_with_options.json index 
662aa746af243..adbc647c186de 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/from_avro_with_options.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/from_avro_with_options.json @@ -41,9 +41,11 @@ "literal": { "string": "zstandard" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/from_avro_with_options.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/from_avro_with_options.proto.bin index 5da5c48b41153..eba3a4648ca60 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/from_avro_with_options.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/from_avro_with_options.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/from_avro_without_options.json b/sql/connect/common/src/test/resources/query-tests/queries/from_avro_without_options.json index da2840f2d3a0b..0ef3262f1eb4a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/from_avro_without_options.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/from_avro_without_options.json @@ -22,7 +22,8 @@ "literal": { "string": "{\"type\": \"string\", \"name\": \"name\"}" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/from_avro_without_options.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/from_avro_without_options.proto.bin index 4dd12e2dbe1dd..629804e8608aa 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/from_avro_without_options.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/from_avro_without_options.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath.json 
b/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath.json index 375c0f9324c3f..fe2efd928ccf4 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath.json @@ -26,7 +26,8 @@ "literal": { "binary": "CvwBCgxjb21tb24ucHJvdG8SDXNwYXJrLmNvbm5lY3QisAEKDFN0b3JhZ2VMZXZlbBIZCgh1c2VfZGlzaxgBIAEoCFIHdXNlRGlzaxIdCgp1c2VfbWVtb3J5GAIgASgIUgl1c2VNZW1vcnkSIAoMdXNlX29mZl9oZWFwGAMgASgIUgp1c2VPZmZIZWFwEiIKDGRlc2VyaWFsaXplZBgEIAEoCFIMZGVzZXJpYWxpemVkEiAKC3JlcGxpY2F0aW9uGAUgASgFUgtyZXBsaWNhdGlvbkIiCh5vcmcuYXBhY2hlLnNwYXJrLmNvbm5lY3QucHJvdG9QAWIGcHJvdG8z" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath.proto.bin index 07d4c6c5b286f..db667ef8ee6b2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath_options.json b/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath_options.json index db9371b64ef72..93974afec3566 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath_options.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath_options.json @@ -37,9 +37,11 @@ "literal": { "string": "2" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath_options.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath_options.proto.bin index 00fd58da6be84..a7262d64522c6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath_options.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/from_protobuf_messageClassName_descFilePath_options.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_abs.json b/sql/connect/common/src/test/resources/query-tests/queries/function_abs.json index 13df3437ddabe..aa589275670b8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_abs.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_abs.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_abs.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_abs.proto.bin index 86cfbc09a8f91..6bda0e50aa837 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_abs.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_abs.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_acos.json b/sql/connect/common/src/test/resources/query-tests/queries/function_acos.json index 7506c0f6cb630..82543692456c1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_acos.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_acos.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_acos.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_acos.proto.bin index cc6a279cb188e..98bc0d821d7d0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_acos.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_acos.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_acosh.json b/sql/connect/common/src/test/resources/query-tests/queries/function_acosh.json index 6a83b4ab008bc..82a69e9f74166 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_acosh.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_acosh.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_acosh.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_acosh.proto.bin index e16ed2ba92e3f..48c57cb1c9f9b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_acosh.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_acosh.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_add_months.json b/sql/connect/common/src/test/resources/query-tests/queries/function_add_months.json index b1b2e78a08435..97b9a00d5ea3e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_add_months.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_add_months.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_add_months.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_add_months.proto.bin index 
6abacc9cc2b40..c396f24928cf5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_add_months.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_add_months.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json index 4204a44b44ce0..28beb401cd650 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin index f635e1fc689b1..40687059a8c45 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.json index 9c630e1253494..0436dd1a60c85 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.proto.bin index 
41d024cdb7eed..8e9a324c2fde5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.json index 8f5be474ab4b3..56ad10f6f74bf 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.proto.bin index cd6764581f2ca..97091b52e6c59 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.json index 9381042b71886..305cf0b317a23 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.json @@ -34,7 +34,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.proto.bin index ca789f04ce1d4..0d4c98e59ce0a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt_with_mode_padding_aad.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt.json index 06469d4840547..89d07a44e8440 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt.proto.bin index c7a70b51707f3..0089323b6bbe1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode.json index 7eb9b4ed8b4ed..afef2dba9aad9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + 
}], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode.proto.bin index ecd81ae44fcbd..3d89f200e609c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding.json index 59a6a5e35fd42..8617d2d9d928a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding.proto.bin index 9de01ddc5ea69..3888e9a1d075d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv.json index 285c67289d30a..b8b8e66787848 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv.json @@ -34,7 +34,8 @@ "literal": { "binary": "Q0RF" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv.proto.bin index 812426f3c00d3..0ff56c7c74372 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv_aad.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv_aad.json index eb0e178fd3534..cb790e822a52f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv_aad.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv_aad.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv_aad.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv_aad.proto.bin index ee39beb07cee4..7ff11b9868b99 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv_aad.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aes_encrypt_with_mode_padding_iv_aad.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate.json index df1813aed64c5..8e113b8874a5f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate.json @@ -35,7 +35,8 @@ "unresolvedNamedLambdaVariable": { "nameParts": ["y_2"] } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -55,7 +56,8 @@ "nameParts": ["x_3"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate.proto.bin index c43f4e6dbbc1b..12456f54ab438 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate_with_finish_lambda.json b/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate_with_finish_lambda.json index 956b42db65639..b46810e63a304 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate_with_finish_lambda.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate_with_finish_lambda.json @@ -35,7 +35,8 @@ "unresolvedNamedLambdaVariable": { "nameParts": ["y_2"] } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -57,14 +58,16 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }, "arguments": [{ "nameParts": ["x_3"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate_with_finish_lambda.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate_with_finish_lambda.proto.bin index cf32ea4ddd3e7..70fbe778cb715 100644 Binary 
files a/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate_with_finish_lambda.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_aggregate_with_finish_lambda.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_any.json b/sql/connect/common/src/test/resources/query-tests/queries/function_any.json index 4512c060d703b..73332625c2995 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_any.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_any.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "flag" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_any.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_any.proto.bin index 9b014b58da57c..ff038010b22c2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_any.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_any.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_ignore_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_ignore_nulls.json index 011d43a91d080..35cc2daade7a8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_ignore_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_ignore_nulls.json @@ -22,7 +22,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_ignore_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_ignore_nulls.proto.bin index 546c696ecfdf3..29c9a0d93723d 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_ignore_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_ignore_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_respect_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_respect_nulls.json index 7d4f5a2de38e8..9307c56feb4af 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_respect_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_respect_nulls.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_respect_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_respect_nulls.proto.bin index 124a7ad7efe09..fc2e75c2015b9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_respect_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_any_value_with_respect_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct.json b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct.json index 5579faf119647..773fd7df0ab30 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct.proto.bin index bac82f670b298..9c7bd463ab943 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct_rsd.json b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct_rsd.json index 851862082ca04..0224877077bc5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct_rsd.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct_rsd.json @@ -22,7 +22,8 @@ "literal": { "double": 0.1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct_rsd.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct_rsd.proto.bin index fd61420fd1e45..79b285eb0a28f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct_rsd.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_count_distinct_rsd.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_percentile.json b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_percentile.json index 490a2dcd86967..925f95e618930 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_percentile.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_percentile.json @@ -26,7 +26,8 @@ "literal": { "integer": 20 } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_percentile.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_percentile.proto.bin index ae73716fa4319..478af7d5ff53a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_approx_percentile.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_approx_percentile.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array.json index 20fe495bb9bf4..99152d4e998d1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array.proto.bin index 2b679eb4c6db1..25fd26ed8ec81 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_agg.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_agg.json index a3197ce95068a..0e4e0fe4dd504 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_agg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_agg.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_agg.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_array_agg.proto.bin index c7306df86214e..4b30c105a1078 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_agg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_agg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_append.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_append.json index cabd44c063dec..b6af59d5a1cc1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_append.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_append.json @@ -22,7 +22,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_append.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_append.proto.bin index 76f2f0255bf25..3e2ac2115ff67 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_append.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_append.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_compact.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_compact.json index c3ebf313190c2..93b449217eb51 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_compact.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_compact.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_compact.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_compact.proto.bin index 
949d66cb951f0..5c244efd0258f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_compact.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_compact.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_contains.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_contains.json index a362d66d9d64d..349927b7cfd9e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_contains.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_contains.json @@ -22,7 +22,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_contains.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_contains.proto.bin index d8764f60364c2..af333721d6944 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_contains.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_contains.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_distinct.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_distinct.json index d38f4194bcd2b..00b65fc1665d5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_distinct.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_distinct.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_distinct.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_distinct.proto.bin index e6359c074bf23..98dfa75ac01b3 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_array_distinct.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_distinct.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_except.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_except.json index 17d50c87161d6..81ed93a29524d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_except.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_except.json @@ -33,9 +33,11 @@ "literal": { "integer": 4 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_except.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_except.proto.bin index 692511b2f74a6..1037a1ffe38db 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_except.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_except.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_insert.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_insert.json index f4540edbf4108..5bd114b61ad40 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_insert.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_insert.json @@ -26,7 +26,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_insert.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_insert.proto.bin index 6e2178ad124e9..a44ca96ace24a 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_array_insert.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_insert.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_intersect.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_intersect.json index 1b95a6724f86d..daa94e5aed678 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_intersect.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_intersect.json @@ -29,9 +29,11 @@ "literal": { "integer": 4 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_intersect.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_intersect.proto.bin index 67fb497cf270c..dca31097549b8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_intersect.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_intersect.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_join.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_join.json index 94e8c176cefbf..cbecc842d0b2c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_join.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_join.json @@ -22,7 +22,8 @@ "literal": { "string": ";" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_join.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_join.proto.bin index fbab1b208605d..0b2959acac448 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_array_join.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_join.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_join_with_null_replacement.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_join_with_null_replacement.json index ad580c33e476c..c5c5dce8cda9e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_join_with_null_replacement.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_join_with_null_replacement.json @@ -26,7 +26,8 @@ "literal": { "string": "null" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_join_with_null_replacement.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_join_with_null_replacement.proto.bin index e3fb6b3bf67c3..f0045cbeb529e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_join_with_null_replacement.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_join_with_null_replacement.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_max.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_max.json index ba67984758a5a..2bf3706f92b88 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_max.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_max.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_max.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_max.proto.bin index 
f7a98c08cd175..b8009a1fb6193 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_max.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_max.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_min.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_min.json index a342ae18f9ef7..6485fb4cf3691 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_min.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_min.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_min.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_min.proto.bin index 02cfdfeb215d6..45ac77d97e837 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_min.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_min.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_position.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_position.json index 4c212cb028273..20c7794a7de6b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_position.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_position.json @@ -22,7 +22,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_position.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_position.proto.bin index 4ef2b11273f25..ee2811ae0de0f 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_array_position.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_position.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_prepend.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_prepend.json index ededeb015a227..ff6bd2b0e33fc 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_prepend.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_prepend.json @@ -22,7 +22,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_prepend.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_prepend.proto.bin index 837710597e7b6..8ad00dfca7a04 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_prepend.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_prepend.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_remove.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_remove.json index 8c562247714a4..f769471cd9791 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_remove.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_remove.json @@ -22,7 +22,8 @@ "literal": { "integer": 314 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_remove.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_remove.proto.bin index 95e2872ad77bd..fd44cfb3372ad 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_remove.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_array_remove.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_repeat.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_repeat.json index c9d9f1f9ca79d..0d218470c1ec1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_repeat.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_repeat.json @@ -22,7 +22,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_repeat.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_repeat.proto.bin index e370db16e977c..6302ae6ee3f0c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_repeat.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_repeat.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_size.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_size.json index c1c618bc7f11f..ac279580a09e1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_size.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_size.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_size.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_size.proto.bin index 47949dfbbda29..97554f7ecc930 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_size.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_size.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort.json index 406dc54c8cd2f..74a038895b36a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort.proto.bin index 2074caae16384..6ac4fd09dc0ca 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort_with_comparator.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort_with_comparator.json index f8178ddd64aaf..7a36c03476279 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort_with_comparator.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort_with_comparator.json @@ -31,7 +31,8 @@ "unresolvedNamedLambdaVariable": { "nameParts": ["y_2"] } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -40,7 +41,8 @@ "nameParts": ["y_2"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort_with_comparator.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort_with_comparator.proto.bin index c506889388c97..77719f7334985 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort_with_comparator.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_sort_with_comparator.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_union.json b/sql/connect/common/src/test/resources/query-tests/queries/function_array_union.json index 7d54079cdb47e..841888bcb1497 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_array_union.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_array_union.json @@ -33,9 +33,11 @@ "literal": { "integer": 3 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_array_union.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_array_union.proto.bin index fc3d9d7cd0fd1..ee743e69702ae 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_array_union.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_array_union.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_overlap.json b/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_overlap.json index ce1d288e00d78..73b49b729edfa 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_overlap.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_overlap.json @@ -29,9 +29,11 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_overlap.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_overlap.proto.bin index 216f306507d40..b555679123842 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_overlap.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_overlap.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_zip.json b/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_zip.json index f24ee44835eb4..ce0c0ce75ab9c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_zip.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_zip.json @@ -29,9 +29,11 @@ "literal": { "integer": 20 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_zip.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_zip.proto.bin index 67c867e6d450c..c1baf7e190a15 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_zip.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_arrays_zip.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ascii.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ascii.json index 3c4dcb70fead3..4c98e62f9dbb9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ascii.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ascii.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ascii.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ascii.proto.bin index 5989bd3b5c606..cd46fe994aedd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ascii.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_ascii.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_asin.json b/sql/connect/common/src/test/resources/query-tests/queries/function_asin.json index 4bf89be753458..6d06a28966590 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_asin.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_asin.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_asin.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_asin.proto.bin index 737ad789da268..6365765a3a45a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_asin.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_asin.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_asinh.json b/sql/connect/common/src/test/resources/query-tests/queries/function_asinh.json index 238571b0231c6..95900eaf761fd 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_asinh.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_asinh.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_asinh.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_asinh.proto.bin index 01ea4675b22eb..f23beb2848fae 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_asinh.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_asinh.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_assert_true_with_message.json b/sql/connect/common/src/test/resources/query-tests/queries/function_assert_true_with_message.json index 5520b70a0250b..83cd89e5b9afa 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_assert_true_with_message.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_assert_true_with_message.json @@ -25,13 +25,15 @@ "literal": { "integer": 0 } - }] + }], + "isInternal": false } }, { "literal": { "string": "id negative!" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_assert_true_with_message.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_assert_true_with_message.proto.bin index 6992604efe1b3..923478e910580 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_assert_true_with_message.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_assert_true_with_message.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_atan.json b/sql/connect/common/src/test/resources/query-tests/queries/function_atan.json index 3ae4e7ef188ec..2a873025e6254 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_atan.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_atan.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_atan.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_atan.proto.bin index b932086941f45..c458d693ca127 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_atan.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_atan.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_atan2.json b/sql/connect/common/src/test/resources/query-tests/queries/function_atan2.json index 7d08116c40ae6..53a03d1324f25 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_atan2.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_atan2.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_atan2.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_atan2.proto.bin index 372ae8358494e..25a25871185e7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_atan2.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_atan2.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_atanh.json b/sql/connect/common/src/test/resources/query-tests/queries/function_atanh.json index 8daec8813917e..f78b9f6421e89 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_atanh.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_atanh.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_atanh.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_atanh.proto.bin index 0aa2f3527ae9c..eebe635bc33e0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_atanh.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_atanh.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_avg.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_avg.json index b433f1ea89c29..a3d8868671520 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_avg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_avg.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_avg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_avg.proto.bin index 9d9bd296dbdda..eb1078cd83ba7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_avg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_avg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_base64.json b/sql/connect/common/src/test/resources/query-tests/queries/function_base64.json index 97739dca283ef..8ba9e38dd538e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_base64.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_base64.json @@ -26,7 +26,8 @@ } } } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_base64.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_base64.proto.bin index fc854d974752b..0ab0e038829c3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_base64.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_base64.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bin.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bin.json index 304e56504bad9..56a5ce889d93d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bin.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_bin.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bin.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bin.proto.bin index e8d55fb8d6149..64790b75bc090 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bin.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bin.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_and.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_and.json index 83b2bcf599f7c..3d156cec87a17 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_and.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_and.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin index ad81bec6f0865..af643d406b41e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_and.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_count.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_count.json index d5c6b698f7f16..277244f769144 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_count.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_count.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + 
}], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin index 875e17d974e21..1eb39a6fb7346 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_count.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_get.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_get.json index 39425c5e3ffbc..9e2b2d901308f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_get.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_get.json @@ -22,7 +22,8 @@ "literal": { "integer": 0 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin index cd0f4098374c0..e3769a795a2f4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_get.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_length.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_length.json index df21871cb535d..1f09a52756b80 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_length.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_length.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_length.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_length.proto.bin index 860c2eaec0e85..13617e95afd73 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_length.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_length.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_or.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_or.json index c8e1b2acfe4e0..be21d9557e076 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_or.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_or.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin index a52907474fb96..9e4ef6d43a175 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_or.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json index 463e6fc5322f2..250d2d97c5b0b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_xor.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin index c4a9a5e654f2b..33f0570e72f10 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bit_xor.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bit_position.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bit_position.json index 8956c3f303fea..1e8a60bae7e3d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bit_position.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bit_position.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bit_position.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bit_position.proto.bin index 8dcf7b1718d4b..849fa465d39db 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bit_position.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bit_position.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bucket_number.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bucket_number.json index 8956c3f303fea..1e8a60bae7e3d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bucket_number.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bucket_number.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bucket_number.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bucket_number.proto.bin index 
8dcf7b1718d4b..849fa465d39db 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bucket_number.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_bucket_number.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_construct_agg.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_construct_agg.json index 910702e90e0ed..aac1f57b5d9b6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_construct_agg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_construct_agg.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_construct_agg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_construct_agg.proto.bin index deae4a3aea072..8c57d776b1d63 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_construct_agg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_construct_agg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_count.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_count.json index b2e9f11efbcbd..95095c7c28922 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_count.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_count.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "bytes" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_count.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_count.proto.bin index a568b2dae4208..216c51d3c4d29 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_count.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_count.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_or_agg.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_or_agg.json index 23e37246647b9..041ed9cb6330f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_or_agg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_or_agg.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "bytes" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_or_agg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_or_agg.proto.bin index d27edc3766288..41309fc648cda 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_or_agg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bitmap_or_agg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitwise_not.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bitwise_not.json index 7ddf73253e0a3..736751212fa96 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bitwise_not.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bitwise_not.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bitwise_not.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_bitwise_not.proto.bin index bfaefb2a20075..5cfb94c1da38f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bitwise_not.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bitwise_not.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bool_and.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bool_and.json index 9b58c898242d9..3e013976a418f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bool_and.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bool_and.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "flag" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bool_and.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bool_and.proto.bin index 6ea3860027c86..e6e52551364e1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bool_and.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bool_and.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bool_or.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bool_or.json index 763b019b05f1f..7cf57c57812fd 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bool_or.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bool_or.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "flag" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bool_or.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bool_or.proto.bin index dd928c8dc8a31..207083a8a12a0 
100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bool_or.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bool_or.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bround.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bround.json index 585a0befb224d..d42711c424c46 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bround.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bround.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bround.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bround.proto.bin index 8625ccb1a58f1..40e173d9df4a0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bround.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_bround.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_btrim.json b/sql/connect/common/src/test/resources/query-tests/queries/function_btrim.json index 3f35d627f9a54..8c3ec028f0a54 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_btrim.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_btrim.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_btrim.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_btrim.proto.bin index 200dac07a0bb7..e2cbbefb551a4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_btrim.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_btrim.proto.bin 
differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.json b/sql/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.json index cf0476340ccb3..9f8b77f176f72 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.proto.bin index d7669c93b2b89..25e48c663b97e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_btrim_with_specified_trim_string.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bucket.json b/sql/connect/common/src/test/resources/query-tests/queries/function_bucket.json index 971660144a5bc..002222d2f7765 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_bucket.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_bucket.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": true } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_bucket.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_bucket.proto.bin index 1b389401f15e6..874d68be22a53 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_bucket.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_bucket.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_call_function.json b/sql/connect/common/src/test/resources/query-tests/queries/function_call_function.json index 0d78dd471f20c..a4c83ac456721 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_call_function.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_call_function.json @@ -19,7 +19,8 @@ "unparsedIdentifier": "g" } }], - "isUserDefinedFunction": true + "isUserDefinedFunction": true, + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_call_function.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_call_function.proto.bin index aee05767813f9..bde6f21637bea 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_call_function.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_call_function.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cardinality.json b/sql/connect/common/src/test/resources/query-tests/queries/function_cardinality.json index e2b3dd0428793..82b5dadd84073 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_cardinality.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_cardinality.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "f" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cardinality.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_cardinality.proto.bin index 54c8cfe843433..232a89d09c6e8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_cardinality.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_cardinality.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ceil.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ceil.json index 5a9961ab47f55..d6f4769366fe7 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ceil.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ceil.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ceil.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ceil.proto.bin index 3761deb1663a2..2b18da5724b81 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ceil.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ceil.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ceil_scale.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ceil_scale.json index bda5e85924c30..7413bb77656f5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ceil_scale.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ceil_scale.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ceil_scale.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ceil_scale.proto.bin index 8db402ac167e0..a658e6f879f89 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ceil_scale.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ceil_scale.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling.json index 99726305e8524..878003697424f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin index cc91ac246a57c..57b7616c17930 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json index c0b0742b12157..c1fdfb5dfad50 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin index 30efc42b9d2bc..ba54d5d4d1edc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ceiling_scale.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_char.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_char.json index 593139a0a584a..70f6fbcd475f6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_char.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_char.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_char.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_char.proto.bin index 21c3dad55657b..df7fb55ab4ab0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_char.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_char.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_char_length.json b/sql/connect/common/src/test/resources/query-tests/queries/function_char_length.json index 3e408260d7020..94323cf8e727e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_char_length.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_char_length.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_char_length.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_char_length.proto.bin index 7f290c6ddc623..93096e7ff4051 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_char_length.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_char_length.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_character_length.json b/sql/connect/common/src/test/resources/query-tests/queries/function_character_length.json index ad12dde8a956c..286b1e9d6040f 
100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_character_length.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_character_length.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_character_length.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_character_length.proto.bin index f1762971d4eca..12932bdb3087d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_character_length.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_character_length.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_chr.json b/sql/connect/common/src/test/resources/query-tests/queries/function_chr.json index 28366f87e10d7..a7d551a94c194 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_chr.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_chr.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_chr.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_chr.proto.bin index dc665d294ecb6..7b51a92f63aae 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_chr.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_chr.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_coalesce.json b/sql/connect/common/src/test/resources/query-tests/queries/function_coalesce.json index 497922b5df75c..523f853552b5e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_coalesce.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_coalesce.json @@ -22,7 +22,8 @@ "literal": { "integer": 3 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_coalesce.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_coalesce.proto.bin index ec871018489c2..6ceb52f4de47e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_coalesce.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_coalesce.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_collate.json b/sql/connect/common/src/test/resources/query-tests/queries/function_collate.json index 8bb6c2c4c3726..f1af032978d9e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_collate.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_collate.json @@ -22,7 +22,8 @@ "literal": { "string": "UNICODE" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_collate.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_collate.proto.bin index dda4a00a395b6..17ce43c3de332 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_collate.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_collate.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_collation.json b/sql/connect/common/src/test/resources/query-tests/queries/function_collation.json index dac04b3b9858f..50ad1268ea9d6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_collation.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_collation.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": 
false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_collation.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_collation.proto.bin index 739994a486026..3cb87236be855 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_collation.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_collation.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_collect_list.json b/sql/connect/common/src/test/resources/query-tests/queries/function_collect_list.json index c5bae4baef352..81210507bc861 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_collect_list.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_collect_list.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_collect_list.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_collect_list.proto.bin index e3827b9f650ae..a66294b2c475d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_collect_list.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_collect_list.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_collect_set.json b/sql/connect/common/src/test/resources/query-tests/queries/function_collect_set.json index 615386d050e14..4ebf94c214391 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_collect_set.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_collect_set.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_collect_set.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_collect_set.proto.bin index 5fb97f27d25b6..abbd935086a33 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_collect_set.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_collect_set.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_concat.json b/sql/connect/common/src/test/resources/query-tests/queries/function_concat.json index bad1ad6f3b90e..cd3702e86b3dc 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_concat.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_concat.json @@ -29,7 +29,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -42,9 +43,11 @@ "literal": { "integer": 40 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_concat.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_concat.proto.bin index 7411f55f14747..87076f350fe42 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_concat.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_concat.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_concat_ws.json b/sql/connect/common/src/test/resources/query-tests/queries/function_concat_ws.json index b9ba89b42185c..9468c641c0a81 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_concat_ws.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_concat_ws.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_concat_ws.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_concat_ws.proto.bin index 2fbc4f7090448..34d0bbdeb8794 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_concat_ws.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_concat_ws.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_contains.json b/sql/connect/common/src/test/resources/query-tests/queries/function_contains.json index b7cb12d9aa9a3..3a10154044dab 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_contains.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_contains.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_contains.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_contains.proto.bin index 8864968a9dc3b..80eeae9cd02c2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_contains.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_contains.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_conv.json b/sql/connect/common/src/test/resources/query-tests/queries/function_conv.json index c6734936bfcd1..9e65edfed6ee6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_conv.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_conv.json @@ -26,7 +26,8 @@ "literal": { "integer": 16 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_conv.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_conv.proto.bin 
index 373b997b79240..3161da1cbca42 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_conv.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_conv.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_with_source_time_zone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_with_source_time_zone.json index b27d7e2b55fae..6d0b24f2c8c72 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_with_source_time_zone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_with_source_time_zone.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "t" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_with_source_time_zone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_with_source_time_zone.proto.bin index 8ef4e3bdce29e..26e096c4c3f20 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_with_source_time_zone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_with_source_time_zone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_without_source_time_zone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_without_source_time_zone.json index b072c89d42bd1..40f599741a9d2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_without_source_time_zone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_without_source_time_zone.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": 
"t" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_without_source_time_zone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_without_source_time_zone.proto.bin index c6d1db9b8fb1d..a7f5317512a32 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_without_source_time_zone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_convert_timezone_without_source_time_zone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_corr.json b/sql/connect/common/src/test/resources/query-tests/queries/function_corr.json index 6fadb0385622b..c7c44457d4e0d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_corr.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_corr.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_corr.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_corr.proto.bin index fdeeb4fd12d19..5aac92504c0bd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_corr.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_corr.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cos.json b/sql/connect/common/src/test/resources/query-tests/queries/function_cos.json index f7072dff03404..f9ee2077f33a0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_cos.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_cos.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff 
--git a/sql/connect/common/src/test/resources/query-tests/queries/function_cos.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_cos.proto.bin index 09fd198b097c0..f7592d6ab63c9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_cos.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_cos.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cosh.json b/sql/connect/common/src/test/resources/query-tests/queries/function_cosh.json index 3bcab61d37a0d..78b0d73b977ab 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_cosh.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_cosh.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cosh.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_cosh.proto.bin index 54d5da8fabfa6..5619d48d20397 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_cosh.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_cosh.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cot.json b/sql/connect/common/src/test/resources/query-tests/queries/function_cot.json index 62ce963fa8737..1e521c2e6d7f6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_cot.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_cot.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cot.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_cot.proto.bin index 
e79c32660a772..e8ba807d9506a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_cot.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_cot.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_count.json b/sql/connect/common/src/test/resources/query-tests/queries/function_count.json index 126a0ca242c52..613ba5510828f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_count.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_count.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_count.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_count.proto.bin index 6c87a809ad0c4..1e9b69069e05d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_count.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_count.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_countDistinct.json b/sql/connect/common/src/test/resources/query-tests/queries/function_countDistinct.json index eb211ceb239aa..6a844d694ae8f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_countDistinct.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_countDistinct.json @@ -23,7 +23,8 @@ "unparsedIdentifier": "g" } }], - "isDistinct": true + "isDistinct": true, + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_countDistinct.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_countDistinct.proto.bin index 591e2300ec689..b25c393cea048 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_countDistinct.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_countDistinct.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_count_if.json b/sql/connect/common/src/test/resources/query-tests/queries/function_count_if.json index 669477bbc5dd8..f19ce9ced1a5d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_count_if.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_count_if.json @@ -25,9 +25,11 @@ "literal": { "integer": 0 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_count_if.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_count_if.proto.bin index 07c65ebaa4293..97306ae5253e4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_count_if.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_count_if.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_count_min_sketch.json b/sql/connect/common/src/test/resources/query-tests/queries/function_count_min_sketch.json index 94be79dcc33e5..badc965380235 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_count_min_sketch.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_count_min_sketch.json @@ -30,7 +30,8 @@ "literal": { "integer": 11 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_count_min_sketch.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_count_min_sketch.proto.bin index 11bcae8062e82..d2bc25513fde3 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_count_min_sketch.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_count_min_sketch.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_count_typed.json b/sql/connect/common/src/test/resources/query-tests/queries/function_count_typed.json index 1c5df90b79cd1..db67d7e86bd46 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_count_typed.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_count_typed.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_count_typed.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_count_typed.proto.bin index 44b613eb40c6f..852290d484be3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_count_typed.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_count_typed.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_covar_pop.json b/sql/connect/common/src/test/resources/query-tests/queries/function_covar_pop.json index 3c4df70a5fbfc..294d055796e08 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_covar_pop.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_covar_pop.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_covar_pop.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_covar_pop.proto.bin index 4a7202f15e768..09939663ce3bf 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_covar_pop.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_covar_pop.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_covar_samp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_covar_samp.json index 7c723069e4671..f366b2f62ae50 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_covar_samp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_covar_samp.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_covar_samp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_covar_samp.proto.bin index ebff687730e35..c8ef18e85fe67 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_covar_samp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_covar_samp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_crc32.json b/sql/connect/common/src/test/resources/query-tests/queries/function_crc32.json index 1892a9af85d97..7ab879e648a6c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_crc32.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_crc32.json @@ -26,7 +26,8 @@ } } } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_crc32.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_crc32.proto.bin index 54ad14dedae4e..07cb4138077d1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_crc32.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_crc32.proto.bin differ 
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_csc.json b/sql/connect/common/src/test/resources/query-tests/queries/function_csc.json index 88504ed9c5280..68cc4e0096f4b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_csc.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_csc.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_csc.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_csc.proto.bin index 0ed5022a73adf..8bf716cbab8a1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_csc.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_csc.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cume_dist.json b/sql/connect/common/src/test/resources/query-tests/queries/function_cume_dist.json index ac48841199075..a2978d3242ca6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_cume_dist.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_cume_dist.json @@ -15,7 +15,8 @@ "window": { "windowFunction": { "unresolvedFunction": { - "functionName": "cume_dist" + "functionName": "cume_dist", + "isInternal": false } }, "partitionSpec": [{ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_cume_dist.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_cume_dist.proto.bin index 7578245aabe3a..1dea3404d5a55 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_cume_dist.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_cume_dist.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_curdate.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_curdate.json index c344f5271704d..5b49d10fb5c56 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_curdate.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_curdate.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "curdate" + "functionName": "curdate", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_curdate.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_curdate.proto.bin index 6ec58b57c2a7a..b307dc12a647e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_curdate.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_curdate.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_catalog.json b/sql/connect/common/src/test/resources/query-tests/queries/function_current_catalog.json index 27c7b23111908..ff962a03b8053 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_current_catalog.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_current_catalog.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "current_catalog" + "functionName": "current_catalog", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_catalog.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_current_catalog.proto.bin index bb25a49935482..b1db0f9bd74d8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_current_catalog.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_current_catalog.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_current_database.json b/sql/connect/common/src/test/resources/query-tests/queries/function_current_database.json index dfa59fd5fe50a..1624c30ce4000 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_current_database.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_current_database.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "current_database" + "functionName": "current_database", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_database.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_current_database.proto.bin index fdb11c9c8bd06..541891bc4418b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_current_database.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_current_database.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_date.json b/sql/connect/common/src/test/resources/query-tests/queries/function_current_date.json index 6dab8c39d626c..3f3c9c7dba5c0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_current_date.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_current_date.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "current_date" + "functionName": "current_date", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_date.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_current_date.proto.bin index f32c3f541c4c7..d6b94df786293 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_current_date.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_current_date.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_schema.json b/sql/connect/common/src/test/resources/query-tests/queries/function_current_schema.json index 01d5126a74b39..f8ac56d6398f0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_current_schema.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_current_schema.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "current_schema" + "functionName": "current_schema", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_schema.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_current_schema.proto.bin index 9687afe89a50b..e083896cdfeae 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_current_schema.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_current_schema.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_timestamp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_current_timestamp.json index 16af5eb9ba084..bea08c2ee6af0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_current_timestamp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_current_timestamp.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "current_timestamp" + "functionName": "current_timestamp", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_timestamp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_current_timestamp.proto.bin index 5a1f3de6c3a9a..8691a0468fa82 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_current_timestamp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_current_timestamp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_current_timezone.json index 082d7f5bae6f6..d3807c4dbbe92 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_current_timezone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_current_timezone.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "current_timezone" + "functionName": "current_timezone", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_current_timezone.proto.bin index a780830516bc0..bf06690e07afc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_current_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_current_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_user.json b/sql/connect/common/src/test/resources/query-tests/queries/function_current_user.json index 30ddb80f884c2..f7bf8d9f91509 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_current_user.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_current_user.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "current_user" + "functionName": "current_user", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_current_user.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_current_user.proto.bin index 0a79078fd7097..df4ea4e2cc4da 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_current_user.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_current_user.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_add.json b/sql/connect/common/src/test/resources/query-tests/queries/function_date_add.json index f81ad3335242c..1c022cdafbc3b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_date_add.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_date_add.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_add.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_date_add.proto.bin index f4dbc16b05c1d..d1e296f485cb7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_date_add.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_date_add.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_diff.json b/sql/connect/common/src/test/resources/query-tests/queries/function_date_diff.json index b6094ff8734d6..a4fc25f1bf0db 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_date_diff.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_date_diff.json @@ -33,9 +33,11 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_diff.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_date_diff.proto.bin index 5621af09474d8..bd37bb76df8e8 
100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_date_diff.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_date_diff.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_date_format.json index 9b3d469ed4e98..6ef48b7f015a3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_date_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_date_format.json @@ -22,7 +22,8 @@ "literal": { "string": "yyyy-MM-dd" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_date_format.proto.bin index 7226c20974b2a..f28036fdcc2d9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_date_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_date_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.json b/sql/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.json index ada0747743b78..08769597e4d7c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.json @@ -18,7 +18,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.proto.bin index 5cbd76eda9074..ba9b9047893d7 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_date_from_unix_date.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_part.json b/sql/connect/common/src/test/resources/query-tests/queries/function_date_part.json index 5e8d075c4e2d0..51590b395c19a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_date_part.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_date_part.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_part.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_date_part.proto.bin index 368ecb676c1fe..cddd5c1954888 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_date_part.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_date_part.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_sub.json b/sql/connect/common/src/test/resources/query-tests/queries/function_date_sub.json index f1dde0902a20a..2d74935b65010 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_date_sub.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_date_sub.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_sub.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_date_sub.proto.bin index 43b630c27ed45..a033be1ab6e41 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_date_sub.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_date_sub.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_trunc.json b/sql/connect/common/src/test/resources/query-tests/queries/function_date_trunc.json index 363da9b9b9006..649c211af8465 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_date_trunc.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_date_trunc.json @@ -22,7 +22,8 @@ "literal": { "string": "minute" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_date_trunc.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_date_trunc.proto.bin index f037fb8d34a56..fad72e5dc4eb8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_date_trunc.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_date_trunc.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dateadd.json b/sql/connect/common/src/test/resources/query-tests/queries/function_dateadd.json index 2658c724d287a..e8d272bafa922 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_dateadd.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_dateadd.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dateadd.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_dateadd.proto.bin index e72a77f0e2394..1dcbd5fc64ca2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_dateadd.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_dateadd.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_datediff.json b/sql/connect/common/src/test/resources/query-tests/queries/function_datediff.json index b5ef560486d0d..e9e9dccf52f95 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_datediff.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_datediff.json @@ -33,9 +33,11 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_datediff.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_datediff.proto.bin index 02e917b406838..bbcd3b621848a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_datediff.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_datediff.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_datepart.json b/sql/connect/common/src/test/resources/query-tests/queries/function_datepart.json index cc4dca8674264..42b21d6e9c831 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_datepart.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_datepart.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_datepart.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_datepart.proto.bin index 9d58fd3474d70..470e4796515fe 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_datepart.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_datepart.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_day.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_day.json index c1e4b4d13fb7a..712ad56df109d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_day.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_day.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_day.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_day.proto.bin index e72a4a354c31d..9115ae09ce8fc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_day.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_day.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dayname.json b/sql/connect/common/src/test/resources/query-tests/queries/function_dayname.json index 7898aa53deb89..a27513443fe4a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_dayname.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_dayname.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin index 2809f3b9b7a11..370737bbc1fb7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofmonth.json b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofmonth.json index 3e453c1f7a652..76b0a6e03f27c 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofmonth.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofmonth.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofmonth.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofmonth.proto.bin index 3a2973e21e5a0..8a63ee68777e8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofmonth.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofmonth.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofweek.json b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofweek.json index 74715de151e77..7cea8fdc7745d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofweek.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofweek.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofweek.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofweek.proto.bin index fceea203c790e..6c9a6252a48ed 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofweek.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofweek.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofyear.json b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofyear.json index d23c6790a47dd..3579113015448 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofyear.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofyear.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofyear.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofyear.proto.bin index a526b449ae0a4..ef1d60c231599 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_dayofyear.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_dayofyear.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_days.json b/sql/connect/common/src/test/resources/query-tests/queries/function_days.json index 9e20c48729a30..b9a73878304b6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_days.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_days.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": true } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_days.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_days.proto.bin index b0a8472f8c4ff..b81c13479a36f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_days.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_days.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_decode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_decode.json index 6be60808e64f3..2aaae4466b5fc 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_decode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_decode.json @@ -30,7 +30,8 @@ "literal": { "string": "UTF-8" } - }] + }], + "isInternal": false } 
}] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_decode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_decode.proto.bin index 18b8bbcf6a01d..d71f018e81ee8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_decode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_decode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_degrees.json b/sql/connect/common/src/test/resources/query-tests/queries/function_degrees.json index e096b07e4dc6e..f366faf9ac7c5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_degrees.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_degrees.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_degrees.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_degrees.proto.bin index e2d264bb2e108..efe6bb3a93cca 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_degrees.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_degrees.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_dense_rank.json b/sql/connect/common/src/test/resources/query-tests/queries/function_dense_rank.json index 46c5e1eaddfc0..2699a863e9755 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_dense_rank.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_dense_rank.json @@ -15,7 +15,8 @@ "window": { "windowFunction": { "unresolvedFunction": { - "functionName": "dense_rank" + "functionName": "dense_rank", + "isInternal": false } }, "partitionSpec": [{ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_dense_rank.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_dense_rank.proto.bin index 4597e63be8379..bd6861d0514e4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_dense_rank.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_dense_rank.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_e.json b/sql/connect/common/src/test/resources/query-tests/queries/function_e.json index c99c04a6befdb..12f1ed4146bb1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_e.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_e.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "e" + "functionName": "e", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin index 49f6c12fbcc72..e08ad39fe02f7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_e.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_element_at.json b/sql/connect/common/src/test/resources/query-tests/queries/function_element_at.json index ef5551440934c..562f84a0abfb3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_element_at.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_element_at.json @@ -22,7 +22,8 @@ "literal": { "string": "bob" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_element_at.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_element_at.proto.bin index 993818c6cb4bf..6cbf58ebaaf63 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_element_at.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_element_at.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_elt.json b/sql/connect/common/src/test/resources/query-tests/queries/function_elt.json index fe7dd29f91a33..0573d25e70879 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_elt.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_elt.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_elt.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_elt.proto.bin index d719db6f89c37..429c39ca81b26 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_elt.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_elt.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_encode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_encode.json index 92e95f2c946d0..51d0c871f3494 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_encode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_encode.json @@ -22,7 +22,8 @@ "literal": { "string": "UTF-8" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_encode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_encode.proto.bin index 9644825af470b..a996b0e73435b 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_encode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_encode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_endswith.json b/sql/connect/common/src/test/resources/query-tests/queries/function_endswith.json index 1f7943f5116ba..2577b62ccbca8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_endswith.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_endswith.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_endswith.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_endswith.proto.bin index 2dfef1c6d86bc..89619bc8c38ba 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_endswith.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_endswith.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_equal_null.json b/sql/connect/common/src/test/resources/query-tests/queries/function_equal_null.json index bc53edfa25f5e..50963dd1c9d0a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_equal_null.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_equal_null.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_equal_null.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_equal_null.proto.bin index 2855f3ebbf3c3..ad21a1e96b9de 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_equal_null.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_equal_null.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_every.json b/sql/connect/common/src/test/resources/query-tests/queries/function_every.json index ffefb8cf1103f..31d78cf56c07e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_every.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_every.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "flag" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_every.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_every.proto.bin index 1b28782200223..6ccfd9c879923 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_every.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_every.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_exists.json b/sql/connect/common/src/test/resources/query-tests/queries/function_exists.json index 3ae49d13c5fc6..caa2c6170f251 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_exists.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_exists.json @@ -31,14 +31,16 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }, "arguments": [{ "nameParts": ["x_1"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_exists.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_exists.proto.bin index d808227fdc659..ac30b86a396da 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_exists.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_exists.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_exp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_exp.json index d317efef75eee..a814a7f3a4937 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_exp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_exp.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_exp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_exp.proto.bin index 7def20c94df00..0803af7c262c1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_exp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_exp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_explode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_explode.json index 35ad40ccdd04f..8c0094b3a39d5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_explode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_explode.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_explode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_explode.proto.bin index 9c15f942bb11d..fb5d06b544709 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_explode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_explode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_explode_outer.json b/sql/connect/common/src/test/resources/query-tests/queries/function_explode_outer.json index 
efd7f4b524d47..12c753a38be70 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_explode_outer.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_explode_outer.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_explode_outer.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_explode_outer.proto.bin index 9f2cf9554dd15..8e9ecc3cfacb0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_explode_outer.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_explode_outer.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_expm1.json b/sql/connect/common/src/test/resources/query-tests/queries/function_expm1.json index d425a6de709b7..70cd7b9284654 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_expm1.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_expm1.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_expm1.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_expm1.proto.bin index 3c310cb04ce3d..daa03048ab5b5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_expm1.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_expm1.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_extract.json b/sql/connect/common/src/test/resources/query-tests/queries/function_extract.json index 6ccdb2f9d7531..851ae5de2741a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_extract.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_extract.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_extract.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_extract.proto.bin index 91553c3b94bcf..1d47c480d7708 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_extract.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_extract.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_factorial.json b/sql/connect/common/src/test/resources/query-tests/queries/function_factorial.json index 7f13a10480915..e762531e0be86 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_factorial.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_factorial.json @@ -25,9 +25,11 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_factorial.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_factorial.proto.bin index ac776eb60d2b0..4863eee97ac1c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_factorial.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_factorial.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_filter.json b/sql/connect/common/src/test/resources/query-tests/queries/function_filter.json index 1c71362f75247..6963b28386740 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_filter.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_filter.json @@ -31,14 +31,16 @@ "literal": { "integer": 
10 } - }] + }], + "isInternal": false } }, "arguments": [{ "nameParts": ["x_1"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_filter.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_filter.proto.bin index aa776b474a4d6..b669b9ff69c2b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_filter.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_filter.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_filter_with_pair_input.json b/sql/connect/common/src/test/resources/query-tests/queries/function_filter_with_pair_input.json index f2b85c21af755..94f04a9111a88 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_filter_with_pair_input.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_filter_with_pair_input.json @@ -34,7 +34,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -47,9 +48,11 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -58,7 +61,8 @@ "nameParts": ["y_2"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_filter_with_pair_input.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_filter_with_pair_input.proto.bin index 8cf5f2d65cf29..b927f1c623a44 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_filter_with_pair_input.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_filter_with_pair_input.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_find_in_set.json b/sql/connect/common/src/test/resources/query-tests/queries/function_find_in_set.json index 
538651b52c424..424357c82f13d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_find_in_set.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_find_in_set.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_find_in_set.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_find_in_set.proto.bin index 26abfa0e394c3..662aec53059a7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_find_in_set.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_find_in_set.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_ignore_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_ignore_nulls.json index af55fe44ae8ca..bab44ade0cfaa 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_ignore_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_ignore_nulls.json @@ -22,7 +22,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_ignore_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_ignore_nulls.proto.bin index 7121c820aa737..6c72610c94265 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_ignore_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_ignore_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_respect_nulls.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_respect_nulls.json index 8276e35893feb..85ef747ef81ba 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_respect_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_respect_nulls.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_respect_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_respect_nulls.proto.bin index b843d52111528..b6c8d358976a8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_respect_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_first_value_with_respect_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_ignore_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_ignore_nulls.json index dc33bad3c506a..ec4c9da731d65 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_ignore_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_ignore_nulls.json @@ -22,7 +22,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_ignore_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_ignore_nulls.proto.bin index cb029dfd26be9..a995bb05e6f26 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_ignore_nulls.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_ignore_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_respect_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_respect_nulls.json index 0e315cc6b1bce..244f2d1f9575e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_respect_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_respect_nulls.json @@ -22,7 +22,8 @@ "literal": { "boolean": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_respect_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_respect_nulls.proto.bin index bf1d48903dfab..eb52e48309944 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_respect_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_first_with_respect_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_flatten.json b/sql/connect/common/src/test/resources/query-tests/queries/function_flatten.json index 1f04630fd5f31..3118b0620c0e3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_flatten.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_flatten.json @@ -32,11 +32,14 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_flatten.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_flatten.proto.bin index 9a684850f9cfa..1d930ab05303c 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_flatten.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_flatten.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_floor.json b/sql/connect/common/src/test/resources/query-tests/queries/function_floor.json index 78924f5f33627..9bf149ea295d9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_floor.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_floor.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_floor.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_floor.proto.bin index b52696ca4d00a..1f529ad20219f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_floor.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_floor.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_floor_scale.json b/sql/connect/common/src/test/resources/query-tests/queries/function_floor_scale.json index 394621e4dd314..af37c822a21df 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_floor_scale.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_floor_scale.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_floor_scale.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_floor_scale.proto.bin index ee0665bab644c..f2510d8540374 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_floor_scale.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_floor_scale.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_forall.json b/sql/connect/common/src/test/resources/query-tests/queries/function_forall.json index 4a4914d6a9b1d..a9084fe59bdb6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_forall.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_forall.json @@ -31,14 +31,16 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }, "arguments": [{ "nameParts": ["x_1"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_forall.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_forall.proto.bin index 7fc2821694589..b72863b1db28b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_forall.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_forall.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_format_number.json b/sql/connect/common/src/test/resources/query-tests/queries/function_format_number.json index daa648c0a599e..6f376efc9fa81 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_format_number.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_format_number.json @@ -22,7 +22,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_format_number.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_format_number.proto.bin index 81e2c4d5fd54d..2f156c4336703 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_format_number.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_format_number.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_csv.json b/sql/connect/common/src/test/resources/query-tests/queries/function_from_csv.json index 798e79e6618f5..60242d45866aa 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_from_csv.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_from_csv.json @@ -33,9 +33,11 @@ "literal": { "string": "FAILFAST" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_csv.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_from_csv.proto.bin index 8acd3b619b41e..2a3a14f8914a1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_from_csv.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_from_csv.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json index ddfa91abca05e..850ae645c0497 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.json @@ -22,7 +22,8 @@ "literal": { "string": "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin index 
ad95d0f2b343d..5a1990c957290 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json_with_json_schema.json b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json_with_json_schema.json index ddfa91abca05e..850ae645c0497 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json_with_json_schema.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json_with_json_schema.json @@ -22,7 +22,8 @@ "literal": { "string": "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json_with_json_schema.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json_with_json_schema.proto.bin index ad95d0f2b343d..5a1990c957290 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_from_json_with_json_schema.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_from_json_with_json_schema.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_unixtime.json b/sql/connect/common/src/test/resources/query-tests/queries/function_from_unixtime.json index 81d6608adb18f..bbe54e42d2fe8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_from_unixtime.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_from_unixtime.json @@ -18,7 +18,8 @@ "literal": { "long": "1" } - }] 
+ }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_unixtime.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_from_unixtime.proto.bin index b1c34caaf62f0..5b92281e1f289 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_from_unixtime.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_from_unixtime.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_utc_timestamp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_from_utc_timestamp.json index 5d63fd829f302..ce7d64ebdc541 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_from_utc_timestamp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_from_utc_timestamp.json @@ -22,7 +22,8 @@ "literal": { "string": "-08:00" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_utc_timestamp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_from_utc_timestamp.proto.bin index 34bf9c64f3a97..c562454e01d11 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_from_utc_timestamp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_from_utc_timestamp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json index cfcd40a74b3a7..f137f14baac3a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.json @@ -22,7 +22,8 @@ "literal": { "string": 
"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin index 1cc3a26c254fb..dc331f2b26074 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml_with_json_schema.json b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml_with_json_schema.json index cfcd40a74b3a7..f137f14baac3a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml_with_json_schema.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml_with_json_schema.json @@ -22,7 +22,8 @@ "literal": { "string": "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}}]}" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml_with_json_schema.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml_with_json_schema.proto.bin index 1cc3a26c254fb..dc331f2b26074 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml_with_json_schema.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_from_xml_with_json_schema.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_get.json b/sql/connect/common/src/test/resources/query-tests/queries/function_get.json index 7a2a89447c079..fd8f86b0f518c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_get.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_get.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_get.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_get.proto.bin index be40df955a407..9561814a9e2d2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_get.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_get.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_get_json_object.json b/sql/connect/common/src/test/resources/query-tests/queries/function_get_json_object.json index 17adf9230a6eb..505bfcc07bf89 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_get_json_object.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_get_json_object.json @@ -22,7 +22,8 @@ "literal": { "string": "$.device_type" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_get_json_object.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_get_json_object.proto.bin index 08ad8f4f91bad..df7774ef54842 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_get_json_object.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_get_json_object.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_getbit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_getbit.json index ef33382022a53..8bb777bfc37b2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_getbit.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_getbit.json @@ -22,7 +22,8 @@ "literal": { "integer": 0 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin index 15575e4f7cbb9..b7684864503db 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_getbit.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_greatest.json b/sql/connect/common/src/test/resources/query-tests/queries/function_greatest.json index bf5d50edec84f..92414ef27da5b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_greatest.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_greatest.json @@ -31,7 +31,8 @@ } } } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_greatest.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_greatest.proto.bin index 44d9d5f8cfb2d..a1ba220f29429 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_greatest.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_greatest.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hash.json b/sql/connect/common/src/test/resources/query-tests/queries/function_hash.json index 6ef504a006457..04363f49f72b8 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_hash.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_hash.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hash.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_hash.proto.bin index 284700c4c5ea9..dea61a10eeb52 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_hash.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_hash.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hex.json b/sql/connect/common/src/test/resources/query-tests/queries/function_hex.json index af9d0dd298277..7cfc7a5647adf 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_hex.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_hex.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hex.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_hex.proto.bin index 9d8c3b5e23584..52d2da3ea1e8b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_hex.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_hex.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_histogram_numeric.json b/sql/connect/common/src/test/resources/query-tests/queries/function_histogram_numeric.json index 548b4977ddc50..2758e1a0638f6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_histogram_numeric.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_histogram_numeric.json @@ -22,7 +22,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_histogram_numeric.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_histogram_numeric.proto.bin index 81dbcd476ecbc..465557aa7f48b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_histogram_numeric.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_histogram_numeric.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hour.json b/sql/connect/common/src/test/resources/query-tests/queries/function_hour.json index 2621b9f81913c..e13d18f244903 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_hour.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_hour.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "t" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hour.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_hour.proto.bin index 6cdb50364c133..f57e2333ba855 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_hour.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_hour.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hours.json b/sql/connect/common/src/test/resources/query-tests/queries/function_hours.json index a72a8656362fd..d521c709a93d2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_hours.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_hours.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - 
}] + }], + "isInternal": true } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hours.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_hours.proto.bin index 6e8203b89e320..1c73b8d22fbb3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_hours.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_hours.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hypot.json b/sql/connect/common/src/test/resources/query-tests/queries/function_hypot.json index 2d0d6be0164bc..42fe4ac079293 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_hypot.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_hypot.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_hypot.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_hypot.proto.bin index 3ad07a2a1ee45..8dce8d0b02a00 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_hypot.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_hypot.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ifnull.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ifnull.json index 2a426fe6fff3a..577d45ddfe6e8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ifnull.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ifnull.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ifnull.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_ifnull.proto.bin index c1307c2be8caa..9840001010b01 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ifnull.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ifnull.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ilike.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ilike.json index 46b1b87e03246..aa97c83dba131 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ilike.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ilike.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ilike.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ilike.proto.bin index b1c50e3aaf4b2..8dd2e803b2234 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ilike.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ilike.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.json index 6392912efe85d..f3fcfd7a86b4e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.json @@ -26,7 +26,8 @@ "literal": { "string": "/" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.proto.bin index de0d89f2c8cee..6a5714f5731c4 100644 
Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ilike_with_escape.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_initcap.json b/sql/connect/common/src/test/resources/query-tests/queries/function_initcap.json index 896bb3d0209da..733efb38bac41 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_initcap.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_initcap.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_initcap.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_initcap.proto.bin index 72df35bd9b387..9129dbf467c28 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_initcap.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_initcap.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_inline.json b/sql/connect/common/src/test/resources/query-tests/queries/function_inline.json index 4abdac736d0fe..f63e0184608ee 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_inline.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_inline.json @@ -21,9 +21,11 @@ "unresolvedAttribute": { "unparsedIdentifier": "f" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_inline.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_inline.proto.bin index 261e28e3acaa8..105edc74658df 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_inline.proto.bin 
and b/sql/connect/common/src/test/resources/query-tests/queries/function_inline.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_inline_outer.json b/sql/connect/common/src/test/resources/query-tests/queries/function_inline_outer.json index d74ee83eeff3e..8ad2752040c41 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_inline_outer.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_inline_outer.json @@ -21,9 +21,11 @@ "unresolvedAttribute": { "unparsedIdentifier": "f" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_inline_outer.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_inline_outer.proto.bin index d757e5afe3050..fc847de167c69 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_inline_outer.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_inline_outer.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_length.json b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_length.json index 2b478579f377d..69f7aae01de2b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_length.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_length.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "input_file_block_length" + "functionName": "input_file_block_length", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_length.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_length.proto.bin index 55684ba7d1b02..6dd26fb322275 100644 Binary 
files a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_length.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_length.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_start.json b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_start.json index a85f58c3b9aaf..d05cfb4126397 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_start.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_start.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "input_file_block_start" + "functionName": "input_file_block_start", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_start.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_start.proto.bin index 6fa8027cc82e2..eed5dac21f88a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_start.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_block_start.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_name.json b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_name.json index 47f2e461eba46..7801d2afa72b1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_name.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_name.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "input_file_name" + "functionName": "input_file_name", + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_name.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_name.proto.bin index c3c6414d5d881..b1eae731498e6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_name.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_input_file_name.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.json b/sql/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.json index 7ae72f8f88e5c..2743ad7263a7f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.json @@ -21,9 +21,11 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.proto.bin index 4d3d2624609e7..2bf328f8a9db5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_isnan.json b/sql/connect/common/src/test/resources/query-tests/queries/function_isnan.json index f594918ed930a..12d3d19d7797a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_isnan.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_isnan.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_isnan.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_isnan.proto.bin index 1030abda5b8c2..8c2fad75be346 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_isnan.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_isnan.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_isnotnull.json b/sql/connect/common/src/test/resources/query-tests/queries/function_isnotnull.json index 6ec209e7c24fc..eb011fe4664eb 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_isnotnull.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_isnotnull.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_isnotnull.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_isnotnull.proto.bin index 1b37308d24753..5bd84491a2b94 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_isnotnull.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_isnotnull.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_isnull.json b/sql/connect/common/src/test/resources/query-tests/queries/function_isnull.json index 7443fc97f42cf..11608a0de2195 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_isnull.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_isnull.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_isnull.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_isnull.proto.bin index 3d1fbd4dedfe7..05217e9e9085b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_isnull.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_isnull.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_java_method.json b/sql/connect/common/src/test/resources/query-tests/queries/function_java_method.json index 196dd4869577f..828ccaf2c570e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_java_method.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_java_method.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_java_method.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_java_method.proto.bin index b5cd2ea0e929b..971395d4b3573 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_java_method.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_java_method.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_json_array_length.json b/sql/connect/common/src/test/resources/query-tests/queries/function_json_array_length.json index 36223a451e3f8..1789c0bef0840 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_json_array_length.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_json_array_length.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_json_array_length.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_json_array_length.proto.bin index 817c803d83033..79aefffa10332 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_json_array_length.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_json_array_length.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_json_object_keys.json b/sql/connect/common/src/test/resources/query-tests/queries/function_json_object_keys.json index f8667a1012a08..9f9f60134485c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_json_object_keys.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_json_object_keys.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_json_object_keys.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_json_object_keys.proto.bin index 4be9477ec9185..023ce9ea65266 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_json_object_keys.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_json_object_keys.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_json_tuple.json b/sql/connect/common/src/test/resources/query-tests/queries/function_json_tuple.json index 32de63452c364..4f2b072db3e25 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_json_tuple.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_json_tuple.json @@ -30,7 +30,8 @@ "literal": { "string": "id" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_json_tuple.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_json_tuple.proto.bin index e51be42b38d34..60b1685ece0f1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_json_tuple.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_json_tuple.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_kurtosis.json b/sql/connect/common/src/test/resources/query-tests/queries/function_kurtosis.json index 7399d7a6da388..386c3c8205046 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_kurtosis.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_kurtosis.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_kurtosis.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_kurtosis.proto.bin index 848a4842e2462..30cd079db28d1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_kurtosis.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_kurtosis.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lag.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lag.json index dd1cba376f3c7..f0f38c35c0c5f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lag.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lag.json @@ -35,7 +35,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }, "partitionSpec": [{ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lag.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lag.proto.bin index 7fd85861fb8c8..3abeec8a65bfb 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_lag.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lag.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_day.json b/sql/connect/common/src/test/resources/query-tests/queries/function_last_day.json index 2cb1635caf47e..cbd450c2859fc 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_last_day.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_last_day.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "t" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_day.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_last_day.proto.bin index 1afb5c02ae347..925a919daffc0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_last_day.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_last_day.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_ignore_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_ignore_nulls.json index e78a456082cbd..18e55564d6ac0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_ignore_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_ignore_nulls.json @@ -22,7 +22,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_ignore_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_ignore_nulls.proto.bin index c04f8385995ee..0baa09d55bc20 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_ignore_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_ignore_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_respect_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_respect_nulls.json index cb509997e6533..d1388758fe8a6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_respect_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_respect_nulls.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_respect_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_respect_nulls.proto.bin index cee9838b70438..80bf3dfcc9abb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_respect_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_last_value_with_respect_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_ignore_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_ignore_nulls.json index 6d1be02c78545..cb147d6998478 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_ignore_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_ignore_nulls.json @@ -22,7 +22,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_ignore_nulls.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_ignore_nulls.proto.bin index f6590582c6f5a..2477c1e58803d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_ignore_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_ignore_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_respect_nulls.json b/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_respect_nulls.json index f26e5887ed527..dd68e3d189c03 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_respect_nulls.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_respect_nulls.json @@ -22,7 +22,8 @@ "literal": { "boolean": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_respect_nulls.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_respect_nulls.proto.bin index 69221737be671..a4a02664b5030 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_respect_nulls.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_last_with_respect_nulls.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lcase.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lcase.json index a1610815b6c7d..a3a293bc7b1b8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lcase.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lcase.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lcase.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_lcase.proto.bin index d5627abb0a5d0..e18e7e6781e76 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lcase.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lcase.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lead.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lead.json index ef76586d381dd..aab3c54f48954 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lead.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lead.json @@ -32,7 +32,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }, "partitionSpec": [{ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lead.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lead.proto.bin index 9bcdcdb3617a9..f72eea8071743 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lead.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lead.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_least.json b/sql/connect/common/src/test/resources/query-tests/queries/function_least.json index 403531c9f6958..d0bcb0b07e404 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_least.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_least.json @@ -31,7 +31,8 @@ } } } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_least.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_least.proto.bin index c9ead802a9616..bcb27b7e02114 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_least.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_least.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_left.json b/sql/connect/common/src/test/resources/query-tests/queries/function_left.json index e629782ba6d5b..dbf9948ee3555 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_left.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_left.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_left.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_left.proto.bin index 497cf68194e88..c5b4ff7f56763 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_left.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_left.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_len.json b/sql/connect/common/src/test/resources/query-tests/queries/function_len.json index 884f875a961da..3b353abe2eac6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_len.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_len.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_len.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_len.proto.bin index 939a6c9c3360b..f787d98c0698e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_len.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_len.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_length.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_length.json index f2c3c69255897..cbb6cf10974ad 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_length.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_length.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_length.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_length.proto.bin index a14f94085b3b6..f4a53e9fa48d4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_length.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_length.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein.json b/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein.json index 10caaf184fee5..878dc8f564869 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein.json @@ -22,7 +22,8 @@ "literal": { "string": "bob" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein.proto.bin index 75b48541b7663..0742bd0585665 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein_with_threshold.json b/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein_with_threshold.json index 
5cc30772e8e88..aceb63829aa8a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein_with_threshold.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein_with_threshold.json @@ -26,7 +26,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein_with_threshold.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein_with_threshold.proto.bin index 22e1a3328756e..2ba96d65869c6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein_with_threshold.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_levenshtein_with_threshold.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_like.json b/sql/connect/common/src/test/resources/query-tests/queries/function_like.json index 3ce3431e50f8f..23b9578abf5b3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_like.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_like.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_like.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_like.proto.bin index d9a13f5c79bce..2f27591f3d68e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_like.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_like.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.json b/sql/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.json index 0313398f0ad60..04e27ea1de9e1 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.json @@ -26,7 +26,8 @@ "literal": { "string": "/" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.proto.bin index cc5fefe193fb9..0668e85576e04 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_like_with_escape.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ln.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ln.json index ababbc52d088d..4fe4ccd2aa34d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ln.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ln.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin index ecb87a1fc4102..3296765cb86cd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_localtimestamp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_localtimestamp.json index 68281d2e6d9d1..5683069790afa 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_localtimestamp.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_localtimestamp.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "localtimestamp" + "functionName": "localtimestamp", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_localtimestamp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_localtimestamp.proto.bin index b1a9e70c7c802..e280db3af4ccd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_localtimestamp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_localtimestamp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_locate.json b/sql/connect/common/src/test/resources/query-tests/queries/function_locate.json index 7939fdd2c7559..285374acfe075 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_locate.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_locate.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_locate.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_locate.proto.bin index cc7ced9957a52..e43b34bf0665f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_locate.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_locate.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_locate_with_pos.json b/sql/connect/common/src/test/resources/query-tests/queries/function_locate_with_pos.json index 269f39701608a..87fa8050f4005 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_locate_with_pos.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_locate_with_pos.json @@ -26,7 +26,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_locate_with_pos.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_locate_with_pos.proto.bin index 162ab0108c132..b11ee8f8ae770 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_locate_with_pos.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_locate_with_pos.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log.json b/sql/connect/common/src/test/resources/query-tests/queries/function_log.json index ababbc52d088d..4fe4ccd2aa34d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_log.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_log.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_log.proto.bin index ecb87a1fc4102..3296765cb86cd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_log.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_log.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log10.json b/sql/connect/common/src/test/resources/query-tests/queries/function_log10.json index 13292d83c4727..bced949b9aaf1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_log10.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_log10.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + 
"isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log10.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_log10.proto.bin index 22d4655a6efbd..1a363b7043dc3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_log10.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_log10.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log1p.json b/sql/connect/common/src/test/resources/query-tests/queries/function_log1p.json index 4e9e6847c3c36..95942299457da 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_log1p.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_log1p.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log1p.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_log1p.proto.bin index 9a72c377b0cc4..6097a554cee23 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_log1p.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_log1p.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log2.json b/sql/connect/common/src/test/resources/query-tests/queries/function_log2.json index ec29e154a0e1d..4fe3d22e03415 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_log2.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_log2.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log2.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_log2.proto.bin index 34e3780650540..39aab70e5ac3e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_log2.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_log2.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log_with_base.json b/sql/connect/common/src/test/resources/query-tests/queries/function_log_with_base.json index 6bc2a4ec3335a..3d6e05077dd6c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_log_with_base.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_log_with_base.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_log_with_base.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_log_with_base.proto.bin index 2e64e15ed5555..95f9352d73f93 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_log_with_base.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_log_with_base.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lower.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lower.json index f7fe5beba2c02..e2b4a5816e686 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lower.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lower.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lower.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lower.proto.bin index 7c736d93f7729..f9f4d930ebc23 
100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lower.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lower.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lpad.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lpad.json index b9f3e6700bfa4..7cdd2b75dadc6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lpad.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lpad.json @@ -26,7 +26,8 @@ "literal": { "string": "-" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lpad.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lpad.proto.bin index 470ab1cc44add..8576f2b0be3b7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lpad.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_lpad.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lpad_binary.json b/sql/connect/common/src/test/resources/query-tests/queries/function_lpad_binary.json index aeb39ba09ad20..60eaa163e632d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_lpad_binary.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_lpad_binary.json @@ -26,7 +26,8 @@ "literal": { "binary": "DAoPDg==" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_lpad_binary.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_lpad_binary.proto.bin index b4acebb394c7a..a8651574a6bbb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_lpad_binary.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_lpad_binary.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim.json index dd3b459520221..1c9ac2e57ffbb 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim.proto.bin index 162b6a7337bb9..ed81a583ccaac 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim_with_pattern.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim_with_pattern.json index 01dc81bdae7bc..f67f689d0d950 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim_with_pattern.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim_with_pattern.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim_with_pattern.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim_with_pattern.proto.bin index 0cd62226c9716..6a9d3bf7b00ef 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim_with_pattern.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_ltrim_with_pattern.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_date.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_date.json index a363298dd123a..0f78921e965e5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_date.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_date.json @@ -26,7 +26,8 @@ "literal": { "integer": 14 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_date.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_date.proto.bin index 0526825fccade..c47ea55dbe6cc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_date.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_date.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval.json index 24cd85d7ae442..66e5da74d5e45 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "make_dt_interval" + "functionName": "make_dt_interval", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval.proto.bin index 09c5a25a10cc2..00bedb03e9dc4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days.json index fb65f1fcd9def..9fe3d9dedbc83 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days.proto.bin index 11c67d98f9f49..70cd3d296df6a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours.json index e08a9c3b083ee..f902694ef7774 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours.proto.bin index bd16de042f61d..d8107489e1e78 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins.json index a4b8c14538ae3..5a760c1b469fb 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins.proto.bin index 7595205c6bb0e..89a249a8535fc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins_secs.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins_secs.json index 20eaa7521d3d9..745012b755db3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins_secs.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins_secs.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins_secs.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins_secs.proto.bin index 6db1bc8b51bea..ba6a2b4e10f82 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins_secs.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_dt_interval_days_hours_mins_secs.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval.json index cdbe616565287..eceeeddd4b15b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "make_interval" + "functionName": "make_interval", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval.proto.bin index 8d4327eeff426..c569945e7b4c1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years.json index e5afa5ec3349a..e49924238ced8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years.json @@ 
-18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years.proto.bin index 7be990a47aba6..84ed402ba145f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months.json index 9de86b70c169e..a7f87a2015bc1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months.proto.bin index 219cc5a023d45..a886b4d11afd3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks.json index c387757c6f739..7ca71d529325b 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks.proto.bin index 7fb48227f69c7..af3f2d4b4519f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days.json index 54274116ee951..d0741256492d1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days.proto.bin index 58d62b76ac5f9..0d3355c516bdd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours.json index a6e343532ec2c..aecddb3dcb53f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours.json @@ -34,7 +34,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours.proto.bin index 3133c2d497ea6..52b889c8ad81b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins.json index d921fda962896..b5ed4c2e8d4e8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] 
} diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins.proto.bin index cf2ad98b5c16c..7d06032ee8a45 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins_secs.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins_secs.json index 09f95bc933b10..372b13ce44621 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins_secs.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins_secs.json @@ -42,7 +42,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins_secs.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins_secs.proto.bin index dca680fd90b61..033d88d328ed1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins_secs.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_interval_years_months_weeks_days_hours_mins_secs.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_with_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_with_timezone.json index a58259eefe742..7572d311648c0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_with_timezone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_with_timezone.json @@ -42,7 +42,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_with_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_with_timezone.proto.bin index 3eac6534c6510..ecc490af0f3c4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_with_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_with_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_without_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_without_timezone.json index 5c87a856fc6c0..9e10c499ee9ee 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_without_timezone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_without_timezone.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_without_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_without_timezone.proto.bin index 
b35f9fd474607..2baa0d4f269d4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_without_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ltz_without_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ntz.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ntz.json index 4cc4f1a11acdc..6241859ae9269 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ntz.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ntz.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ntz.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ntz.proto.bin index 5a6554443ceca..17dc37d99f63f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ntz.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_ntz.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_with_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_with_timezone.json index a7a5ff132c083..a8d427ae58c0c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_with_timezone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_with_timezone.json @@ -42,7 +42,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_with_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_with_timezone.proto.bin index 77c0d5961c804..660c4399e35f4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_with_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_with_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_without_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_without_timezone.json index 286ed33f82e10..8426f3dd45143 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_without_timezone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_without_timezone.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_without_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_without_timezone.proto.bin index f91efead687a8..348befe79dbf8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_without_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_timestamp_without_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval.json index dc1848be0cc42..f37ca015eed91 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "make_ym_interval" + "functionName": "make_ym_interval", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval.proto.bin index eaffc7c237094..0c63f66caf9a0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years.json index d789064ad9b8e..81b70cf342205 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years.proto.bin index 1938b7c53bdd1..7486df5d7530d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years_months.json b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years_months.json index 
aeffbbb4a1a14..1eb67c528682e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years_months.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years_months.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years_months.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years_months.proto.bin index f03f6ecce83de..5a85f5981b948 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years_months.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_make_ym_interval_years_months.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map.json index ca9d3bf2bcc71..830445d3facf8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_map.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map.json @@ -30,7 +30,8 @@ "literal": { "string": "dummy" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map.proto.bin index 229a48b75131d..32b233fa939db 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_concat.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_concat.json index f56f6cee20ab0..6e076de494412 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_map_concat.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_concat.json @@ -55,11 +55,14 @@ }, "name": ["b"] } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_concat.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_concat.proto.bin index 0a76d3a1193ea..6c04979af6da7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_concat.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map_concat.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_contains_key.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_contains_key.json index 56833f9651023..c25e508806149 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_map_contains_key.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_contains_key.json @@ -22,7 +22,8 @@ "literal": { "string": "xyz" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_contains_key.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_contains_key.proto.bin index e517479020e16..09bd7fa7c4796 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_contains_key.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map_contains_key.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_entries.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_entries.json index 0226506545010..310b3980931a1 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_map_entries.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_entries.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "f" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_entries.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_entries.proto.bin index f1451d4ad7ba4..1050436839a0a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_entries.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map_entries.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_filter.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_filter.json index b50e77c0bf8e9..48c7871244f19 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_map_filter.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_filter.json @@ -31,7 +31,8 @@ "literal": { "string": "baz" } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -40,7 +41,8 @@ "nameParts": ["y_2"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_filter.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_filter.proto.bin index 7f3d0c31fd6fe..a6143e55099ff 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_filter.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map_filter.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_arrays.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_arrays.json index 1eb1f7d2ef066..0ce0f1f547174 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_arrays.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_arrays.json @@ -25,7 +25,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -38,9 +39,11 @@ "literal": { "string": "two" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_arrays.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_arrays.proto.bin index f5333b1c882bc..e85aba1a35ce7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_arrays.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_arrays.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_entries.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_entries.json index 1e48a1c2082df..e505d884d85f6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_entries.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_entries.json @@ -34,7 +34,8 @@ "unresolvedNamedLambdaVariable": { "nameParts": ["x_1"] } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -43,9 +44,11 @@ "nameParts": ["y_2"] }] } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_entries.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_entries.proto.bin index 0dd0d31350991..88d178e75c4fe 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_entries.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_map_from_entries.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_keys.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_keys.json index 5af013295cd9f..1e25cd90ac88c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_map_keys.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_keys.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "f" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_keys.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_keys.proto.bin index ee19968bacc2c..b45fde5acd6bd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_keys.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map_keys.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_values.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_values.json index 3c5eb651801dc..96ca6b6807963 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_map_values.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_values.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "f" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_values.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_values.proto.bin index 4cd7c488ada48..6708875252f51 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_values.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map_values.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_map_zip_with.json b/sql/connect/common/src/test/resources/query-tests/queries/function_map_zip_with.json index d13bd8dce75f3..9e913743009d1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_map_zip_with.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_map_zip_with.json @@ -53,7 +53,8 @@ } } } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -64,7 +65,8 @@ "nameParts": ["z_3"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_map_zip_with.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_map_zip_with.proto.bin index 2770b083e32ef..65dba7ed7bbd1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_map_zip_with.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_map_zip_with.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask.json b/sql/connect/common/src/test/resources/query-tests/queries/function_mask.json index c0473466a3e1c..f4c6dad9b6385 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_mask.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_mask.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_mask.proto.bin index 5e94c2675937d..6397cc4fe4adc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_mask.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_mask.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar.json b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar.json index 571d514e72ded..6dc93c69e10e8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar.json @@ -22,7 +22,8 @@ "literal": { "string": "X" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar.proto.bin index 0f6c4b579c4f5..699d662fcca48 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar.json b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar.json index ae527d70cf162..671a19a8900af 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar.json @@ -26,7 +26,8 @@ "literal": { "string": "x" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar.proto.bin index 5a6b4d7caa60e..c754da066d573 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar.json b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar.json index e7fee11d3169e..f12f44b3ecb38 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar.json @@ -30,7 +30,8 @@ "literal": { "string": "n" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar.proto.bin index f0a2e7cb643af..473f724d4126c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar_otherChar.json b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar_otherChar.json index d6076ae558bc7..1ddb661b636bb 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar_otherChar.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar_otherChar.json @@ -34,7 +34,8 @@ "literal": { "string": "*" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar_otherChar.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar_otherChar.proto.bin index cb5f090361b20..bf641173435f3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar_otherChar.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_mask_with_specific_upperChar_lowerChar_digitChar_otherChar.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_max.json b/sql/connect/common/src/test/resources/query-tests/queries/function_max.json index b23dd9d14c643..1514b34f8b462 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_max.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_max.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_max.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_max.proto.bin index 788c9539b5767..d36b5d79ecbd9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_max.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_max.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_max_by.json b/sql/connect/common/src/test/resources/query-tests/queries/function_max_by.json index da311e340cc50..1048a30325e5a 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_max_by.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_max_by.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_max_by.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_max_by.proto.bin index 284c2453af8bd..1f1832962cb34 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_max_by.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_max_by.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_md5.json b/sql/connect/common/src/test/resources/query-tests/queries/function_md5.json index e8718594b0be3..d954d60a9c68a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_md5.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_md5.json @@ -26,7 +26,8 @@ } } } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_md5.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_md5.proto.bin index d3ec7c26a2ede..87ee03e940081 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_md5.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_md5.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_median.json b/sql/connect/common/src/test/resources/query-tests/queries/function_median.json index 7331454b9ecb0..a358a25d85705 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_median.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_median.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } 
- }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_median.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_median.proto.bin index 59533e5be5992..5a80a2f7cd44b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_median.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_median.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_min.json b/sql/connect/common/src/test/resources/query-tests/queries/function_min.json index 1b7266b6774e4..3fba2b795a224 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_min.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_min.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_min.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_min.proto.bin index b82f4c5309222..2ae3da3391fa5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_min.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_min.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_min_by.json b/sql/connect/common/src/test/resources/query-tests/queries/function_min_by.json index d2478f5e81abe..6c9b99ad7d43d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_min_by.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_min_by.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_min_by.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_min_by.proto.bin index ddc642b95000c..da76415ec74a0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_min_by.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_min_by.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_minute.json b/sql/connect/common/src/test/resources/query-tests/queries/function_minute.json index 7c749cdff82f5..c94a8703d38e9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_minute.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_minute.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "t" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_minute.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_minute.proto.bin index e81b7dad85331..b0b743773e902 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_minute.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_minute.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_mode.json index 8e8183e9e0883..a4f3b601ad47a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_mode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_mode.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_mode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_mode.proto.bin index dca0953a387b1..e3dfc96922e62 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_mode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_mode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_monotonically_increasing_id.json b/sql/connect/common/src/test/resources/query-tests/queries/function_monotonically_increasing_id.json index 0a14f1008976e..01ca4536c97f9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_monotonically_increasing_id.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_monotonically_increasing_id.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "monotonically_increasing_id" + "functionName": "monotonically_increasing_id", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_monotonically_increasing_id.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_monotonically_increasing_id.proto.bin index 724ce3ac6904c..2e86a0566afaf 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_monotonically_increasing_id.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_monotonically_increasing_id.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_month.json b/sql/connect/common/src/test/resources/query-tests/queries/function_month.json index 7ea1e5d0375e9..6343ed28faa01 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_month.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_month.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_month.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_month.proto.bin index b97100a6fe2ec..1c09b5d1f26c2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_month.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_month.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_monthname.json b/sql/connect/common/src/test/resources/query-tests/queries/function_monthname.json index c5ad3485252f1..7da09908cadaa 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_monthname.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_monthname.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_monthname.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_monthname.proto.bin index 4518bb8d74253..958b9b34dff86 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_monthname.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_monthname.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_months.json b/sql/connect/common/src/test/resources/query-tests/queries/function_months.json index 278bab76a6544..235893cf6cdf3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_months.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_months.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": true } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_months.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_months.proto.bin index fdcd96750dc9c..1a689d77c7019 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_months.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_months.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_months_between.json b/sql/connect/common/src/test/resources/query-tests/queries/function_months_between.json index 0fa772d26cd41..f02f9c3ea416a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_months_between.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_months_between.json @@ -16,13 +16,15 @@ "functionName": "months_between", "arguments": [{ "unresolvedFunction": { - "functionName": "current_date" + "functionName": "current_date", + "isInternal": false } }, { "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_months_between.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_months_between.proto.bin index 22ddc1813e0fb..34b5c49c83375 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_months_between.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_months_between.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_months_between_with_roundoff.json b/sql/connect/common/src/test/resources/query-tests/queries/function_months_between_with_roundoff.json index d11bfbd7f2426..f03709aece83c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_months_between_with_roundoff.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_months_between_with_roundoff.json @@ -16,7 +16,8 @@ "functionName": "months_between", "arguments": [{ "unresolvedFunction": { - "functionName": "current_date" + "functionName": "current_date", + "isInternal": false } }, { 
"unresolvedAttribute": { @@ -26,7 +27,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_months_between_with_roundoff.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_months_between_with_roundoff.proto.bin index bf9c545911ffd..a072570756411 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_months_between_with_roundoff.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_months_between_with_roundoff.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_named_struct.json b/sql/connect/common/src/test/resources/query-tests/queries/function_named_struct.json index c4d92131ed06c..a23893ddb0fcf 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_named_struct.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_named_struct.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_named_struct.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_named_struct.proto.bin index b595cfc282036..87c310823426b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_named_struct.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_named_struct.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nanvl.json b/sql/connect/common/src/test/resources/query-tests/queries/function_nanvl.json index 69daab270c2b9..151c3d830716f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_nanvl.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_nanvl.json @@ -22,7 +22,8 @@ 
"unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nanvl.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_nanvl.proto.bin index f314a73dcae65..edddf04956a74 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_nanvl.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_nanvl.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_negate.json b/sql/connect/common/src/test/resources/query-tests/queries/function_negate.json index e269fabe44be1..96f9359193fbd 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_negate.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_negate.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_negate.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_negate.proto.bin index 9c56c111ceee6..f4d42e82e0c0d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_negate.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_negate.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_negative.json b/sql/connect/common/src/test/resources/query-tests/queries/function_negative.json index e269fabe44be1..96f9359193fbd 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_negative.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_negative.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin index 9c56c111ceee6..f4d42e82e0c0d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_negative.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_next_day.json b/sql/connect/common/src/test/resources/query-tests/queries/function_next_day.json index 486523dcad3ec..ad6f3bb22ff82 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_next_day.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_next_day.json @@ -22,7 +22,8 @@ "literal": { "string": "Mon" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_next_day.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_next_day.proto.bin index a97bd75f129db..8cece90ab671b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_next_day.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_next_day.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_now.json b/sql/connect/common/src/test/resources/query-tests/queries/function_now.json index 98556585c3e31..1ceb0bd1366ff 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_now.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_now.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "now" + "functionName": "now", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_now.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_now.proto.bin index a8fcd67fa1982..f9ab22b653c09 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_now.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_now.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nth_value.json b/sql/connect/common/src/test/resources/query-tests/queries/function_nth_value.json index 4c764a5d5603c..97f434a6d71ae 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_nth_value.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_nth_value.json @@ -28,7 +28,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }, "partitionSpec": [{ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nth_value.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_nth_value.proto.bin index f87e1695f22e3..cd6eeac2e054e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_nth_value.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_nth_value.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ntile.json b/sql/connect/common/src/test/resources/query-tests/queries/function_ntile.json index 2346a788b64bd..595cfe02b8631 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ntile.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ntile.json @@ -20,7 +20,8 @@ "literal": { "integer": 4 } - }] + }], + "isInternal": false } }, "partitionSpec": [{ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ntile.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ntile.proto.bin index d9ccd2e8a6007..fa7d6cac0bf17 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_ntile.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ntile.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nullif.json b/sql/connect/common/src/test/resources/query-tests/queries/function_nullif.json index 3892eb19fc52c..ac9f5620f9243 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_nullif.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_nullif.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nullif.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_nullif.proto.bin index 9bbf5f4ccb8ac..0217381b686ff 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_nullif.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_nullif.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nvl.json b/sql/connect/common/src/test/resources/query-tests/queries/function_nvl.json index 483448c26d114..1bf1f22bcad2b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_nvl.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_nvl.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nvl.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_nvl.proto.bin index 21a9b37eb65ec..663f7c714883c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_nvl.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_nvl.proto.bin differ diff 
--git a/sql/connect/common/src/test/resources/query-tests/queries/function_nvl2.json b/sql/connect/common/src/test/resources/query-tests/queries/function_nvl2.json index 8db7f9ba6292c..408d6eba0a05b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_nvl2.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_nvl2.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_nvl2.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_nvl2.proto.bin index 8b7f90bf27552..627a671a085e2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_nvl2.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_nvl2.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_octet_length.json b/sql/connect/common/src/test/resources/query-tests/queries/function_octet_length.json index 7be9ac82662a4..bfd624216f70d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_octet_length.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_octet_length.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_octet_length.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_octet_length.proto.bin index 484ebbb6487b0..3f94747ea2595 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_octet_length.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_octet_length.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_overlay.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_overlay.json index b580570f923a6..28cb276fe4cb1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_overlay.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_overlay.json @@ -26,7 +26,8 @@ "literal": { "integer": 4 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_overlay.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_overlay.proto.bin index 2110ae9c14610..43d75d4a07231 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_overlay.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_overlay.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_overlay_with_len.json b/sql/connect/common/src/test/resources/query-tests/queries/function_overlay_with_len.json index 99d5426c46fba..79f0be1011dab 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_overlay_with_len.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_overlay_with_len.json @@ -30,7 +30,8 @@ "literal": { "string": "3" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_overlay_with_len.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_overlay_with_len.proto.bin index 9a09d28d84fde..9521c75b4a83e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_overlay_with_len.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_overlay_with_len.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_json.json b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_json.json index dfcf56c19223e..4a84cbe99b65d 
100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_json.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_json.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_json.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_json.proto.bin index a7187fa2c1af0..cea247c862173 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_json.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_json.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url.json b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url.json index e03b86c21eb94..e193266998299 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url.proto.bin index 56917289c1ec9..f15b2316d3c2e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.json b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.json index bd627911ef22d..862e3b5aa8d3a 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.proto.bin index 231622cbd8a6b..543b9dd6f43dc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_parse_url_with_key.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_percent_rank.json b/sql/connect/common/src/test/resources/query-tests/queries/function_percent_rank.json index d8778ec8cd81d..4770d3d81d6d5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_percent_rank.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_percent_rank.json @@ -15,7 +15,8 @@ "window": { "windowFunction": { "unresolvedFunction": { - "functionName": "percent_rank" + "functionName": "percent_rank", + "isInternal": false } }, "partitionSpec": [{ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_percent_rank.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_percent_rank.proto.bin index d668f7e1504cb..2dcfbf3777ee9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_percent_rank.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_percent_rank.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_approx.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_approx.json index 6289464de2a37..16f5f1f2b4b7b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_approx.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_approx.json @@ -26,7 +26,8 @@ "literal": { "integer": 20 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_approx.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_approx.proto.bin index f44ec86888f6c..159c21c647729 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_approx.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_approx.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_with_frequency.json b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_with_frequency.json index f57804426643d..fb548fd0233e1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_with_frequency.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_with_frequency.json @@ -26,7 +26,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_with_frequency.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_with_frequency.proto.bin index 91d6279f9bd8c..13be09fc2901a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_with_frequency.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_with_frequency.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_without_frequency.json b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_without_frequency.json index 44e2c98a4dc60..3af4be6aad5cd 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_without_frequency.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_without_frequency.json @@ -22,7 +22,8 @@ "literal": { "double": 0.3 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_without_frequency.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_without_frequency.proto.bin index 45b807e5ffbd2..93156e49b4556 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_without_frequency.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_percentile_without_frequency.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_pi.json b/sql/connect/common/src/test/resources/query-tests/queries/function_pi.json index 46474dfd8e369..d73ca1d6ca691 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_pi.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_pi.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "pi" + "functionName": "pi", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin index 14f018904bfb7..33fee15270257 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_pi.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_pmod.json b/sql/connect/common/src/test/resources/query-tests/queries/function_pmod.json index 1dc2cb54cbb67..fa6edbcda84f8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_pmod.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_pmod.json @@ -22,7 +22,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_pmod.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_pmod.proto.bin index a2bb94dbb5173..068b878b77ca5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_pmod.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_pmod.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode.json index f8a9db37e62be..261ee78e20b43 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode.proto.bin index fc50f5f4c85b7..9b22124951e80 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode_outer.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode_outer.json index 0e8cd4c1509e1..71d11fd72cd8e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode_outer.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode_outer.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode_outer.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode_outer.proto.bin index 19d700665e7f5..d771cb8c33739 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode_outer.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_posexplode_outer.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_position.json b/sql/connect/common/src/test/resources/query-tests/queries/function_position.json index 7b005e2bb8213..e71a363461f8d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_position.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_position.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_position.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_position.proto.bin index 34b7e301fe943..db2530e0625ba 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_position.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_position.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_position_with_start.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_position_with_start.json index 2cd04992d1da8..0a1f3fc42a29a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_position_with_start.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_position_with_start.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_position_with_start.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_position_with_start.proto.bin index b34eaf80f8866..4f092eae4057a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_position_with_start.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_position_with_start.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_positive.json b/sql/connect/common/src/test/resources/query-tests/queries/function_positive.json index a8b3a2d6244bb..26f8ae17bbd19 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_positive.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_positive.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin index 5507abce8caac..32e7859676cfe 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_positive.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_pow.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_pow.json index 187636fb360c6..b9b24218fd99e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_pow.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_pow.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_pow.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_pow.proto.bin index 6e1d3b06fe87a..52a249ada18f6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_pow.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_pow.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_power.json b/sql/connect/common/src/test/resources/query-tests/queries/function_power.json index 187636fb360c6..b9b24218fd99e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_power.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_power.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin index 6e1d3b06fe87a..52a249ada18f6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_power.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_printf.json b/sql/connect/common/src/test/resources/query-tests/queries/function_printf.json index 73ca595e8650b..c3ead5f1388b5 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_printf.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_printf.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_printf.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_printf.proto.bin index 3fb3862f44d91..b78d5046c98dc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_printf.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_printf.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_product.json b/sql/connect/common/src/test/resources/query-tests/queries/function_product.json index 1dfb7f81912d3..802f3e77e8cd1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_product.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_product.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": true } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_product.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_product.proto.bin index 8c3fbd31eb6b3..a7ff0061481b4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_product.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_product.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_quarter.json b/sql/connect/common/src/test/resources/query-tests/queries/function_quarter.json index b95867e0be963..eb8e75f914318 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_quarter.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_quarter.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_quarter.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_quarter.proto.bin index fdc2d96fb08ca..d9ad4e20a3aed 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_quarter.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_quarter.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_radians.json b/sql/connect/common/src/test/resources/query-tests/queries/function_radians.json index 837960dedc653..83f211272c123 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_radians.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_radians.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_radians.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_radians.proto.bin index 33a2521b22ac9..e371fb5947a5b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_radians.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_radians.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_raise_error.json b/sql/connect/common/src/test/resources/query-tests/queries/function_raise_error.json index 5318466706bd8..6fd5cfe1194d3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_raise_error.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_raise_error.json @@ -18,7 +18,8 @@ "literal": { "string": "kaboom" } 
- }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_raise_error.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_raise_error.proto.bin index 7fbd33b9869ca..678fc8cda7afb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_raise_error.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_raise_error.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rand_with_seed.json b/sql/connect/common/src/test/resources/query-tests/queries/function_rand_with_seed.json index 453ea54bd0ef3..67c1250a72ff8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_rand_with_seed.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_rand_with_seed.json @@ -18,7 +18,8 @@ "literal": { "long": "133" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rand_with_seed.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_rand_with_seed.proto.bin index 566a49d641293..cb15624497821 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_rand_with_seed.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_rand_with_seed.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_randn_with_seed.json b/sql/connect/common/src/test/resources/query-tests/queries/function_randn_with_seed.json index ef84f05c3e193..f043d22159b20 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_randn_with_seed.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_randn_with_seed.json @@ -18,7 +18,8 @@ "literal": { "long": "133" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_randn_with_seed.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_randn_with_seed.proto.bin index b0064842bf308..6f601e62c262b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_randn_with_seed.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_randn_with_seed.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_random_with_seed.json b/sql/connect/common/src/test/resources/query-tests/queries/function_random_with_seed.json index 11238a43ec1a3..fc0c945b825ae 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_random_with_seed.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_random_with_seed.json @@ -18,7 +18,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_random_with_seed.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_random_with_seed.proto.bin index aa4208afedb88..918de15afa6b5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_random_with_seed.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_random_with_seed.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rank.json b/sql/connect/common/src/test/resources/query-tests/queries/function_rank.json index 93c8dc38d668a..905af83a134f2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_rank.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_rank.json @@ -15,7 +15,8 @@ "window": { "windowFunction": { "unresolvedFunction": { - "functionName": "rank" + "functionName": "rank", + "isInternal": false } }, "partitionSpec": [{ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_rank.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_rank.proto.bin index 3aef331fb1739..8d72dd7fddafa 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_rank.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_rank.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_reduce.json b/sql/connect/common/src/test/resources/query-tests/queries/function_reduce.json index 4928145bda572..cad612fbc66d2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_reduce.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_reduce.json @@ -35,7 +35,8 @@ "unresolvedNamedLambdaVariable": { "nameParts": ["y_2"] } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -55,7 +56,8 @@ "nameParts": ["x_3"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_reduce.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_reduce.proto.bin index 2532c111e3874..89868c4ea5ef6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_reduce.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_reduce.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_reflect.json b/sql/connect/common/src/test/resources/query-tests/queries/function_reflect.json index 2b0fe7911150c..7ae607ccbeb92 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_reflect.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_reflect.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_reflect.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_reflect.proto.bin index 31c6c9bf13150..be0973b5020b9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_reflect.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_reflect.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp.json index 005d7264969f2..890dc31aa5a19 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp.json @@ -22,7 +22,8 @@ "literal": { "string": "[a-z]+b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp.proto.bin index 0379829055998..c138f434647ff 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_count.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_count.json index 540f1821f50e4..23b90b66e6115 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_count.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_count.json @@ -22,7 +22,8 @@ "literal": { "string": "\\d+" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_count.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_count.proto.bin index 3afcfd8c21e7c..ec3970a0434ef 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_count.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_count.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract.json index 5d9c7a5b4a5ab..e3b3650c16ba5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract.json @@ -26,7 +26,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract.proto.bin index 32ba8b6dcb5e9..9fa17177b5be4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.json index ebe2f581e3de2..04186ace547a0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.json @@ -26,7 +26,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.proto.bin index 2cf31e5f75f4f..12d2feefb602c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_with_regex_group_index.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.json index 84a2e378ed2e3..34459b011dcf3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.json @@ -22,7 +22,8 @@ "literal": { "string": "(\\d+)([a-z]+)" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.proto.bin index 529cae91ce595..3fbcb69e8ae54 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all_without_regex_group_index.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.json index 
cb44dda5ba2c2..69d171eca7f55 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.json @@ -26,7 +26,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.proto.bin index 55cc77eb3cd1f..32649c83ecc75 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_with_regex_group_index.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.json index eeab13abaa6da..e5ee8f177efdd 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.json @@ -22,7 +22,8 @@ "literal": { "string": "\\d+(a|b|m)" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.proto.bin index 3aee655d92c65..791af5111d72d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_instr_without_regex_group_index.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_like.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_like.json index 289fb3d9b4eab..4856ead38ac16 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_like.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_like.json @@ -22,7 +22,8 @@ "literal": { "string": "[a-z]+b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_like.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_like.proto.bin index e7bb85bfa47d8..65e383f0e95f0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_like.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_like.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_replace.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_replace.json index 83dd7a8569fd4..b6a237881c400 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_replace.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_replace.json @@ -26,7 +26,8 @@ "literal": { "string": "XXX" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_replace.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_replace.proto.bin index b7d3fde25cf85..6d7dd2cb762f6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_replace.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_replace.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_substr.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_substr.json index 067652959a94f..0e0c6ea3c4ff1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_substr.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_substr.json @@ -22,7 +22,8 @@ "literal": { "string": "\\d{2}(a|b|m)" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_substr.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_substr.proto.bin index 43b987c612cd9..5c629e755a99b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_substr.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regexp_substr.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgx.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgx.json index 4fdc9b035d764..0c220a9401193 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgx.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgx.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgx.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgx.proto.bin index 5771d141728ad..e71110d6a3511 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgx.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgx.proto.bin 
differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgy.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgy.json index af225fdf5a895..a3ce82193c43a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgy.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgy.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgy.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgy.proto.bin index 0a6dcf0106ac7..8c9084a77a0fb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgy.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_avgy.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_count.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_count.json index 510fc78140a6e..cc51b8bd0a10e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_count.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_count.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_count.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_count.proto.bin index b1eff9f4d0329..325137df60e1b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_count.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_count.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_intercept.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_intercept.json index a8596615a2d7f..961fd09d0a7d3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_intercept.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_intercept.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_intercept.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_intercept.proto.bin index b9a1c0eff8943..49ee2215109cd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_intercept.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_intercept.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_r2.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_r2.json index 9f88c6ad41268..7d6d482dd2430 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_r2.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_r2.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_r2.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_r2.proto.bin index 0011348d3880a..aca6143f423a6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_r2.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_r2.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_slope.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_slope.json index 
9503b2c6feff3..7d93ecaf46afb 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_slope.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_slope.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_slope.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_slope.proto.bin index 69c918a7861f2..ccfe35dbe4485 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_slope.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_slope.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxx.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxx.json index fb243c9989ecf..705e3c357a7f7 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxx.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxx.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxx.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxx.proto.bin index df31a2e6851f9..9ebce4d26e382 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxx.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxx.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxy.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxy.json index 459deaa391e8d..4c35e57128935 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxy.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxy.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxy.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxy.proto.bin index db51c0bc32a79..8d7b682aef3a2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxy.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_sxy.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_syy.json b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_syy.json index 877fbc3aa7c51..624dfa2bf855d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_syy.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_syy.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_syy.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_syy.proto.bin index 6452b277a6e27..0bf9aa5bcf263 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_regr_syy.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_regr_syy.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_replace.json b/sql/connect/common/src/test/resources/query-tests/queries/function_replace.json index 2f6df6833f368..730207dc6e7d3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_replace.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_replace.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_replace.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_replace.proto.bin index 0564f7ed57583..a1a5792013595 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_replace.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_replace.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.json b/sql/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.json index 2e91450552c19..4b4039e16e5b1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.proto.bin index 136a6b31821af..8aea298529272 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_replace_with_specified_string.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_reverse.json b/sql/connect/common/src/test/resources/query-tests/queries/function_reverse.json index 93869adfbedca..3f028731c409e 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_reverse.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_reverse.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "e" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_reverse.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_reverse.proto.bin index dd7f2d5de513d..f03f20dbba7f0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_reverse.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_reverse.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_right.json b/sql/connect/common/src/test/resources/query-tests/queries/function_right.json index 843f5be44a650..966e92157ed94 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_right.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_right.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_right.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_right.proto.bin index b8d0156c98132..d48b49b52e79e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_right.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_right.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rint.json b/sql/connect/common/src/test/resources/query-tests/queries/function_rint.json index ea5bcebf81d72..af368a5694875 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_rint.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_rint.json @@ -18,7 
+18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rint.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_rint.proto.bin index bd47adc8476fa..3f8c7c35ceec5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_rint.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_rint.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rlike.json b/sql/connect/common/src/test/resources/query-tests/queries/function_rlike.json index fe8480a0800d1..ec6188eb31ac6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_rlike.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_rlike.json @@ -22,7 +22,8 @@ "literal": { "string": "[a-z]+b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rlike.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_rlike.proto.bin index 79bbbe92c7fdb..028162c3ddcab 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_rlike.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_rlike.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_round.json b/sql/connect/common/src/test/resources/query-tests/queries/function_round.json index 585a0befb224d..d42711c424c46 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_round.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_round.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_round.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_round.proto.bin index 8625ccb1a58f1..40e173d9df4a0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_round.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_round.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_row_number.json b/sql/connect/common/src/test/resources/query-tests/queries/function_row_number.json index 3d5ac8afe3db3..9972a7e942c96 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_row_number.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_row_number.json @@ -15,7 +15,8 @@ "window": { "windowFunction": { "unresolvedFunction": { - "functionName": "row_number" + "functionName": "row_number", + "isInternal": false } }, "partitionSpec": [{ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_row_number.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_row_number.proto.bin index 90b4fcb27d3f1..4368883ca2e36 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_row_number.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_row_number.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rpad.json b/sql/connect/common/src/test/resources/query-tests/queries/function_rpad.json index d9b78a0cfd7a9..8c9aaf8242e81 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_rpad.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_rpad.json @@ -26,7 +26,8 @@ "literal": { "string": "-" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rpad.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_rpad.proto.bin index 
d4c355afee0b7..8f945f6329135 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_rpad.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_rpad.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rpad_binary.json b/sql/connect/common/src/test/resources/query-tests/queries/function_rpad_binary.json index 0daaf1636f13d..3e04a1bb094b0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_rpad_binary.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_rpad_binary.json @@ -26,7 +26,8 @@ "literal": { "binary": "CwoLDg==" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rpad_binary.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_rpad_binary.proto.bin index c6f9f22146c61..8396bd5bc016e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_rpad_binary.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_rpad_binary.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim.json b/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim.json index 5fe66e8e33596..39980dbd802d8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim.proto.bin index 4320bf6ac397c..e11f621a033f6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim_with_pattern.json b/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim_with_pattern.json index 0ac2401f9eacf..a41ad58bb73ba 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim_with_pattern.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim_with_pattern.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim_with_pattern.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim_with_pattern.proto.bin index 1332f5b330000..6755240b51672 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim_with_pattern.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_rtrim_with_pattern.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_csv.json b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_csv.json index 6df6438a1a9ca..0e6f8f1425d6e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_csv.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_csv.json @@ -29,9 +29,11 @@ "literal": { "string": "|" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_csv.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_csv.proto.bin index 99475ddf30d11..6ee33f3a9e986 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_csv.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_csv.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json.json b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json.json index 06110d326e1ef..5760bbfc038dc 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json.json @@ -18,7 +18,8 @@ "literal": { "string": "[{\"col\":01}]" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json.proto.bin index c4ca00e629262..f1934d780d8fd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json_with_options.json b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json_with_options.json index ab05ffa940c50..bca6b670d8b5d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json_with_options.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json_with_options.json @@ -29,9 +29,11 @@ "literal": { "string": "true" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json_with_options.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json_with_options.proto.bin index 482485501dd37..7b125c550aef4 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json_with_options.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_json_with_options.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.json b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.json index c4ea467bc1a24..c0ab18fd4b4b4 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.json @@ -21,9 +21,11 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.proto.bin index 0971460bf4112..ecea29dcd41ac 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.json b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.json index 19bf62f70b20f..c31f58aa3e320 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.json @@ -21,9 +21,11 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.proto.bin index 68c872ef0d4d2..a6a3f5bccd26b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sec.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sec.json index 1cab2239755ca..b54347f1488ab 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sec.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sec.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sec.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sec.proto.bin index 8760f57a6d4f0..f1b4e4f830a0c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sec.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sec.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_second.json b/sql/connect/common/src/test/resources/query-tests/queries/function_second.json index c77a572b88aa0..5040147fa8b2b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_second.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_second.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "t" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_second.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_second.proto.bin index 193c46e917ba2..196f57b276cad 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_second.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_second.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences.json index 412ac0272dd57..a2ace14172722 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences.proto.bin index 4b62f22574d32..0ce754a8375e6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language.json index 869e074ccd604..ffb3065d74494 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language.json @@ -22,7 +22,8 @@ "literal": { "string": "en" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language.proto.bin index 7514b380a1c82..bf003eecdcd40 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language_and_country.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language_and_country.json index 991b42faddb76..5a6cab4cfd609 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language_and_country.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language_and_country.json @@ -26,7 +26,8 @@ "literal": { "string": "US" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language_and_country.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language_and_country.proto.bin index 01c0136c6df16..ce9b087ef03bb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language_and_country.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sentences_with_language_and_country.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sequence.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sequence.json index b8bd1b68c9a8f..e9bc5b437502d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sequence.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sequence.json @@ -22,7 +22,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sequence.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_sequence.proto.bin index 36f1980f4ec2b..190d2a8225059 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sequence.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sequence.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_session_user.json b/sql/connect/common/src/test/resources/query-tests/queries/function_session_user.json index 07afa4a77c1b9..03ed1976a708b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_session_user.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_session_user.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "session_user" + "functionName": "session_user", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_session_user.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_session_user.proto.bin index 948e3eeed60ac..62904540ce677 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_session_user.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_session_user.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_session_window.json b/sql/connect/common/src/test/resources/query-tests/queries/function_session_window.json index 92995656bd265..6178d6a73ab77 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_session_window.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_session_window.json @@ -22,7 +22,8 @@ "literal": { "string": "10 minutes" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_session_window.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_session_window.proto.bin index 364ecdf2aaa28..b669a186b2e2b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_session_window.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_session_window.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sha.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sha.json index 57c5cb5bbd270..2385eb652cc15 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sha.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sha.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sha.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sha.proto.bin index e99760e49222d..66d9291a14101 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sha.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sha.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sha1.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sha1.json index ce5014ac2f7e6..b19270ee3fdcb 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sha1.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sha1.json @@ -26,7 +26,8 @@ } } } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sha1.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sha1.proto.bin index 3fdfdb2a072de..004ef664ee8d8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sha1.proto.bin 
and b/sql/connect/common/src/test/resources/query-tests/queries/function_sha1.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sha2.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sha2.json index 5278d604e97b9..ed90b3b939ee6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sha2.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sha2.json @@ -30,7 +30,8 @@ "literal": { "integer": 512 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sha2.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sha2.proto.bin index 20a0ee1082ae2..bf16ad9677137 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sha2.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sha2.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftleft.json b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftleft.json index 12decd300ab03..c02f85d5d56b1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftleft.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftleft.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftleft.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftleft.proto.bin index 94bfbc99fce2d..5ebdbbc9996ee 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftleft.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftleft.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftright.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftright.json index c2295c4abaaa2..eabafb977393a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftright.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftright.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftright.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftright.proto.bin index 910d12f50d6a9..6a089c2ffa344 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftright.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftright.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftrightunsigned.json b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftrightunsigned.json index 875e26a5a5652..4b32899df264a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftrightunsigned.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftrightunsigned.json @@ -22,7 +22,8 @@ "literal": { "integer": 2 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftrightunsigned.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftrightunsigned.proto.bin index aba9c425dca96..d732f7244aa0e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_shiftrightunsigned.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_shiftrightunsigned.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sign.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sign.json index 
34451969078b0..3491a453f6b68 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sign.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sign.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin index ff866c97303ed..35083f8b9a89a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sign.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_signum.json b/sql/connect/common/src/test/resources/query-tests/queries/function_signum.json index bcf6ad7eb174d..02ab0e364fd10 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_signum.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_signum.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_signum.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_signum.proto.bin index af52abfb7f25b..65c838e408540 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_signum.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_signum.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sin.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sin.json index cb5b0da073456..a6be1adb3249d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sin.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_sin.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sin.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sin.proto.bin index a63f574fa59cb..1f746a3ab76f9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sin.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sin.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sinh.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sinh.json index e0f46b428611e..c84ac26b64222 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sinh.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sinh.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sinh.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sinh.proto.bin index 2f17ab02a6d94..545a0749d7973 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sinh.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sinh.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_size.json b/sql/connect/common/src/test/resources/query-tests/queries/function_size.json index 37c9cd1ac1ba7..97a996e23a790 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_size.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_size.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "f" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_size.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_size.proto.bin index a8ae600a3dd7a..4f80765b81564 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_size.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_size.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_skewness.json b/sql/connect/common/src/test/resources/query-tests/queries/function_skewness.json index 4b14c8d5ca79c..4ed304e15c67e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_skewness.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_skewness.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_skewness.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_skewness.proto.bin index 889f96b2d2a39..73a8e1e4d1998 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_skewness.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_skewness.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_slice.json b/sql/connect/common/src/test/resources/query-tests/queries/function_slice.json index b0a63248784ea..c9229b9487c7d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_slice.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_slice.json @@ -26,7 +26,8 @@ "literal": { "integer": 5 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_slice.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_slice.proto.bin index 
620a006f775d6..923214f8cf339 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_slice.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_slice.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_some.json b/sql/connect/common/src/test/resources/query-tests/queries/function_some.json index bd6e28468e357..7b440a8d7e17a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_some.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_some.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "flag" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_some.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_some.proto.bin index 0293719148506..43a3ab993a3a4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_some.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_some.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sort_array.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sort_array.json index b42bede5cd172..24ffc7c2d387e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sort_array.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sort_array.json @@ -22,7 +22,8 @@ "literal": { "boolean": true } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sort_array.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sort_array.proto.bin index 994048af2afc4..0414c104be556 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sort_array.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_sort_array.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_spark_partition_id.json b/sql/connect/common/src/test/resources/query-tests/queries/function_spark_partition_id.json index 851745b32ebe0..0e428e3e199d2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_spark_partition_id.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_spark_partition_id.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "spark_partition_id" + "functionName": "spark_partition_id", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_spark_partition_id.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_spark_partition_id.proto.bin index df99cd64e7203..843ca4273b7d8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_spark_partition_id.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_spark_partition_id.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split.json b/sql/connect/common/src/test/resources/query-tests/queries/function_split.json index 001d44dcaaf6e..a00e18d77628c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_split.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_split.json @@ -22,7 +22,8 @@ "literal": { "string": ";" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_split.proto.bin index cab0bde7b6da2..d0da01bba86dd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_split.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_split.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split_part.json b/sql/connect/common/src/test/resources/query-tests/queries/function_split_part.json index 81ced1555d3e4..a9c6a3ec9e2d0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_split_part.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_split_part.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split_part.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_split_part.proto.bin index 2c1948f20dc22..fde88be6654b0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_split_part.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_split_part.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.json b/sql/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.json index 98ef0e54e6211..5cdf413daf98c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.proto.bin index a87702f83d1bd..e5a712f11a74e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit.json index 45a7588838ff8..0e2f5ac83e77c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit.json @@ -26,7 +26,8 @@ "literal": { "integer": 10 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit.proto.bin index 497297fad8715..adc0a101eaf19 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.json b/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.json index 138f9d70b2c85..73b7355de15e7 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.proto.bin index 04e24be40e9d8..a25da7d145181 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sqrt.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sqrt.json index f9a2b76520c13..6ca6327142558 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sqrt.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sqrt.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sqrt.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sqrt.proto.bin index e98e3bdfdb665..8b2ed11bd0e28 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sqrt.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sqrt.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_stack.json b/sql/connect/common/src/test/resources/query-tests/queries/function_stack.json index 14865c72df228..f714739b185cf 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_stack.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_stack.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_stack.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_stack.proto.bin index 5e5e12478d682..f578d0d2bb952 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_stack.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_stack.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_startswith.json b/sql/connect/common/src/test/resources/query-tests/queries/function_startswith.json index ce2b0ac658c4a..4fb08d9de4760 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_startswith.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_startswith.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_startswith.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_startswith.proto.bin index 2f09e8095f5a0..f46d166a7c554 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_startswith.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_startswith.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_std.json b/sql/connect/common/src/test/resources/query-tests/queries/function_std.json index cbdb4ea9e5e83..5bb7ed27a5a74 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_std.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_std.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin index 7e34b0427c23b..26b1f4e167534 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_std.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev.json b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev.json index 1403817886ca0..d27469b26a7fe 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev.proto.bin index 8d214eea8e74e..b24c06af4fd04 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_pop.json b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_pop.json index 35e3a08b219f8..038b6e1fbb70a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_pop.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_pop.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_pop.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_pop.proto.bin index b679f55014f97..e311b7d0311e8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_pop.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_pop.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_samp.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_samp.json index 17cd0fd5e5976..3f7b829e4821f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_samp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_samp.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_samp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_samp.proto.bin index 9f22eba5e39aa..7f888c1c07d77 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_samp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_stddev_samp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map.json b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map.json index 2cfd095f8fe62..3c640499aba54 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map.proto.bin index 9732a829513a8..7c125c71e5150 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_and_keyValue_delimiter.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_and_keyValue_delimiter.json index 228c939a43ef2..c3b52657efd2e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_and_keyValue_delimiter.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_and_keyValue_delimiter.json @@ -22,7 +22,8 @@ "literal": { "string": "," } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_and_keyValue_delimiter.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_and_keyValue_delimiter.proto.bin index 069c15db9af76..e65abe3472b91 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_and_keyValue_delimiter.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_and_keyValue_delimiter.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_delimiter.json b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_delimiter.json index 7e02c7f13d2ec..2af5fcbb3fbf2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_delimiter.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_delimiter.json @@ -26,7 +26,8 @@ "literal": { "string": "\u003d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_delimiter.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_delimiter.proto.bin index 86a9d15b6512d..4e90cc32e37aa 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_delimiter.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_str_to_map_with_pair_delimiter.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_struct.json b/sql/connect/common/src/test/resources/query-tests/queries/function_struct.json index ba950215a2591..f88910dc3f494 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_struct.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_struct.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_struct.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_struct.proto.bin index 079c2be3c52e5..90e12eb597175 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_struct.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_struct.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substr.json b/sql/connect/common/src/test/resources/query-tests/queries/function_substr.json index ef6d225821c37..510d501b5c9b0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_substr.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_substr.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substr.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_substr.proto.bin index 934201c433381..6b0871916a8a2 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_substr.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_substr.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.json b/sql/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.json index d8492899d69bc..b09ef5ed2723e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.proto.bin index 0fab03c025061..e5a3de2dc6c58 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_substr_with_len.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substring.json b/sql/connect/common/src/test/resources/query-tests/queries/function_substring.json index 84a70cf1c0236..5590cd2660922 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_substring.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_substring.json @@ -26,7 +26,8 @@ "literal": { "integer": 5 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substring.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_substring.proto.bin index d302cd95c7434..bdb3a13e5d9c3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_substring.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_substring.proto.bin differ 
diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substring_index.json b/sql/connect/common/src/test/resources/query-tests/queries/function_substring_index.json index dc81d925957cd..a5396bb478197 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_substring_index.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_substring_index.json @@ -26,7 +26,8 @@ "literal": { "integer": 5 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substring_index.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_substring_index.proto.bin index 192bb2e300dc3..9ac474c32e1bd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_substring_index.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_substring_index.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.json b/sql/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.json index ba28b1c7f5700..8f01512673cf8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.proto.bin index f14b44ef5a501..838e1d9a8bb90 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sum.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sum.json index e9526a20b67fb..28b4ea5bbe856 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sum.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sum.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sum.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sum.proto.bin index 0e347bbc0a167..05c69d6f94029 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sum.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sum.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sum_distinct.json b/sql/connect/common/src/test/resources/query-tests/queries/function_sum_distinct.json index 4614cf99ad3a6..0de8f3d36c22f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_sum_distinct.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_sum_distinct.json @@ -19,7 +19,8 @@ "unparsedIdentifier": "a" } }], - "isDistinct": true + "isDistinct": true, + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_sum_distinct.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_sum_distinct.proto.bin index b4cf704391a4d..6c345201d8eea 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_sum_distinct.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_sum_distinct.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_tan.json b/sql/connect/common/src/test/resources/query-tests/queries/function_tan.json index ead160a7e3ac2..38ca851765599 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_tan.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_tan.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_tan.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_tan.proto.bin index d674dc033b2cd..ba28964c9befb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_tan.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_tan.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_tanh.json b/sql/connect/common/src/test/resources/query-tests/queries/function_tanh.json index bcd12c664427e..e9e3996bc5aaa 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_tanh.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_tanh.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_tanh.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_tanh.proto.bin index 21c28c3ef88e6..d635c5020a53f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_tanh.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_tanh.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.json b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.json index 
8fd71bb36d85e..c23a5c3bfa129 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "t" } - }] + }], + "isInternal": true } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.proto.bin index 5ab8ec531e073..142672a0929e8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.json b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.json index 635cbb45460e6..c779b0936dc63 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "t" } - }] + }], + "isInternal": true } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.proto.bin index 3a81fd8b318c0..c2053f46f6a55 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_micros.json b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_micros.json index e43aa6d7115bd..985a23c536e1c 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_micros.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_micros.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "x" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_micros.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_micros.proto.bin index c8ca8eedef3c0..f17fc48d4418b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_micros.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_micros.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_millis.json b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_millis.json index afcdf42d7b3be..52389f7fe5fab 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_millis.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_millis.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "x" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_millis.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_millis.proto.bin index bbe401c39f3d1..f63af6ecb1fb8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_millis.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_millis.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_seconds.json b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_seconds.json index e6892d17708b3..526d22229facf 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_seconds.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_seconds.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "x" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_seconds.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_seconds.proto.bin index 102afbdda9021..0c7647735eed5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_seconds.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_timestamp_seconds.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary.json index 156c3a5b3ca65..fcf1d35f42169 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary.proto.bin index a1da0e6e2eda1..2b02883e03a30 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary_with_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary_with_format.json index 8c78cc6f8b99f..325e92437d515 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary_with_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary_with_format.json @@ -22,7 +22,8 @@ "literal": { "string": "utf-8" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary_with_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary_with_format.proto.bin index 2f2364e5abab1..5be46a049a535 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary_with_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_binary_with_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_char.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_char.json index 404a89a87ecb2..117955fa60c3a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_char.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_char.json @@ -22,7 +22,8 @@ "literal": { "string": "$99.99" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_char.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_char.proto.bin index 087e212c39f4e..6ed0c2cdde8a9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_char.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_char.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_csv.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_csv.json index 6b3856f5ac0af..cfbce992619b8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_csv.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_to_csv.json @@ -29,9 +29,11 @@ "literal": { "string": "|" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_csv.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_csv.proto.bin index a3017643a330a..318966eb2b58b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_csv.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_csv.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_date.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_date.json index 8b9d50aa578b8..0da88a6158438 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_date.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_date.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "s" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_date.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_date.proto.bin index 59178487eef58..bfd79f65053c8 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_date.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_date.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_date_with_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_date_with_format.json index 48ae80d1e70ed..1f6250ec0656b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_date_with_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_date_with_format.json @@ -22,7 
+22,8 @@ "literal": { "string": "yyyy-MM-dd" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_date_with_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_date_with_format.proto.bin index 2641d660ff69f..8118ff7ee4705 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_date_with_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_date_with_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_json.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_json.json index 7ceeb9d113cd3..0482c5cba9500 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_json.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_json.json @@ -29,9 +29,11 @@ "literal": { "string": "dd/MM/yyyy" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_json.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_json.proto.bin index c9461c1aa961c..f3d94b476135a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_json.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_json.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_number.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_number.json index abb71e80a769c..8df8436bf647d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_number.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_number.json @@ -22,7 +22,8 @@ "literal": { "string": "$99.99" } - }] + }], + "isInternal": false } }] } diff 
--git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin index 189c73553c5db..0d160ed3239c1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_number.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp.json index 323c57e2ef58a..4e9013a1c0ff2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "s" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp.proto.bin index ec6bd64f98187..1c8f18761c5c6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz.json index 59a79f39eb612..c994b0968d099 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz.proto.bin index 9cabae3e75657..45d6d1549d98a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz_with_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz_with_format.json index 08cb9c153f77f..6ff1dca87de6a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz_with_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz_with_format.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz_with_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz_with_format.proto.bin index 22fd3d07dfc43..6aebdbd4f8667 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz_with_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ltz_with_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz.json index 6808047ef2094..53678e814da88 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz.json @@ -18,7 +18,8 @@ 
"unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz.proto.bin index 5cd4cfddbd164..9a0c00065da25 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz_with_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz_with_format.json index 03e38801bfa56..240c53dd5c31c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz_with_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz_with_format.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz_with_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz_with_format.proto.bin index 3a5d3dd970200..f38ad9460ff52 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz_with_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_ntz_with_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_with_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_with_format.json index 30f34528319c7..1988d2fb5a863 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_with_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_with_format.json @@ -22,7 +22,8 @@ "literal": { "string": "yyyy-MM-dd HH:mm:ss.SSSS" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_with_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_with_format.proto.bin index 9c2d6d354ca73..939f9151de2a4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_with_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_timestamp_with_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp.json index 15a42b814a629..6a1c5f3677e7f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp.proto.bin index 1c70f303e6fc2..2887468cea205 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp_with_format.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp_with_format.json index d6f4280d4464e..ce072b6395620 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp_with_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp_with_format.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp_with_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp_with_format.proto.bin index 141ff1fa320d5..803ec8311f552 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp_with_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_unix_timestamp_with_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_utc_timestamp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_utc_timestamp.json index 015fbb5cf534a..fbd86a28a12f3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_utc_timestamp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_utc_timestamp.json @@ -22,7 +22,8 @@ "literal": { "string": "-04:00" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_utc_timestamp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_utc_timestamp.proto.bin index b2b65089604a2..bfaf5d2af8a9c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_utc_timestamp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_utc_timestamp.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_to_varchar.json b/sql/connect/common/src/test/resources/query-tests/queries/function_to_varchar.json index 3694a68dc8f5c..732eb4b426dde 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_to_varchar.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_to_varchar.json @@ -22,7 +22,8 @@ "literal": { "string": "$99.99" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_to_varchar.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_to_varchar.proto.bin index 005c9ab064c9b..b912951a75519 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_to_varchar.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_to_varchar.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_transform.json b/sql/connect/common/src/test/resources/query-tests/queries/function_transform.json index 3ad6fe9435644..c8ec0608b13af 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_transform.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_transform.json @@ -31,14 +31,16 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }, "arguments": [{ "nameParts": ["x_1"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_transform.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_transform.proto.bin index 266b093f7a99b..e4efe8865b124 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_transform.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_transform.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_keys.json b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_keys.json index 86349f460adaa..ed7487a58d3b8 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_keys.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_keys.json @@ -40,7 +40,8 @@ } } } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -49,7 +50,8 @@ "nameParts": ["y_2"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_keys.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_keys.proto.bin index 827b6f273ceea..e04eabb98195f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_keys.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_keys.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_values.json b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_values.json index 02aeca229ce5d..6e76f8a0554f9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_values.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_values.json @@ -41,7 +41,8 @@ "nameParts": ["y_2"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_values.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_values.proto.bin index b4a653ff77a5d..e24a6320c40b3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_values.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_values.proto.bin differ diff 
--git a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_with_index.json b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_with_index.json index df5e15b44fdd3..49dec8db7da73 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_with_index.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_with_index.json @@ -31,7 +31,8 @@ "unresolvedNamedLambdaVariable": { "nameParts": ["y_2"] } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -40,7 +41,8 @@ "nameParts": ["y_2"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_with_index.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_with_index.proto.bin index e502c18dcd9e8..30b1901f42f58 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_transform_with_index.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_transform_with_index.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_translate.json b/sql/connect/common/src/test/resources/query-tests/queries/function_translate.json index 93d155c2857fb..ad5f98152258e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_translate.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_translate.json @@ -26,7 +26,8 @@ "literal": { "string": "bar" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_translate.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_translate.proto.bin index 1ce32c8d2843e..ec9c556cfef09 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_translate.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_translate.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_trim.json b/sql/connect/common/src/test/resources/query-tests/queries/function_trim.json index d2700174bca3d..a7925c2c7b5d2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_trim.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_trim.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_trim.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_trim.proto.bin index d5f4f21510fc6..0ea9051f33837 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_trim.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_trim.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_trim_with_pattern.json b/sql/connect/common/src/test/resources/query-tests/queries/function_trim_with_pattern.json index fc3281c921531..cb566a6b98dfe 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_trim_with_pattern.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_trim_with_pattern.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_trim_with_pattern.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_trim_with_pattern.proto.bin index 2136b55656212..4423ab1a02a37 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_trim_with_pattern.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_trim_with_pattern.proto.bin 
differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_trunc.json b/sql/connect/common/src/test/resources/query-tests/queries/function_trunc.json index 4c596cd863261..01e528241eedd 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_trunc.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_trunc.json @@ -22,7 +22,8 @@ "literal": { "string": "mm" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_trunc.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_trunc.proto.bin index cdcee95af6344..acfd2bb94483b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_trunc.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_trunc.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_add.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_add.json index 80300b5b5778a..2a34feb64a6d5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_add.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_add.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_add.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_add.proto.bin index c1cb613b3943f..f9efaff46f3ec 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_add.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_add.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt.json index 80e10f4786a81..fd2ba4bd66bdf 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt.proto.bin index c2a477e5320c7..cc94446a39c8d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode.json index cc4ea4bfe5fb9..360742eaf0b69 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode.proto.bin index 22919795e3e6a..b1fde39e2b632 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding.json index 1f1fc777959a2..f02b625bd9e6a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding.proto.bin index b16d49e2428a2..1774a4dfaba19 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding_aad.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding_aad.json index b7e7cd41bda8f..b0c434231378b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding_aad.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding_aad.json @@ -34,7 +34,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding_aad.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding_aad.proto.bin index 
d406961d5ccfc..6f75571c79ff3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding_aad.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_aes_decrypt_with_mode_padding_aad.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_avg.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_avg.json index 1216f4b5c635f..9ac56e71ad8b6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_avg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_avg.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_avg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_avg.proto.bin index 8ab7a5d19e380..1b378b28e9ca7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_avg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_avg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_divide.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_divide.json index d7d012756e62f..d7bd1d47d1b2a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_divide.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_divide.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_divide.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_divide.proto.bin index 05c8d4a193adb..3ba2e35029a78 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_divide.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_divide.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.json index c2651e4ad7253..8fb878d1358f2 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.proto.bin index b86d5efd4096b..1fc1045fc085e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_array.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.json index c4e5bc2f415ee..e200f3fa9d278 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.proto.bin index 
2f6c54f2fa5ec..1f8fdb10899a6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_element_at_map.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years.json index a7a2348496040..4c3bafee572d1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years.proto.bin index d459b6e8ec677..acb1952d621d7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months.json index 14aaa41ee2cb5..4508e5610dc81 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months.proto.bin index 5123b995417ba..f12d4affbff93 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks.json index a6ac2f27e3dc5..672acdf6b2d3d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks.proto.bin index cecfca97f7e20..555eb56ff0438 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days.json index c9d4f1d4d2f1f..70782ac30ea49 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days.proto.bin index 423172405c397..b9d6de2d00662 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours.json index 7f2a42f01db45..fda1b3ea21a36 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours.json @@ -34,7 +34,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours.proto.bin index 71259b402aa51..3f477ddeab7db 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins.json index 35ab05a90b3cd..b2db53de2c58e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins.proto.bin index f8cf29d15aabf..72dd0ffd9d539 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins_secs.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins_secs.json index 2f9c1d019359b..5a1389ff2665e 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins_secs.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins_secs.json @@ -42,7 +42,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins_secs.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins_secs.proto.bin index d7343a059b53d..90bf07eb4e9e3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins_secs.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_interval_years_months_weeks_days_hours_mins_secs.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.json index 179f6e06988fc..36559a50d7aa4 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.json @@ -42,7 +42,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.proto.bin index d0c60ba1c7bf8..34dcc6bf46092 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_with_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.json index 29aa2096c2273..36e121b976dbf 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.proto.bin index 9caf6f6ba5285..c0eaf743e46fe 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ltz_without_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.json index 6b8d31d0c58e5..b131e0d07fdea 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.proto.bin index 7d7e2a8029def..a350a6861ed9a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_ntz.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.json index 79e11efc20d41..b2fe4db0dabab 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.json @@ -42,7 +42,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.proto.bin index 53b9839cf8c1f..8519bccb5a1a7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_with_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.json index 39ce728a38862..549071a8a964c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.json @@ -38,7 +38,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.proto.bin index 74918d42f89c6..2d8c06fb5a2a5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_make_timestamp_without_timezone.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_multiply.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_multiply.json index df22654c82031..0838efb2c1eb3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_multiply.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_multiply.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_multiply.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_multiply.proto.bin index 8912423235e0b..703d897792bcb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_multiply.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_multiply.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.json index 91177eb4a5857..fca2a95a83ab8 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.proto.bin index cc1f159cfd78c..a97f0801944be 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url.json index b9603d5af2634..0b4e011421d6a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url.proto.bin index 696c4ddde519c..080be30a46d23 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url_with_key.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url_with_key.json index 137ed4bd9bc80..d173704b73354 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url_with_key.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url_with_key.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url_with_key.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url_with_key.proto.bin index f4a13872e3c8f..b39bd2cb28cbb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url_with_key.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_parse_url_with_key.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_reflect.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_reflect.json index de3fae90c2c4b..0d787f37493b6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_reflect.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_reflect.json @@ -26,7 +26,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_reflect.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_reflect.proto.bin index e38e0e5c06548..5971e17d1041b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_reflect.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_reflect.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_subtract.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_subtract.json index f3a5df24cce88..d1bae052a945c 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_subtract.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_subtract.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_subtract.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_subtract.proto.bin index f0cb5f5027873..4951a8e3c5fd6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_subtract.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_subtract.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_sum.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_sum.json index 41e93d1fcf956..7961eb8eb5596 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_sum.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_sum.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_sum.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_sum.proto.bin index dce7d9df359c9..6e4cbe5a01090 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_sum.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_sum.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.json index 9b57b6b26b562..164304ef4b0ae 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.json @@ -22,7 +22,8 @@ "literal": { "string": "format" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.proto.bin index 28b7059160757..3a1b90ca42f70 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.json index 2498ff9a7872f..6c676dc702a35 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.proto.bin index 682eb1821a3a1..4a7ed3da5738c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_binary_without_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_number.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_number.json index 44e894743dfc8..f206393079de7 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_number.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_number.json @@ -22,7 +22,8 @@ "literal": { "string": "99,999" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_number.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_number.proto.bin index c2eba8a19d5df..2eb9ff68df8fe 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_number.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_number.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.json index d00967823a33c..e0a532043e00c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.proto.bin index 4f0300d48a6fc..ff61c6147cd84 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.json index 4fdfc38ca539b..8589656d5ed52 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.proto.bin index 91a4156e305f6..72a6868870487 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_to_timestamp_without_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_url_decode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_try_url_decode.json index d51704c8f62e2..e73de5e669362 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_url_decode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_url_decode.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_url_decode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_url_decode.proto.bin index 3e84921b12206..e15e5e2d902da 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_url_decode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_url_decode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.json index 9a4a4e25f19e6..b8ce69c4bcf39 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.json @@ -21,7 +21,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }, { "literal": { @@ -31,7 +32,8 @@ "literal": { "string": "int" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.proto.bin index b16bbf4c7a4e9..82584c937aaa7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typeof.json b/sql/connect/common/src/test/resources/query-tests/queries/function_typeof.json index 7a6fcfcbcf898..7f9808d9fd947 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_typeof.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_typeof.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typeof.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_typeof.proto.bin index a042a6e8d7607..585d98f767904 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_typeof.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_typeof.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ucase.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_ucase.json index 7193142acdb6f..5580e31e26ffa 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_ucase.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_ucase.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_ucase.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_ucase.proto.bin index 3e17a01d4b1f5..8a2b70936e0f9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_ucase.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_ucase.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unbase64.json b/sql/connect/common/src/test/resources/query-tests/queries/function_unbase64.json index 6af2a00ed160e..af85c7b64779c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_unbase64.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_unbase64.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unbase64.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_unbase64.proto.bin index f37ceb91bf42b..f446e0ad73f45 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_unbase64.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_unbase64.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unhex.json b/sql/connect/common/src/test/resources/query-tests/queries/function_unhex.json index 7c409d023f76a..1cea642cc9c68 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_unhex.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_unhex.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unhex.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_unhex.proto.bin index fbac2821fdb07..757eca2dc04d5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_unhex.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_unhex.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_date.json b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_date.json index 1a7ae09f46dad..2e617c4e6b8b1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_date.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_date.json @@ -25,9 +25,11 @@ "literal": { "string": "yyyy-MM-dd" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_date.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_date.proto.bin index 9c05e42bfad30..6ed08ec71d76a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_date.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_date.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_micros.json b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_micros.json index 07f5cd1d53dbd..f7bb6d9ba6264 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_micros.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_micros.json @@ -25,9 +25,11 @@ "literal": { "string": "yyyy-MM-dd HH:mm:ss.SSSS" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_micros.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_micros.proto.bin index c3f44d766f8b1..2574acb3c8d95 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_micros.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_micros.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_millis.json b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_millis.json index aac02cc807aa0..4a7c077e88bc1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_millis.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_millis.json @@ -25,9 +25,11 @@ "literal": { "string": "yyyy-MM-dd HH:mm:ss.SSSS" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_millis.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_millis.proto.bin index f0456e03e3fc1..1865aac8a7340 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_millis.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_millis.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_seconds.json b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_seconds.json index 428cb26cd9c86..dc3fc3fae0c05 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_seconds.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_seconds.json @@ -25,9 +25,11 @@ "literal": { "string": "yyyy-MM-dd HH:mm:ss.SSSS" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_seconds.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_seconds.proto.bin index fdaf50e7322bb..2ca04f640cda4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_seconds.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_seconds.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp.json index e590f7778f2ea..0780a83d74088 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp.json @@ -16,9 +16,11 @@ "functionName": "unix_timestamp", "arguments": [{ "unresolvedFunction": { - "functionName": "current_timestamp" + "functionName": "current_timestamp", + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp.proto.bin index cb3d967ae0123..6f8cf9115629f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp_with_format.json b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp_with_format.json 
index d2e087a5d8a24..93699abb33a7b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp_with_format.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp_with_format.json @@ -22,7 +22,8 @@ "literal": { "string": "yyyy-MM-dd HH:mm:ss.SSSS" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp_with_format.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp_with_format.proto.bin index ddfcdff63d11a..f838c9dd31912 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp_with_format.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_unix_timestamp_with_format.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_upper.json b/sql/connect/common/src/test/resources/query-tests/queries/function_upper.json index 208ee9231a13c..36f1f0258ca82 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_upper.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_upper.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_upper.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_upper.proto.bin index 5ddbfce96e71b..a7d6be43571a0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_upper.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_upper.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_url_decode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_url_decode.json index d4cdeeb6c48c5..2e8003e4e9a9f 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_url_decode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_url_decode.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_url_decode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_url_decode.proto.bin index e347e73c3aef1..ff6fb3793e671 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_url_decode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_url_decode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_url_encode.json b/sql/connect/common/src/test/resources/query-tests/queries/function_url_encode.json index 5d221e0fea6f4..9df8a4683ea65 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_url_encode.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_url_encode.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_url_encode.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_url_encode.proto.bin index 9313fb8249859..eb9e31e63697b 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_url_encode.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_url_encode.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_user.json b/sql/connect/common/src/test/resources/query-tests/queries/function_user.json index aaf3de9ba034e..c3ab4ae4be94c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_user.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_user.json @@ -13,7 +13,8 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "user" + "functionName": "user", + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_user.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_user.proto.bin index dbd64cae9f360..17b0d6a4ecc90 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_user.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_user.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_var_pop.json b/sql/connect/common/src/test/resources/query-tests/queries/function_var_pop.json index 9c74ce4a984f8..d91c325ef41ba 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_var_pop.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_var_pop.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_var_pop.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_var_pop.proto.bin index 7ca6e8d3b811b..603e856366f10 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_var_pop.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_var_pop.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_var_samp.json b/sql/connect/common/src/test/resources/query-tests/queries/function_var_samp.json index 979313dd0510d..8132510e61129 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_var_samp.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_var_samp.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { 
"unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_var_samp.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_var_samp.proto.bin index 9bd042ad339e7..99a8d28ec0e72 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_var_samp.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_var_samp.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_variance.json b/sql/connect/common/src/test/resources/query-tests/queries/function_variance.json index 90a97c3becf4d..4bfce573c50c5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_variance.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_variance.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_variance.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_variance.proto.bin index fd494fc496391..e3c0f8512c0c6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_variance.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_variance.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_variant_get.json b/sql/connect/common/src/test/resources/query-tests/queries/function_variant_get.json index ab0acd29d505b..d44a94f365b56 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_variant_get.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_variant_get.json @@ -21,7 +21,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }, { "literal": { @@ -31,7 +32,8 @@ "literal": { "string": 
"int" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_variant_get.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_variant_get.proto.bin index fe9b76bb97c4a..f1ac3c8c0ad63 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_variant_get.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_variant_get.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_weekday.json b/sql/connect/common/src/test/resources/query-tests/queries/function_weekday.json index b757700291752..82f37d343207e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_weekday.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_weekday.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_weekday.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_weekday.proto.bin index 1954103269eb2..cc47e1928103a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_weekday.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_weekday.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_weekofyear.json b/sql/connect/common/src/test/resources/query-tests/queries/function_weekofyear.json index 3f46a98569e24..de4ce19a12b5e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_weekofyear.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_weekofyear.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_weekofyear.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_weekofyear.proto.bin index ec9b22522360e..e5c742732e76d 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_weekofyear.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_weekofyear.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_window.json b/sql/connect/common/src/test/resources/query-tests/queries/function_window.json index bdcb6a398800f..95bde679468e0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_window.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_window.json @@ -30,7 +30,8 @@ "literal": { "string": "0 second" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_window.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_window.proto.bin index 8cffcc1e9f673..303b9673c8ab5 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_window.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_window.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_window_time.json b/sql/connect/common/src/test/resources/query-tests/queries/function_window_time.json index 4809ea21261c4..2107ac2f12ecd 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_window_time.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_window_time.json @@ -35,7 +35,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "wt" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_window_time.proto.bin 
b/sql/connect/common/src/test/resources/query-tests/queries/function_window_time.proto.bin index c143520df08ce..3de3a3e156a2c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_window_time.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_window_time.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath.json index 3dea90a13653d..fd36b378137c6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b/text()" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath.proto.bin index aabfc76f8a7e1..d07c5d50fc3a4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_boolean.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_boolean.json index 793d459ec165b..0749c2d422314 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_boolean.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_boolean.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_boolean.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_boolean.proto.bin index 544caab4ecc5b..e70d2e5c3edd6 100644 Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_boolean.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_boolean.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_double.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_double.json index f88a06641b8f4..d1c77ae96a86b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_double.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_double.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_double.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_double.proto.bin index 9c4ea31712021..de580971683b3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_double.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_double.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_float.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_float.json index 94932891225d7..b13e12a2d7e9f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_float.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_float.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_float.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_float.proto.bin index 32dfbc00cfa44..4285deba56d7c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_float.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_float.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_int.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_int.json index 0dcef00ed20d4..2a55744cb38c9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_int.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_int.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_int.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_int.proto.bin index e6298b37dbe36..afe3b10e4cd86 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_int.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_int.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_long.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_long.json index c740d2bad4f5f..3d4d9267a6a50 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_long.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_long.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_long.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_long.proto.bin index d240600eabbae..7cb6efd6ab2d0 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_long.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_long.proto.bin differ diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_number.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_number.json index b164bb6a32ac7..cf1303b54d160 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_number.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_number.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_number.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_number.proto.bin index b967d3e55cc5f..c589c8ecc775a 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_number.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_number.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_short.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_short.json index 5d3a3e9983707..4aa5e3aae7fc9 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_short.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_short.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_short.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_short.proto.bin index 9ae27bd973853..f407b525cdfa7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_short.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_short.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_string.json 
b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_string.json index 26e4130ae2c4b..94fad0de2851f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_string.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_string.json @@ -22,7 +22,8 @@ "literal": { "string": "a/b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_string.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_string.proto.bin index 5384301238b1e..c31ae5065a513 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_string.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xpath_string.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xxhash64.json b/sql/connect/common/src/test/resources/query-tests/queries/function_xxhash64.json index c20739d09ff10..5000f3b164766 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_xxhash64.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_xxhash64.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "g" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_xxhash64.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_xxhash64.proto.bin index 414c76fc5ce7f..de84e70acef5e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_xxhash64.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_xxhash64.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_year.json b/sql/connect/common/src/test/resources/query-tests/queries/function_year.json index b8a4ee5a16525..9fadb5c411b9b 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_year.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_year.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "d" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_year.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_year.proto.bin index 623bc9ac6d81f..91bbfdc180efc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_year.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_year.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_years.json b/sql/connect/common/src/test/resources/query-tests/queries/function_years.json index 2e87307320271..7b0ab3d287ece 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_years.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_years.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": true } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_years.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_years.proto.bin index 30c25423fd563..575f56951f017 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_years.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_years.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_zip_with.json b/sql/connect/common/src/test/resources/query-tests/queries/function_zip_with.json index 660ca1931137e..66d53c39742ba 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_zip_with.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_zip_with.json @@ -35,7 
+35,8 @@ "unresolvedNamedLambdaVariable": { "nameParts": ["y_2"] } - }] + }], + "isInternal": false } }, "arguments": [{ @@ -44,7 +45,8 @@ "nameParts": ["y_2"] }] } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_zip_with.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_zip_with.proto.bin index edbfe197af4dc..1bf478358f357 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_zip_with.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_zip_with.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg.json index b7b4c98518e6b..e36ad1de4960d 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg.json @@ -25,7 +25,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -45,7 +47,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -55,7 +58,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -65,7 +69,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -75,7 +80,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -84,7 +90,8 @@ "unresolvedStar": { "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -94,7 +101,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg.proto.bin index d7b1b94ed04a2..22eacb3a01b03 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.json index e61616786158e..cf42aabd68160 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.json @@ -24,7 +24,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -33,7 +34,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.proto.bin index d6daa1cc31f7d..a12bd0699df1f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_columns.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_string.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_string.json index 285c13f4bc8b3..e5261b8a18a09 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_string.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_string.json @@ -31,7 +31,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { 
@@ -41,7 +42,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_string.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_string.proto.bin index 674d506fa4a07..93bb39f16d1a6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_string.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_agg_string.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_avg.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_avg.json index 0ded46cf6cc7c..4110779d80c56 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_avg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_avg.json @@ -25,7 +25,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_avg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_avg.proto.bin index 444b0c3853f16..d43d816891aa6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_avg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_avg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_count.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_count.json index f92e22493e07b..04d6e91f9c7cf 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_count.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_count.json @@ -26,7 +26,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }, 
"name": ["count"] diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_count.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_count.proto.bin index 5bb539195df9a..fd009c3a636ca 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_count.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_count.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_max.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_max.json index ed186ff713519..643f6c0676a30 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_max.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_max.json @@ -25,7 +25,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_max.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_max.proto.bin index 11cd163e91738..7bb503d75e92f 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_max.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_max.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_mean.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_mean.json index 0ded46cf6cc7c..4110779d80c56 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_mean.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_mean.json @@ -25,7 +25,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + 
}], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_mean.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_mean.proto.bin index 444b0c3853f16..d43d816891aa6 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_mean.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_mean.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_min.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_min.json index 8c0ad283cb0a4..3d7546547d98f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_min.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_min.json @@ -25,7 +25,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unparsedIdentifier": "b", "planId": "0" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_min.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_min.proto.bin index 2bc985a1fe9f3..cd0488b381612 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_min.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_min.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_sum.json b/sql/connect/common/src/test/resources/query-tests/queries/groupby_sum.json index 788b964491c6a..61133d3fe4321 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupby_sum.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupby_sum.json @@ -25,7 +25,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unparsedIdentifier": "b", "planId": "0" } - 
}] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupby_sum.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupby_sum.proto.bin index e92041399cbca..d5f8bc8c47698 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupby_sum.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupby_sum.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupingSets.json b/sql/connect/common/src/test/resources/query-tests/queries/groupingSets.json index 6e84824ec7a3a..a81c24a9fa077 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/groupingSets.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/groupingSets.json @@ -25,7 +25,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unparsedIdentifier": "a", "planId": "0" } - }] + }], + "isInternal": false } }], "groupingSets": [{ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/groupingSets.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/groupingSets.proto.bin index ce0294096706e..48d51737b9a41 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/groupingSets.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/groupingSets.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/grouping_and_grouping_id.json b/sql/connect/common/src/test/resources/query-tests/queries/grouping_and_grouping_id.json index 8ff81d95d2988..c1c1bcdf0dc87 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/grouping_and_grouping_id.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/grouping_and_grouping_id.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, 
{ "unresolvedFunction": { @@ -39,7 +40,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -52,7 +54,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/grouping_and_grouping_id.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/grouping_and_grouping_id.proto.bin index d1dded43ddf99..41e1a426e25b9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/grouping_and_grouping_id.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/grouping_and_grouping_id.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg.json index 52f66cf2dc6b9..914f304f56a2e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "bytes" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg.proto.bin index 68b74817c3268..ba473ed2e2855 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName.json index 52f66cf2dc6b9..914f304f56a2e 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "bytes" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName.proto.bin index 68b74817c3268..ba473ed2e2855 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName_lgConfigK_int.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName_lgConfigK_int.json index fbd4ca05d9e99..590187f7fe63b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName_lgConfigK_int.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName_lgConfigK_int.json @@ -22,7 +22,8 @@ "literal": { "integer": 0 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName_lgConfigK_int.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName_lgConfigK_int.proto.bin index bea4e1a642ab2..5b83337b80bee 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName_lgConfigK_int.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_columnName_lgConfigK_int.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK.json 
b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK.json index fbd4ca05d9e99..590187f7fe63b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK.json @@ -22,7 +22,8 @@ "literal": { "integer": 0 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK.proto.bin index bea4e1a642ab2..5b83337b80bee 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK_int.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK_int.json index fbd4ca05d9e99..590187f7fe63b 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK_int.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK_int.json @@ -22,7 +22,8 @@ "literal": { "integer": 0 } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK_int.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK_int.proto.bin index bea4e1a642ab2..5b83337b80bee 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK_int.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/hll_sketch_agg_with_column_lgConfigK_int.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg.json index 74b3e7c4a7410..216afd0f5975a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "bytes" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg.proto.bin index e19b476247a24..309e80a86ee38 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName.json index 74b3e7c4a7410..216afd0f5975a 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "bytes" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName.proto.bin index e19b476247a24..309e80a86ee38 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName_allowDifferentLgConfigK_boolean.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName_allowDifferentLgConfigK_boolean.json index bb6413a94ced3..e733e086af5d6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName_allowDifferentLgConfigK_boolean.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName_allowDifferentLgConfigK_boolean.json @@ -22,7 +22,8 @@ "literal": { "boolean": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName_allowDifferentLgConfigK_boolean.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName_allowDifferentLgConfigK_boolean.proto.bin index 4f7f236583949..c71e656127200 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName_allowDifferentLgConfigK_boolean.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_columnName_allowDifferentLgConfigK_boolean.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK.json index bb6413a94ced3..e733e086af5d6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK.json @@ -22,7 +22,8 @@ "literal": { "boolean": false } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK.proto.bin index 4f7f236583949..c71e656127200 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK_boolean.json b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK_boolean.json index bb6413a94ced3..e733e086af5d6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK_boolean.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK_boolean.json @@ -22,7 +22,8 @@ "literal": { "boolean": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK_boolean.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK_boolean.proto.bin index 4f7f236583949..c71e656127200 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK_boolean.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/hll_union_agg_with_column_allowDifferentLgConfigK_boolean.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/join_condition.json b/sql/connect/common/src/test/resources/query-tests/queries/join_condition.json index 993cd98a7dd16..7151d0420f6b5 100644 --- 
a/sql/connect/common/src/test/resources/query-tests/queries/join_condition.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/join_condition.json @@ -46,7 +46,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "r.id" } - }] + }], + "isInternal": false } }, "joinType": "JOIN_TYPE_LEFT_ANTI" diff --git a/sql/connect/common/src/test/resources/query-tests/queries/join_condition.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/join_condition.proto.bin index 1d11fe5e75bcc..4784998b49cca 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/join_condition.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/join_condition.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/join_inner_condition.json b/sql/connect/common/src/test/resources/query-tests/queries/join_inner_condition.json index 527338c56ae60..9308d6babdb25 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/join_inner_condition.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/join_inner_condition.json @@ -46,7 +46,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "r.a" } - }] + }], + "isInternal": false } }, "joinType": "JOIN_TYPE_INNER" diff --git a/sql/connect/common/src/test/resources/query-tests/queries/join_inner_condition.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/join_inner_condition.proto.bin index 5d3de55da9cf8..a49cc6ef30806 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/join_inner_condition.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/join_inner_condition.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/pivot.json b/sql/connect/common/src/test/resources/query-tests/queries/pivot.json index 2af86606b9fcb..f085d1a43b678 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/pivot.json +++ 
b/sql/connect/common/src/test/resources/query-tests/queries/pivot.json @@ -24,7 +24,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }], "pivot": { diff --git a/sql/connect/common/src/test/resources/query-tests/queries/pivot.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/pivot.proto.bin index f545179e84968..73c88bf97535e 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/pivot.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/pivot.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/pivot_without_column_values.json b/sql/connect/common/src/test/resources/query-tests/queries/pivot_without_column_values.json index aa043613795c4..9d5b22bce6e89 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/pivot_without_column_values.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/pivot_without_column_values.json @@ -24,7 +24,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }], "pivot": { diff --git a/sql/connect/common/src/test/resources/query-tests/queries/pivot_without_column_values.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/pivot_without_column_values.proto.bin index 588b56f247e07..d722db0e17ea9 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/pivot_without_column_values.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/pivot_without_column_values.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/rollup_column.json b/sql/connect/common/src/test/resources/query-tests/queries/rollup_column.json index 1102db18830bd..146904dc898e3 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/rollup_column.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/rollup_column.json @@ -30,7 +30,8 @@ 
"literal": { "integer": 1 } - }] + }], + "isInternal": false } }, "name": ["count"] diff --git a/sql/connect/common/src/test/resources/query-tests/queries/rollup_column.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/rollup_column.proto.bin index 64dbb597c3650..8949050821a12 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/rollup_column.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/rollup_column.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/rollup_string.json b/sql/connect/common/src/test/resources/query-tests/queries/rollup_string.json index 5082051031f81..6fe3659064e79 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/rollup_string.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/rollup_string.json @@ -32,7 +32,8 @@ "literal": { "integer": 1 } - }] + }], + "isInternal": false } }, "name": ["count"] diff --git a/sql/connect/common/src/test/resources/query-tests/queries/rollup_string.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/rollup_string.proto.bin index 63fdead641dad..3843ae77a9bfc 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/rollup_string.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/rollup_string.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_1-arg.json b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_1-arg.json index 90ef62c5f415b..e3dcf84ae9c39 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_1-arg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_1-arg.json @@ -28,11 +28,14 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_1-arg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_1-arg.proto.bin index 2273a16d4e6a8..b72a7233c4c04 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_1-arg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_1-arg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_2-arg.json b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_2-arg.json index c9c6c75235694..d144dcf8b8af5 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_2-arg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_2-arg.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_2-arg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_2-arg.proto.bin index 37f3915cd8d18..18763400b4abb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_2-arg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_2-arg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_3-arg.json b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_3-arg.json index 23850dcb136ef..55b64d26d4904 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_3-arg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_3-arg.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false 
} }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -48,7 +50,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_3-arg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_3-arg.proto.bin index b3b56953a8586..d535835523de3 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_3-arg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_3-arg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_4-arg.json b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_4-arg.json index 2bbdb60794db5..da0adf605f977 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_4-arg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_4-arg.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -48,7 +50,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -61,7 +64,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_4-arg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_4-arg.proto.bin index bacccff22ae0a..50197b862ad14 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_4-arg.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_4-arg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_5-arg.json b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_5-arg.json index 4f57c0ef82145..196a91b9fbb81 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_5-arg.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_5-arg.json @@ -22,7 +22,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -35,7 +36,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -48,7 +50,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -61,7 +64,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }, { "unresolvedFunction": { @@ -74,7 +78,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "b" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_5-arg.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_5-arg.proto.bin index 2c51e2088885f..e2ff25edd34cd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/select_typed_5-arg.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/select_typed_5-arg.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/toJSON.json b/sql/connect/common/src/test/resources/query-tests/queries/toJSON.json index 9a99a18853cf1..9faba08d9792c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/toJSON.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/toJSON.json @@ -21,9 +21,11 @@ "unresolvedStar": { "planId": "0" } - }] + }], + 
"isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin index e930ee76aae97..0cd2c3d35c6b4 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/toJSON.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_avro_with_schema.json b/sql/connect/common/src/test/resources/query-tests/queries/to_avro_with_schema.json index 6079e13bbfc93..b632fba4a0192 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/to_avro_with_schema.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/to_avro_with_schema.json @@ -22,7 +22,8 @@ "literal": { "string": "{\"type\": \"int\", \"name\": \"id\"}" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_avro_with_schema.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/to_avro_with_schema.proto.bin index 2843fbb67fecf..6c3907802968c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/to_avro_with_schema.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/to_avro_with_schema.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_avro_without_schema.json b/sql/connect/common/src/test/resources/query-tests/queries/to_avro_without_schema.json index fa19d2120b94f..dd289a6abcc16 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/to_avro_without_schema.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/to_avro_without_schema.json @@ -18,7 +18,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }] } diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/to_avro_without_schema.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/to_avro_without_schema.proto.bin index 4e7251125e4ce..59bce6aac25c7 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/to_avro_without_schema.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/to_avro_without_schema.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName.json b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName.json index 921c1b800a089..e71bddc0b19a0 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName.json @@ -22,7 +22,8 @@ "literal": { "string": "org.apache.spark.connect.proto.StorageLevel" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName.proto.bin index 5cc7c49882c03..f49e6d227ddcd 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath.json b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath.json index 0843b469384e0..c6ccee6f35c3f 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath.json @@ -26,7 +26,8 @@ "literal": { 
"binary": "CvwBCgxjb21tb24ucHJvdG8SDXNwYXJrLmNvbm5lY3QisAEKDFN0b3JhZ2VMZXZlbBIZCgh1c2VfZGlzaxgBIAEoCFIHdXNlRGlzaxIdCgp1c2VfbWVtb3J5GAIgASgIUgl1c2VNZW1vcnkSIAoMdXNlX29mZl9oZWFwGAMgASgIUgp1c2VPZmZIZWFwEiIKDGRlc2VyaWFsaXplZBgEIAEoCFIMZGVzZXJpYWxpemVkEiAKC3JlcGxpY2F0aW9uGAUgASgFUgtyZXBsaWNhdGlvbkIiCh5vcmcuYXBhY2hlLnNwYXJrLmNvbm5lY3QucHJvdG9QAWIGcHJvdG8z" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath.proto.bin index c3fe14aef47da..c30bc963ce0eb 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath_options.json b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath_options.json index 76307b3141f7f..bc676b7aa5b1c 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath_options.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath_options.json @@ -37,9 +37,11 @@ "literal": { "string": "2" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath_options.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath_options.proto.bin index a387611c1ad55..971d6b358711c 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath_options.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_descFilePath_options.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_options.json b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_options.json index 8787f0fc15d77..fa3d57a251cb6 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_options.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_options.json @@ -33,9 +33,11 @@ "literal": { "string": "2" } - }] + }], + "isInternal": false } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_options.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_options.proto.bin index 9ef8348446ad4..7f955b1013fe1 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_options.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/to_protobuf_messageClassName_options.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/where_column.json b/sql/connect/common/src/test/resources/query-tests/queries/where_column.json index bef80a7e6ed5a..15cca60f2a407 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/where_column.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/where_column.json @@ -22,7 +22,8 @@ "literal": { "long": "1" } - }] + }], + "isInternal": false } } } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/where_column.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/where_column.proto.bin index e472ed0715b62..ef377b7044366 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/where_column.proto.bin and 
b/sql/connect/common/src/test/resources/query-tests/queries/where_column.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/width_bucket.json b/sql/connect/common/src/test/resources/query-tests/queries/width_bucket.json index 93d3b5297d9e1..3e7c112776655 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/width_bucket.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/width_bucket.json @@ -30,7 +30,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "a" } - }] + }], + "isInternal": false } }] } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin index f212e97bc1c5a..2667912763248 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/width_bucket.proto.bin differ diff --git a/sql/connect/common/src/test/resources/query-tests/queries/window.json b/sql/connect/common/src/test/resources/query-tests/queries/window.json index 23fd5c1556ec5..ad9555f3ae898 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/window.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/window.json @@ -20,7 +20,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }, "partitionSpec": [{ @@ -42,7 +43,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }, "partitionSpec": [{ @@ -64,7 +66,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }, "orderSpec": [{ @@ -94,7 +97,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }, "orderSpec": [{ @@ -124,7 +128,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }, "orderSpec": [{ @@ -163,7 +168,8 @@ 
"unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } }, "orderSpec": [{ @@ -202,7 +208,8 @@ "unresolvedAttribute": { "unparsedIdentifier": "id" } - }] + }], + "isInternal": false } } } diff --git a/sql/connect/common/src/test/resources/query-tests/queries/window.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/window.proto.bin index a89c0d6a6a3f4..01616601af0ea 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/window.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/window.proto.bin differ diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index 051093fcad277..21b5e057fb77e 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -241,14 +241,13 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( // The state of interrupted, response and lastIndex are changed under executionObserver // monitor, and will notify upon state change. if (response.isEmpty) { + var timeout = Math.max(1, deadlineTimeMillis - System.currentTimeMillis()) // Wake up more frequently to send the progress updates. val progressTimeout = executeHolder.sessionHolder.session.sessionState.conf .getConf(CONNECT_PROGRESS_REPORT_INTERVAL) // If the progress feature is disabled, wait for the deadline. 
- val timeout = if (progressTimeout > 0) { - progressTimeout - } else { - Math.max(1, deadlineTimeMillis - System.currentTimeMillis()) + if (progressTimeout > 0L) { + timeout = Math.min(progressTimeout, timeout) } logTrace(s"Wait for response to become available with timeout=$timeout ms.") executionObserver.responseLock.wait(timeout) @@ -291,7 +290,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( assert(finished == false) } else { // If it wasn't sent, time deadline must have been reached before stream became available, - // or it was intterupted. Will exit in the next loop iterattion. + // or it was interrupted. Will exit in the next loop iterattion. assert(deadlineLimitReached || interrupted) } } else if (streamFinished) { diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala index d27f390a23f95..05e3395a53169 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala @@ -245,7 +245,7 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends .createObservedMetricsResponse( executeHolder.sessionHolder.sessionId, executeHolder.sessionHolder.serverSessionId, - executeHolder.request.getPlan.getRoot.getCommon.getPlanId, + executeHolder.allObservationAndPlanIds, observedMetrics ++ accumulatedInPython)) } diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala index c0fd00b2eeaa7..5e3499573e9d9 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala +++ 
b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala @@ -77,8 +77,10 @@ private[execution] class SparkConnectPlanExecution(executeHolder: ExecuteHolder) responseObserver.onNext(createSchemaResponse(request.getSessionId, dataframe.schema)) processAsArrowBatches(dataframe, responseObserver, executeHolder) responseObserver.onNext(MetricGenerator.createMetricsResponse(sessionHolder, dataframe)) - createObservedMetricsResponse(request.getSessionId, dataframe).foreach( - responseObserver.onNext) + createObservedMetricsResponse( + request.getSessionId, + executeHolder.allObservationAndPlanIds, + dataframe).foreach(responseObserver.onNext) } type Batch = (Array[Byte], Long) @@ -255,6 +257,7 @@ private[execution] class SparkConnectPlanExecution(executeHolder: ExecuteHolder) private def createObservedMetricsResponse( sessionId: String, + observationAndPlanIds: Map[String, Long], dataframe: DataFrame): Option[ExecutePlanResponse] = { val observedMetrics = dataframe.queryExecution.observedMetrics.collect { case (name, row) if !executeHolder.observations.contains(name) => @@ -264,13 +267,12 @@ private[execution] class SparkConnectPlanExecution(executeHolder: ExecuteHolder) name -> values } if (observedMetrics.nonEmpty) { - val planId = executeHolder.request.getPlan.getRoot.getCommon.getPlanId Some( SparkConnectPlanExecution .createObservedMetricsResponse( sessionId, sessionHolder.serverSessionId, - planId, + observationAndPlanIds, observedMetrics)) } else None } @@ -280,17 +282,17 @@ object SparkConnectPlanExecution { def createObservedMetricsResponse( sessionId: String, serverSessionId: String, - planId: Long, + observationAndPlanIds: Map[String, Long], metrics: Map[String, Seq[(Option[String], Any)]]): ExecutePlanResponse = { val observedMetrics = metrics.map { case (name, values) => val metrics = ExecutePlanResponse.ObservedMetrics .newBuilder() .setName(name) - .setPlanId(planId) values.foreach { case (key, value) => 
metrics.addValues(toLiteralProto(value)) key.foreach(metrics.addKeys) } + observationAndPlanIds.get(name).foreach(metrics.setPlanId) metrics.build() } // Prepare a response with the observed metrics. diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala new file mode 100644 index 0000000000000..a036f8b67350d --- /dev/null +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLCache.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect.ml + +import java.util.UUID +import java.util.concurrent.ConcurrentHashMap + +import org.apache.spark.internal.Logging + +/** + * MLCache is for caching ML objects, typically for models and summaries evaluated by a model. 
+ */ +private[connect] class MLCache extends Logging { + private val cachedModel: ConcurrentHashMap[String, Object] = + new ConcurrentHashMap[String, Object]() + + /** + * Cache an object into a map of MLCache, and return its key + * @param obj + * the object to be cached + * @return + * the key + */ + def register(obj: Object): String = { + val objectId = UUID.randomUUID().toString + cachedModel.put(objectId, obj) + objectId + } + + /** + * Get the object by the key + * @param refId + * the key used to look up the corresponding object + * @return + * the cached object + */ + def get(refId: String): Object = { + cachedModel.get(refId) + } + + /** + * Remove the object from MLCache + * @param refId + * the key used to look up the corresponding object + */ + def remove(refId: String): Unit = { + cachedModel.remove(refId) + } + + /** + * Clear all the caches + */ + def clear(): Unit = { + cachedModel.clear() + } +} diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLException.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLException.scala new file mode 100644 index 0000000000000..eb88bf9169d3d --- /dev/null +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLException.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connect.ml + +import org.apache.spark.SparkException + +private[spark] case class MlUnsupportedException(message: String) + extends SparkException( + errorClass = "CONNECT_ML.UNSUPPORTED_EXCEPTION", + messageParameters = Map("message" -> message), + cause = null) + +private[spark] case class MLAttributeNotAllowedException(attribute: String) + extends SparkException( + errorClass = "CONNECT_ML.ATTRIBUTE_NOT_ALLOWED", + messageParameters = Map("attribute" -> attribute), + cause = null) diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLHandler.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLHandler.scala new file mode 100644 index 0000000000000..b4bc6bfdc66b4 --- /dev/null +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLHandler.scala @@ -0,0 +1,268 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connect.ml + +import scala.jdk.CollectionConverters.CollectionHasAsScala + +import org.apache.spark.connect.proto +import org.apache.spark.internal.Logging +import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.util.{MLWritable, Summary} +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.connect.common.LiteralValueProtoConverter +import org.apache.spark.sql.connect.ml.Serializer.deserializeMethodArguments +import org.apache.spark.sql.connect.service.SessionHolder + +private case class Method( + name: String, + argValues: Array[Object] = Array.empty, + argClasses: Array[Class[_]] = Array.empty) + +/** + * Helper function to get the attribute from an object by reflection + */ +private class AttributeHelper( + val sessionHolder: SessionHolder, + val objRef: String, + val methods: Array[Method]) { + protected lazy val instance = sessionHolder.mlCache.get(objRef) + // Get the attribute by reflection + def getAttribute: Any = { + assert(methods.length >= 1) + methods.foldLeft(instance) { (obj, m) => + if (m.argValues.isEmpty) { + MLUtils.invokeMethodAllowed(obj, m.name) + } else { + MLUtils.invokeMethodAllowed(obj, m.name, m.argValues, m.argClasses) + } + } + } +} + +// Model specific attribute helper with transform supported +private class ModelAttributeHelper( + sessionHolder: SessionHolder, + objRef: String, + methods: Array[Method]) + extends AttributeHelper(sessionHolder, objRef, methods) { + + def transform(relation: proto.MlRelation.Transform): DataFrame = { + // Create a copied model to avoid concurrently modify model params. 
+ val model = instance.asInstanceOf[Model[_]] + val copiedModel = model.copy(ParamMap.empty).asInstanceOf[Model[_]] + MLUtils.setInstanceParams(copiedModel, relation.getParams) + val inputDF = MLUtils.parseRelationProto(relation.getInput, sessionHolder) + copiedModel.transform(inputDF) + } +} + +private object AttributeHelper { + def parseMethods( + sessionHolder: SessionHolder, + methodsProto: Array[proto.Fetch.Method] = Array.empty): Array[Method] = { + methodsProto.map { m => + val (argValues, argClasses) = + deserializeMethodArguments(m.getArgsList.asScala.toArray, sessionHolder).unzip + Method(m.getMethod, argValues, argClasses) + } + } + def apply( + sessionHolder: SessionHolder, + objId: String, + methodsProto: Array[proto.Fetch.Method] = Array.empty): AttributeHelper = { + new AttributeHelper(sessionHolder, objId, parseMethods(sessionHolder, methodsProto)) + } +} + +private object ModelAttributeHelper { + def apply( + sessionHolder: SessionHolder, + objId: String, + methodsProto: Array[proto.Fetch.Method] = Array.empty): ModelAttributeHelper = { + new ModelAttributeHelper( + sessionHolder, + objId, + AttributeHelper.parseMethods(sessionHolder, methodsProto)) + } +} + +// MLHandler is a utility to group all ML operations +private[connect] object MLHandler extends Logging { + def handleMlCommand( + sessionHolder: SessionHolder, + mlCommand: proto.MlCommand): proto.MlCommandResult = { + + val mlCache = sessionHolder.mlCache + + mlCommand.getCommandCase match { + case proto.MlCommand.CommandCase.FIT => + val fitCmd = mlCommand.getFit + val estimatorProto = fitCmd.getEstimator + assert(estimatorProto.getType == proto.MlOperator.OperatorType.ESTIMATOR) + + val dataset = MLUtils.parseRelationProto(fitCmd.getDataset, sessionHolder) + val estimator = MLUtils.getEstimator(estimatorProto, Some(fitCmd.getParams)) + val model = estimator.fit(dataset).asInstanceOf[Model[_]] + val id = mlCache.register(model) + proto.MlCommandResult + .newBuilder() + .setOperatorInfo( + 
proto.MlCommandResult.MlOperatorInfo + .newBuilder() + .setObjRef(proto.ObjectRef.newBuilder().setId(id))) + .build() + + case proto.MlCommand.CommandCase.FETCH => + val helper = AttributeHelper( + sessionHolder, + mlCommand.getFetch.getObjRef.getId, + mlCommand.getFetch.getMethodsList.asScala.toArray) + val attrResult = helper.getAttribute + attrResult match { + case s: Summary => + val id = mlCache.register(s) + proto.MlCommandResult.newBuilder().setSummary(id).build() + case _ => + val param = Serializer.serializeParam(attrResult) + proto.MlCommandResult.newBuilder().setParam(param).build() + } + + case proto.MlCommand.CommandCase.DELETE => + val objId = mlCommand.getDelete.getObjRef.getId + var result = false + if (!objId.contains(".")) { + mlCache.remove(objId) + result = true + } + proto.MlCommandResult + .newBuilder() + .setParam( + proto.Param + .newBuilder() + .setLiteral(LiteralValueProtoConverter.toLiteralProto(result)) + .build()) + .build() + + case proto.MlCommand.CommandCase.WRITE => + mlCommand.getWrite.getTypeCase match { + case proto.MlCommand.Write.TypeCase.OBJ_REF => // save a model + val objId = mlCommand.getWrite.getObjRef.getId + val model = mlCache.get(objId).asInstanceOf[Model[_]] + val copiedModel = model.copy(ParamMap.empty).asInstanceOf[Model[_]] + MLUtils.setInstanceParams(copiedModel, mlCommand.getWrite.getParams) + + copiedModel match { + case m: MLWritable => MLUtils.write(m, mlCommand.getWrite) + case other => throw MlUnsupportedException(s"$other is not writable") + } + + // save an estimator/evaluator/transformer + case proto.MlCommand.Write.TypeCase.OPERATOR => + val writer = mlCommand.getWrite + if (writer.getOperator.getType == proto.MlOperator.OperatorType.ESTIMATOR) { + val estimator = MLUtils.getEstimator(writer.getOperator, Some(writer.getParams)) + estimator match { + case m: MLWritable => MLUtils.write(m, mlCommand.getWrite) + case other => throw MlUnsupportedException(s"Estimator $other is not writable") + } + } else { + 
throw MlUnsupportedException(s"${writer.getOperator.getName} not supported") + } + + case other => throw MlUnsupportedException(s"$other not supported") + } + proto.MlCommandResult.newBuilder().build() + + case proto.MlCommand.CommandCase.READ => + val operator = mlCommand.getRead.getOperator + val name = operator.getName + val path = mlCommand.getRead.getPath + + if (operator.getType == proto.MlOperator.OperatorType.MODEL) { + val model = MLUtils.load(name, path).asInstanceOf[Model[_]] + val id = mlCache.register(model) + proto.MlCommandResult + .newBuilder() + .setOperatorInfo( + proto.MlCommandResult.MlOperatorInfo + .newBuilder() + .setObjRef(proto.ObjectRef.newBuilder().setId(id)) + .setUid(model.uid) + .setParams(Serializer.serializeParams(model))) + .build() + + } else if (operator.getType == proto.MlOperator.OperatorType.ESTIMATOR) { + val estimator = MLUtils.load(name, path).asInstanceOf[Estimator[_]] + proto.MlCommandResult + .newBuilder() + .setOperatorInfo( + proto.MlCommandResult.MlOperatorInfo + .newBuilder() + .setName(name) + .setUid(estimator.uid) + .setParams(Serializer.serializeParams(estimator))) + .build() + } else { + throw MlUnsupportedException(s"${operator.getType} not supported") + } + + case other => throw MlUnsupportedException(s"$other not supported") + } + } + + def transformMLRelation(relation: proto.MlRelation, sessionHolder: SessionHolder): DataFrame = { + relation.getMlTypeCase match { + // Ml transform + case proto.MlRelation.MlTypeCase.TRANSFORM => + relation.getTransform.getOperatorCase match { + // transform for a new ML transformer + case proto.MlRelation.Transform.OperatorCase.TRANSFORMER => + val transformProto = relation.getTransform + assert( + transformProto.getTransformer.getType == + proto.MlOperator.OperatorType.TRANSFORMER) + val dataset = MLUtils.parseRelationProto(transformProto.getInput, sessionHolder) + val transformer = MLUtils.getTransformer(transformProto) + transformer.transform(dataset) + + // transform on a 
cached model + case proto.MlRelation.Transform.OperatorCase.OBJ_REF => + val helper = + ModelAttributeHelper( + sessionHolder, + relation.getTransform.getObjRef.getId, + Array.empty) + helper.transform(relation.getTransform) + + case other => throw new IllegalArgumentException(s"$other not supported") + } + + // Get the attribute from a cached object which could be a model or summary + case proto.MlRelation.MlTypeCase.FETCH => + val helper = AttributeHelper( + sessionHolder, + relation.getFetch.getObjRef.getId, + relation.getFetch.getMethodsList.asScala.toArray) + helper.getAttribute.asInstanceOf[DataFrame] + + case other => throw MlUnsupportedException(s"$other not supported") + } + } + +} diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLUtils.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLUtils.scala new file mode 100644 index 0000000000000..72c86401eb631 --- /dev/null +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/MLUtils.scala @@ -0,0 +1,353 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connect.ml + +import java.util.ServiceLoader + +import scala.collection.immutable.HashSet +import scala.jdk.CollectionConverters._ + +import org.apache.commons.lang3.reflect.MethodUtils.invokeMethod + +import org.apache.spark.connect.proto +import org.apache.spark.ml.{Estimator, Transformer} +import org.apache.spark.ml.linalg.{Matrices, Matrix, Vector, Vectors} +import org.apache.spark.ml.param.Params +import org.apache.spark.ml.util.MLWritable +import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.connect.common.LiteralValueProtoConverter +import org.apache.spark.sql.connect.planner.SparkConnectPlanner +import org.apache.spark.sql.connect.service.SessionHolder +import org.apache.spark.util.{SparkClassUtils, Utils} + +private[ml] object MLUtils { + + /** + * Load the registered ML operators via ServiceLoader + * + * @param mlCls + * the operator class + * @return + * a Map with name and class + */ + private def loadOperators(mlCls: Class[_]): Map[String, Class[_]] = { + val loader = Utils.getContextOrSparkClassLoader + val serviceLoader = ServiceLoader.load(mlCls, loader) + val providers = serviceLoader.asScala.toList + providers.map(est => est.getClass.getName -> est.getClass).toMap + } + + private lazy val estimators = loadOperators(classOf[Estimator[_]]) + + private lazy val transformers = loadOperators(classOf[Transformer]) + + def deserializeVector(vector: proto.Vector): Vector = { + if (vector.hasDense) { + val values = vector.getDense.getValueList.asScala.map(_.toDouble).toArray + Vectors.dense(values) + } else { + val size = vector.getSparse.getSize + val indices = vector.getSparse.getIndexList.asScala.map(_.toInt).toArray + val values = vector.getSparse.getValueList.asScala.map(_.toDouble).toArray + Vectors.sparse(size, indices, values) + } + } + + def deserializeMatrix(matrix: proto.Matrix): Matrix = { + if (matrix.hasDense) { + val values = 
matrix.getDense.getValueList.asScala.map(_.toDouble).toArray + Matrices.dense(matrix.getDense.getNumRows, matrix.getDense.getNumCols, values) + } else { + val sparse = matrix.getSparse + val colPtrs = sparse.getColptrList.asScala.map(_.toInt).toArray + val rowIndices = sparse.getRowIndexList.asScala.map(_.toInt).toArray + val values = sparse.getValueList.asScala.map(_.toDouble).toArray + Matrices.sparse(sparse.getNumRows, sparse.getNumCols, colPtrs, rowIndices, values) + } + } + + /** + * Set the parameters to the ML instance + * + * @param instance + * an ML operator + * @param params + * the parameters of the ML operator + */ + def setInstanceParams(instance: Params, params: proto.MlParams): Unit = { + params.getParamsMap.asScala.foreach { case (name, paramProto) => + val p = instance.getParam(name) + val value = if (paramProto.hasLiteral) { + reconcileParam( + p.paramValueClassTag.runtimeClass, + LiteralValueProtoConverter.toCatalystValue(paramProto.getLiteral)) + } else if (paramProto.hasVector) { + deserializeVector(paramProto.getVector) + } else if (paramProto.hasMatrix) { + deserializeMatrix(paramProto.getMatrix) + } else { + throw MlUnsupportedException(s"Unsupported parameter type for ${name}") + } + instance.set(p, value) + } + } + + /** + * Convert the array from Object[] to Array[_] + * @param elementType + * the element type of the array + * @param array + * to be reconciled + * @return + * the reconciled array + */ + private def reconcileArray(elementType: Class[_], array: Array[_]): Array[_] = { + if (elementType == classOf[Byte]) { + array.map(_.asInstanceOf[Byte]) + } else if (elementType == classOf[Short]) { + array.map(_.asInstanceOf[Short]) + } else if (elementType == classOf[Int]) { + array.map(_.asInstanceOf[Int]) + } else if (elementType == classOf[Long]) { + array.map(_.asInstanceOf[Long]) + } else if (elementType == classOf[Float]) { + array.map(_.asInstanceOf[Float]) + } else if (elementType == classOf[Double]) { + 
array.map(_.asInstanceOf[Double]) + } else if (elementType == classOf[String]) { + array.map(_.asInstanceOf[String]) + } else { + throw MlUnsupportedException( + s"array element type unsupported, " + + s"found ${elementType.getName}") + } + } + + /** + * Reconcile the parameter value given the provided parameter type. Currently, support + * byte/short/int/long/float/double/string and array. Note that, array of array is not supported + * yet. + */ + private def reconcileParam(paramType: Class[_], value: Any): Any = { + // Some cases the param type might be mismatched with the value type. + // Because in python side we only have int / float type for numeric params. + // e.g.: + // param type is Int but client sends a Long type. + // param type is Long but client sends a Int type. + // param type is Float but client sends a Double type. + // param type is Array[Int] but client sends a Array[Long] type. + // param type is Array[Float] but client sends a Array[Double] type. + // param type is Array[Array[Int]] but client sends a Array[Array[Long]] type. + // param type is Array[Array[Float]] but client sends a Array[Array[Double]] type. 
+ if (paramType == classOf[Byte]) { + value.asInstanceOf[java.lang.Number].byteValue() + } else if (paramType == classOf[Short]) { + value.asInstanceOf[java.lang.Number].shortValue() + } else if (paramType == classOf[Int]) { + value.asInstanceOf[java.lang.Number].intValue() + } else if (paramType == classOf[Long]) { + value.asInstanceOf[java.lang.Number].longValue() + } else if (paramType == classOf[Float]) { + value.asInstanceOf[java.lang.Number].floatValue() + } else if (paramType == classOf[Double]) { + value.asInstanceOf[java.lang.Number].doubleValue() + } else if (paramType == classOf[Boolean]) { + value.asInstanceOf[Boolean] + } else if (paramType == classOf[String]) { + value.asInstanceOf[String] + } else if (paramType.isArray) { + val compType = paramType.getComponentType + if (compType.isArray) { + throw MlUnsupportedException(s"Array of array unsupported") + } else { + val array = value.asInstanceOf[Array[_]].map { e => + reconcileParam(compType, e) + } + reconcileArray(compType, array) + } + } else { + throw MlUnsupportedException(s"Unsupported parameter type, found ${paramType.getName}") + } + } + + def parseRelationProto(relation: proto.Relation, sessionHolder: SessionHolder): DataFrame = { + val planner = new SparkConnectPlanner(sessionHolder) + val plan = planner.transformRelation(relation) + Dataset.ofRows(sessionHolder.session, plan) + } + + /** + * Get the instance according to the provided proto information. + * + * @param name + * The name of the instance (either estimator or transformer). + * @param uid + * The unique identifier for the instance. + * @param instanceMap + * A map of instance names to constructors. + * @param params + * Optional parameters for the instance. + * @tparam T + * The type of the instance (Estimator or Transformer). + * @return + * The instance of the requested type. + * @throws MlUnsupportedException + * If the instance is not supported. 
+ */ + private def getInstance[T]( + name: String, + uid: String, + instanceMap: Map[String, Class[_]], + params: Option[proto.MlParams]): T = { + if (instanceMap.isEmpty || !instanceMap.contains(name)) { + throw MlUnsupportedException(s"Unsupported ML operator, found $name") + } + + val instance = instanceMap(name) + .getConstructor(classOf[String]) + .newInstance(uid) + .asInstanceOf[T] + + // Set parameters for the instance if they are provided + params.foreach(p => MLUtils.setInstanceParams(instance.asInstanceOf[Params], p)) + instance + } + + /** + * Get the Estimator instance according to the proto information + * + * @param operator + * MlOperator information + * @param params + * The optional parameters of the estimator + * @return + * the estimator + */ + def getEstimator(operator: proto.MlOperator, params: Option[proto.MlParams]): Estimator[_] = { + val name = operator.getName + val uid = operator.getUid + getInstance[Estimator[_]](name, uid, estimators, params) + } + + /** + * Get the transformer instance according to the transform proto + * + * @param transformProto + * transform proto + * @return + * a transformer + */ + def getTransformer(transformProto: proto.MlRelation.Transform): Transformer = { + val name = transformProto.getTransformer.getName + val uid = transformProto.getTransformer.getUid + val params = transformProto.getParams + getInstance[Transformer](name, uid, transformers, Some(params)) + } + + /** + * Call "load: function on the ML operator given the operator name + * + * @param className + * the ML operator name + * @param path + * the path to be loaded + * @return + * the ML instance + */ + def load(className: String, path: String): Object = { + val loadedMethod = SparkClassUtils.classForName(className).getMethod("load", classOf[String]) + loadedMethod.invoke(null, path) + } + + // Since we're using reflection way to get the attribute, in order not to + // leave a security hole, we define an allowed attribute list that can be 
accessed. + // The attributes could be retrieved from the corresponding python class + private lazy val ALLOWED_ATTRIBUTES = HashSet( + "toString", + "numFeatures", + "predict", // PredictionModel + "numClasses", + "predictRaw", // ClassificationModel + "predictProbability", // ProbabilisticClassificationModel + "coefficients", + "intercept", + "coefficientMatrix", + "interceptVector", // LogisticRegressionModel + "summary", + "hasSummary", + "evaluate", // LogisticRegressionModel + "predictions", + "predictionCol", + "labelCol", + "weightCol", + "labels", // _ClassificationSummary + "truePositiveRateByLabel", + "falsePositiveRateByLabel", // _ClassificationSummary + "precisionByLabel", + "recallByLabel", + "fMeasureByLabel", + "accuracy", // _ClassificationSummary + "weightedTruePositiveRate", + "weightedFalsePositiveRate", // _ClassificationSummary + "weightedRecall", + "weightedPrecision", + "weightedFMeasure", // _ClassificationSummary + "scoreCol", + "roc", + "areaUnderROC", + "pr", + "fMeasureByThreshold", // _BinaryClassificationSummary + "precisionByThreshold", + "recallByThreshold", // _BinaryClassificationSummary + "probabilityCol", + "featuresCol", // LogisticRegressionSummary + "objectiveHistory", + "totalIterations" // _TrainingSummary + ) + + def invokeMethodAllowed(obj: Object, methodName: String): Object = { + if (!ALLOWED_ATTRIBUTES.contains(methodName)) { + throw MLAttributeNotAllowedException(methodName) + } + invokeMethod(obj, methodName) + } + + def invokeMethodAllowed( + obj: Object, + methodName: String, + args: Array[Object], + parameterTypes: Array[Class[_]]): Object = { + if (!ALLOWED_ATTRIBUTES.contains(methodName)) { + throw MLAttributeNotAllowedException(methodName) + } + invokeMethod(obj, methodName, args, parameterTypes) + } + + def write(instance: MLWritable, writeProto: proto.MlCommand.Write): Unit = { + val writer = if (writeProto.getShouldOverwrite) { + instance.write.overwrite() + } else { + instance.write + } + val path = 
writeProto.getPath + val options = writeProto.getOptionsMap + options.forEach((k, v) => writer.option(k, v)) + writer.save(path) + } + +} diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/Serializer.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/Serializer.scala new file mode 100644 index 0000000000000..ad6735997f834 --- /dev/null +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/ml/Serializer.scala @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connect.ml + +import org.apache.spark.connect.proto +import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector, SparseMatrix, SparseVector} +import org.apache.spark.ml.param.Params +import org.apache.spark.sql.Dataset +import org.apache.spark.sql.connect.common.LiteralValueProtoConverter +import org.apache.spark.sql.connect.service.SessionHolder + +private[ml] object Serializer { + + /** + * Serialize the ML parameters, currently support Vector/Matrix and literals + * @param data + * the value of parameter + * @return + * proto.Param + */ + def serializeParam(data: Any): proto.Param = { + data match { + case v: DenseVector => + val denseBuilder = proto.Vector.Dense.newBuilder() + v.values.foreach(denseBuilder.addValue) + proto.Param + .newBuilder() + .setVector(proto.Vector.newBuilder().setDense(denseBuilder)) + .build() + case v: SparseVector => + val sparseBuilder = proto.Vector.Sparse.newBuilder().setSize(v.size) + v.indices.foreach(sparseBuilder.addIndex) + v.values.foreach(sparseBuilder.addValue) + proto.Param + .newBuilder() + .setVector(proto.Vector.newBuilder().setSparse(sparseBuilder)) + .build() + case v: DenseMatrix => + val denseBuilder = proto.Matrix.Dense.newBuilder() + v.values.foreach(denseBuilder.addValue) + denseBuilder.setNumCols(v.numCols) + denseBuilder.setNumRows(v.numRows) + denseBuilder.setIsTransposed(v.isTransposed) + proto.Param + .newBuilder() + .setMatrix(proto.Matrix.newBuilder().setDense(denseBuilder)) + .build() + case v: SparseMatrix => + val sparseBuilder = proto.Matrix.Sparse + .newBuilder() + .setNumCols(v.numCols) + .setNumRows(v.numRows) + v.values.foreach(sparseBuilder.addValue) + v.colPtrs.foreach(sparseBuilder.addColptr) + v.rowIndices.foreach(sparseBuilder.addRowIndex) + proto.Param + .newBuilder() + .setMatrix(proto.Matrix.newBuilder().setSparse(sparseBuilder)) + .build() + case _: Byte | _: Short | _: Int | _: Long | _: Float | _: Double | _: Boolean | _: String | + _: Array[_] => + 
proto.Param + .newBuilder() + .setLiteral(LiteralValueProtoConverter.toLiteralProto(data)) + .build() + + case other => throw MlUnsupportedException(s"$other not supported") + } + } + + def deserializeMethodArguments( + args: Array[proto.Fetch.Method.Args], + sessionHolder: SessionHolder): Array[(Object, Class[_])] = { + args.map { arg => + if (arg.hasParam) { + val param = arg.getParam + if (param.hasLiteral) { + param.getLiteral.getLiteralTypeCase match { + case proto.Expression.Literal.LiteralTypeCase.INTEGER => + (param.getLiteral.getInteger.asInstanceOf[Object], classOf[Int]) + case proto.Expression.Literal.LiteralTypeCase.FLOAT => + (param.getLiteral.getFloat.toDouble.asInstanceOf[Object], classOf[Double]) + case proto.Expression.Literal.LiteralTypeCase.STRING => + (param.getLiteral.getString, classOf[String]) + case proto.Expression.Literal.LiteralTypeCase.DOUBLE => + (param.getLiteral.getDouble.asInstanceOf[Object], classOf[Double]) + case proto.Expression.Literal.LiteralTypeCase.BOOLEAN => + (param.getLiteral.getBoolean.asInstanceOf[Object], classOf[Boolean]) + case other => + throw MlUnsupportedException(s"$other not supported") + } + } else if (param.hasVector) { + val vector = MLUtils.deserializeVector(param.getVector) + val vectorType = if (param.getVector.hasDense) { + classOf[DenseVector] + } else { + classOf[SparseVector] + } + (vector, vectorType) + } else if (param.hasMatrix) { + val matrix = MLUtils.deserializeMatrix(param.getMatrix) + val matrixType = if (param.getMatrix.hasDense) { + classOf[DenseMatrix] + } else { + classOf[SparseMatrix] + } + (matrix, matrixType) + } else { + throw MlUnsupportedException(s"$param not supported") + } + } else if (arg.hasInput) { + (MLUtils.parseRelationProto(arg.getInput, sessionHolder), classOf[Dataset[_]]) + } else { + throw MlUnsupportedException(s"$arg not supported") + } + } + } + + /** + * Serialize an instance of "Params" which could be estimator/model/evaluator ... 
+ * @param instance + * of Params + * @return + * proto.MlParams + */ + def serializeParams(instance: Params): proto.MlParams = { + val builder = proto.MlParams.newBuilder() + instance.params.foreach { param => + if (instance.isSet(param)) { + val v = serializeParam(instance.get(param).get) + builder.putParams(param.name, v) + } + } + builder.build() + } +} diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 979fd83612e7b..94a1ab1618086 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -43,9 +43,9 @@ import org.apache.spark.connect.proto.WriteStreamOperationStart.TriggerCase import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, SESSION_ID} import org.apache.spark.resource.{ExecutorResourceRequest, ResourceProfile, TaskResourceProfile, TaskResourceRequest} -import org.apache.spark.sql.{Dataset, Encoders, ForeachWriter, Observation, RelationalGroupedDataset, Row, SparkSession} +import org.apache.spark.sql.{Column, Dataset, Encoders, ForeachWriter, Observation, RelationalGroupedDataset, Row, SparkSession} import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier, QueryPlanningTracker} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, GlobalTempView, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedTableValuedFunction, UnresolvedTranspose} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, GlobalTempView, 
LazyExpression, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedPlanId, UnresolvedRegex, UnresolvedRelation, UnresolvedStar, UnresolvedStarWithColumns, UnresolvedStarWithColumnsRenames, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction, UnresolvedTranspose} import org.apache.spark.sql.catalyst.encoders.{encoderFor, AgnosticEncoder, ExpressionEncoder, RowEncoder} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.UnboundRowEncoder import org.apache.spark.sql.catalyst.expressions._ @@ -55,11 +55,13 @@ import org.apache.spark.sql.catalyst.plans.{Cross, FullOuter, Inner, JoinType, L import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.{AppendColumns, Assignment, CoGroup, CollectMetrics, CommandResult, Deduplicate, DeduplicateWithinWatermark, DeleteAction, DeserializeToObject, Except, FlatMapGroupsWithState, InsertAction, InsertStarAction, Intersect, JoinWith, LocalRelation, LogicalGroupState, LogicalPlan, MapGroups, MapPartitions, MergeAction, Project, Sample, SerializeFromObject, Sort, SubqueryAlias, TypedFilter, Union, Unpivot, UnresolvedHint, UpdateAction, UpdateStarAction} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes -import org.apache.spark.sql.catalyst.trees.CurrentOrigin +import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreePattern} import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils} -import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, ForeachWriterPacket, InvalidPlanInput, LiteralValueProtoConverter, StorageLevelProtoConverter, StreamingListenerPacket, UdfPacket} +import org.apache.spark.sql.classic.ClassicConversions._ +import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, 
ForeachWriterPacket, InvalidCommandInput, InvalidPlanInput, LiteralValueProtoConverter, StorageLevelProtoConverter, StreamingListenerPacket, UdfPacket} import org.apache.spark.sql.connect.config.Connect.CONNECT_GRPC_ARROW_MAX_BATCH_SIZE +import org.apache.spark.sql.connect.ml.MLHandler import org.apache.spark.sql.connect.plugin.SparkConnectPluginRegistry import org.apache.spark.sql.connect.service.{ExecuteHolder, SessionHolder, SparkConnectService} import org.apache.spark.sql.connect.utils.MetricGenerator @@ -76,8 +78,7 @@ import org.apache.spark.sql.execution.stat.StatFunctions import org.apache.spark.sql.execution.streaming.GroupStateImpl.groupStateTimeoutFromString import org.apache.spark.sql.execution.streaming.StreamingQueryWrapper import org.apache.spark.sql.expressions.{Aggregator, ReduceAggregator, SparkUserDefinedFunction, UserDefinedAggregator, UserDefinedFunction} -import org.apache.spark.sql.internal.{CatalogImpl, MergeIntoWriterImpl, TypedAggUtils} -import org.apache.spark.sql.internal.ExpressionUtils.column +import org.apache.spark.sql.internal.{CatalogImpl, MergeIntoWriterImpl, TypedAggUtils, UserDefinedFunctionUtils} import org.apache.spark.sql.streaming.{GroupStateTimeout, OutputMode, StreamingQuery, StreamingQueryListener, StreamingQueryProgress, Trigger} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -85,11 +86,6 @@ import org.apache.spark.storage.CacheId import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils -final case class InvalidCommandInput( - private val message: String = "", - private val cause: Throwable = null) - extends Exception(message, cause) - class SparkConnectPlanner( val sessionHolder: SessionHolder, val executeHolderOpt: Option[ExecuteHolder] = None) @@ -106,7 +102,7 @@ class SparkConnectPlanner( @Since("4.0.0") @DeveloperApi def session: SparkSession = sessionHolder.session - import sessionHolder.session.RichColumn + import 
sessionHolder.session.toRichColumn private[connect] def parser = session.sessionState.sqlParser @@ -159,15 +155,15 @@ class SparkConnectPlanner( case proto.Relation.RelTypeCase.TAIL => transformTail(rel.getTail) case proto.Relation.RelTypeCase.JOIN => transformJoinOrJoinWith(rel.getJoin) case proto.Relation.RelTypeCase.AS_OF_JOIN => transformAsOfJoin(rel.getAsOfJoin) + case proto.Relation.RelTypeCase.LATERAL_JOIN => transformLateralJoin(rel.getLateralJoin) case proto.Relation.RelTypeCase.DEDUPLICATE => transformDeduplicate(rel.getDeduplicate) case proto.Relation.RelTypeCase.SET_OP => transformSetOperation(rel.getSetOp) case proto.Relation.RelTypeCase.SORT => transformSort(rel.getSort) case proto.Relation.RelTypeCase.DROP => transformDrop(rel.getDrop) case proto.Relation.RelTypeCase.AGGREGATE => transformAggregate(rel.getAggregate) case proto.Relation.RelTypeCase.SQL => transformSql(rel.getSql) - case proto.Relation.RelTypeCase.WITH_RELATIONS - if isValidSQLWithRefs(rel.getWithRelations) => - transformSqlWithRefs(rel.getWithRelations) + case proto.Relation.RelTypeCase.WITH_RELATIONS => + transformWithRelations(rel.getWithRelations) case proto.Relation.RelTypeCase.LOCAL_RELATION => transformLocalRelation(rel.getLocalRelation) case proto.Relation.RelTypeCase.SAMPLE => transformSample(rel.getSample) @@ -228,6 +224,10 @@ class SparkConnectPlanner( // Catalog API (internal-only) case proto.Relation.RelTypeCase.CATALOG => transformCatalog(rel.getCatalog) + // ML Relation + case proto.Relation.RelTypeCase.ML_RELATION => + MLHandler.transformMLRelation(rel.getMlRelation, sessionHolder).logicalPlan + // Handle plugins for Spark Connect Relation types. 
case proto.Relation.RelTypeCase.EXTENSION => transformRelationPlugin(rel.getExtension) @@ -554,7 +554,7 @@ class SparkConnectPlanner( .ofRows(session, transformRelation(rel.getInput)) .stat .sampleBy( - col = column(transformExpression(rel.getCol)), + col = Column(transformExpression(rel.getCol)), fractions = fractions.toMap, seed = if (rel.hasSeed) rel.getSeed else Utils.random.nextLong) .logicalPlan @@ -562,7 +562,7 @@ class SparkConnectPlanner( private def transformToSchema(rel: proto.ToSchema): LogicalPlan = { val schema = transformDataType(rel.getSchema) - assert(schema.isInstanceOf[StructType]) + assertPlan(schema.isInstanceOf[StructType]) Dataset .ofRows(session, transformRelation(rel.getInput)) @@ -571,10 +571,9 @@ class SparkConnectPlanner( } private def transformToDF(rel: proto.ToDF): LogicalPlan = { - Dataset - .ofRows(session, transformRelation(rel.getInput)) - .toDF(rel.getColumnNamesList.asScala.toSeq: _*) - .logicalPlan + UnresolvedSubqueryColumnAliases( + rel.getColumnNamesList.asScala.toSeq, + transformRelation(rel.getInput)) } private def transformMapPartitions(rel: proto.MapPartitions): LogicalPlan = { @@ -646,17 +645,17 @@ class SparkConnectPlanner( val pythonUdf = transformPythonUDF(commonUdf) val cols = rel.getGroupingExpressionsList.asScala.toSeq.map(expr => - column(transformExpression(expr))) + Column(transformExpression(expr))) val group = Dataset .ofRows(session, transformRelation(rel.getInput)) .groupBy(cols: _*) pythonUdf.evalType match { case PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF => - group.flatMapGroupsInPandas(column(pythonUdf)).logicalPlan + group.flatMapGroupsInPandas(Column(pythonUdf)).logicalPlan case PythonEvalType.SQL_GROUPED_MAP_ARROW_UDF => - group.flatMapGroupsInArrow(column(pythonUdf)).logicalPlan + group.flatMapGroupsInArrow(Column(pythonUdf)).logicalPlan case _ => throw InvalidPlanInput( @@ -672,7 +671,8 @@ class SparkConnectPlanner( private def transformTypedGroupMap( rel: proto.GroupMap, commonUdf: 
proto.CommonInlineUserDefinedFunction): LogicalPlan = { - val udf = TypedScalaUdf(commonUdf) + val unpackedUdf = unpackUdf(commonUdf) + val udf = TypedScalaUdf(unpackedUdf, None) val ds = UntypedKeyValueGroupedDataset( rel.getInput, rel.getGroupingExpressionsList, @@ -702,6 +702,18 @@ class SparkConnectPlanner( InternalOutputModes(rel.getOutputMode) } + val stateSchema = DataTypeProtoConverter.toCatalystType(rel.getStateSchema) match { + case s: StructType => s + case other => + throw InvalidPlanInput( + s"Invalid state schema dataType $other for flatMapGroupsWithState") + } + val stateEncoder = TypedScalaUdf.encoderFor( + // the state agnostic encoder is the second element in the input encoders. + unpackedUdf.inputEncoders.tail.head, + "state", + Some(DataTypeUtils.toAttributes(stateSchema))) + val flatMapGroupsWithState = if (hasInitialState) { new FlatMapGroupsWithState( udf.function @@ -711,7 +723,7 @@ class SparkConnectPlanner( ds.groupingAttributes, ds.dataAttributes, udf.outputObjAttr, - initialDs.vEncoder.asInstanceOf[ExpressionEncoder[Any]], + stateEncoder.asInstanceOf[ExpressionEncoder[Any]], outputMode, rel.getIsMapGroupsWithState, timeoutConf, @@ -730,7 +742,7 @@ class SparkConnectPlanner( ds.groupingAttributes, ds.dataAttributes, udf.outputObjAttr, - initialDs.vEncoder.asInstanceOf[ExpressionEncoder[Any]], + stateEncoder.asInstanceOf[ExpressionEncoder[Any]], outputMode, rel.getIsMapGroupsWithState, timeoutConf, @@ -765,10 +777,10 @@ class SparkConnectPlanner( case proto.CommonInlineUserDefinedFunction.FunctionCase.PYTHON_UDF => val inputCols = rel.getInputGroupingExpressionsList.asScala.toSeq.map(expr => - column(transformExpression(expr))) + Column(transformExpression(expr))) val otherCols = rel.getOtherGroupingExpressionsList.asScala.toSeq.map(expr => - column(transformExpression(expr))) + Column(transformExpression(expr))) val input = Dataset .ofRows(session, transformRelation(rel.getInput)) @@ -783,10 +795,10 @@ class SparkConnectPlanner( 
pythonUdf.evalType match { case PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF => - input.flatMapCoGroupsInPandas(other, pythonUdf).logicalPlan + input.flatMapCoGroupsInPandas(other, Column(pythonUdf)).logicalPlan case PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF => - input.flatMapCoGroupsInArrow(other, pythonUdf).logicalPlan + input.flatMapCoGroupsInArrow(other, Column(pythonUdf)).logicalPlan case _ => throw InvalidPlanInput( @@ -838,9 +850,10 @@ class SparkConnectPlanner( kEncoder: ExpressionEncoder[_], vEncoder: ExpressionEncoder[_], analyzed: LogicalPlan, - dataAttributes: Seq[Attribute], + analyzedData: LogicalPlan, groupingAttributes: Seq[Attribute], sortOrder: Seq[SortOrder]) { + val dataAttributes: Seq[Attribute] = analyzedData.output val valueDeserializer: Expression = UnresolvedDeserializer(vEncoder.deserializer, dataAttributes) } @@ -880,18 +893,20 @@ class SparkConnectPlanner( logicalPlan: LogicalPlan, groupingExprs: java.util.List[proto.Expression], sortOrder: Seq[SortOrder]): UntypedKeyValueGroupedDataset = { - assert(groupingExprs.size() >= 1) + val analyzed = session.sessionState.executePlan(logicalPlan).analyzed + + assertPlan(groupingExprs.size() >= 1) val dummyFunc = TypedScalaUdf(groupingExprs.get(0), None) val groupExprs = groupingExprs.asScala.toSeq.drop(1).map(expr => transformExpression(expr)) val (qe, aliasedGroupings) = - RelationalGroupedDataset.handleGroupingExpression(logicalPlan, session, groupExprs) + RelationalGroupedDataset.handleGroupingExpression(analyzed, session, groupExprs) UntypedKeyValueGroupedDataset( dummyFunc.outEnc, dummyFunc.inEnc, qe.analyzed, - logicalPlan.output, + analyzed, aliasedGroupings, sortOrder) } @@ -900,20 +915,22 @@ class SparkConnectPlanner( logicalPlan: LogicalPlan, groupingExprs: java.util.List[proto.Expression], sortOrder: Seq[SortOrder]): UntypedKeyValueGroupedDataset = { - assert(groupingExprs.size() == 1) - val groupFunc = TypedScalaUdf(groupingExprs.get(0), Some(logicalPlan.output)) + val analyzed = 
session.sessionState.executePlan(logicalPlan).analyzed + + assertPlan(groupingExprs.size() == 1) + val groupFunc = TypedScalaUdf(groupingExprs.get(0), Some(analyzed.output)) val vEnc = groupFunc.inEnc val kEnc = groupFunc.outEnc - val withGroupingKey = AppendColumns(groupFunc.function, vEnc, kEnc, logicalPlan) + val withGroupingKey = AppendColumns(groupFunc.function, vEnc, kEnc, analyzed) // The input logical plan of KeyValueGroupedDataset need to be executed and analyzed - val analyzed = session.sessionState.executePlan(withGroupingKey).analyzed + val withGroupingKeyAnalyzed = session.sessionState.executePlan(withGroupingKey).analyzed UntypedKeyValueGroupedDataset( kEnc, vEnc, + withGroupingKeyAnalyzed, analyzed, - logicalPlan.output, withGroupingKey.newColumns, sortOrder) } @@ -948,19 +965,23 @@ class SparkConnectPlanner( } } - def apply( - commonUdf: proto.CommonInlineUserDefinedFunction, - inputAttrs: Option[Seq[Attribute]] = None): TypedScalaUdf = { - val udf = unpackUdf(commonUdf) + def apply(udf: UdfPacket, inputAttrs: Option[Seq[Attribute]]): TypedScalaUdf = { // There might be more than one inputs, but we only interested in the first one. // Most typed API takes one UDF input. // For the few that takes more than one inputs, e.g. grouping function mapping UDFs, // the first input which is the key of the grouping function. 
- assert(udf.inputEncoders.nonEmpty) + assertPlan(udf.inputEncoders.nonEmpty) val inEnc = udf.inputEncoders.head // single input encoder or key encoder TypedScalaUdf(udf.function, udf.outputEncoder, inEnc, inputAttrs) } + def apply( + commonUdf: proto.CommonInlineUserDefinedFunction, + inputAttrs: Option[Seq[Attribute]] = None): TypedScalaUdf = { + val udf = unpackUdf(commonUdf) + apply(udf, inputAttrs) + } + def encoderFor( encoder: AgnosticEncoder[_], errorType: String, @@ -982,7 +1003,7 @@ class SparkConnectPlanner( private def transformApplyInPandasWithState(rel: proto.ApplyInPandasWithState): LogicalPlan = { val pythonUdf = transformPythonUDF(rel.getFunc) val cols = - rel.getGroupingExpressionsList.asScala.toSeq.map(expr => column(transformExpression(expr))) + rel.getGroupingExpressionsList.asScala.toSeq.map(expr => Column(transformExpression(expr))) val outputSchema = parseSchema(rel.getOutputSchema) @@ -992,7 +1013,7 @@ class SparkConnectPlanner( .ofRows(session, transformRelation(rel.getInput)) .groupBy(cols: _*) .applyInPandasWithState( - column(pythonUdf), + Column(pythonUdf), outputSchema, stateSchema, rel.getOutputMode, @@ -1049,25 +1070,21 @@ class SparkConnectPlanner( } private def transformWithColumnsRenamed(rel: proto.WithColumnsRenamed): LogicalPlan = { - if (rel.getRenamesCount > 0) { - val (colNames, newColNames) = rel.getRenamesList.asScala.toSeq.map { rename => + val (colNames, newColNames) = if (rel.getRenamesCount > 0) { + rel.getRenamesList.asScala.toSeq.map { rename => (rename.getColName, rename.getNewColName) }.unzip - Dataset - .ofRows(session, transformRelation(rel.getInput)) - .withColumnsRenamed(colNames, newColNames) - .logicalPlan } else { // for backward compatibility - Dataset - .ofRows(session, transformRelation(rel.getInput)) - .withColumnsRenamed(rel.getRenameColumnsMapMap) - .logicalPlan + rel.getRenameColumnsMapMap.asScala.toSeq.unzip } + Project( + Seq(UnresolvedStarWithColumnsRenames(existingNames = colNames, newNames = 
newColNames)), + transformRelation(rel.getInput)) } private def transformWithColumns(rel: proto.WithColumns): LogicalPlan = { - val (colNames, cols, metadata) = + val (colNames, exprs, metadata) = rel.getAliasesList.asScala.toSeq.map { alias => if (alias.getNameCount != 1) { throw InvalidPlanInput(s"""WithColumns require column name only contains one name part, @@ -1080,13 +1097,16 @@ class SparkConnectPlanner( Metadata.empty } - (alias.getName(0), column(transformExpression(alias.getExpr)), metadata) + (alias.getName(0), transformExpression(alias.getExpr), metadata) }.unzip3 - Dataset - .ofRows(session, transformRelation(rel.getInput)) - .withColumns(colNames, cols, metadata) - .logicalPlan + Project( + Seq( + UnresolvedStarWithColumns( + colNames = colNames, + exprs = exprs, + explicitMetadata = Some(metadata))), + transformRelation(rel.getInput)) } private def transformWithWatermark(rel: proto.WithWatermark): LogicalPlan = { @@ -1142,7 +1162,7 @@ class SparkConnectPlanner( private def transformUnpivot(rel: proto.Unpivot): LogicalPlan = { val ids = rel.getIdsList.asScala.toArray.map { expr => - column(transformExpression(expr)) + Column(transformExpression(expr)) } if (!rel.hasValues) { @@ -1155,7 +1175,7 @@ class SparkConnectPlanner( transformRelation(rel.getInput)) } else { val values = rel.getValues.getValuesList.asScala.toArray.map { expr => - column(transformExpression(expr)) + Column(transformExpression(expr)) } Unpivot( @@ -1184,20 +1204,20 @@ class SparkConnectPlanner( private def transformCollectMetrics(rel: proto.CollectMetrics, planId: Long): LogicalPlan = { val metrics = rel.getMetricsList.asScala.toSeq.map { expr => - column(transformExpression(expr)) + Column(transformExpression(expr)) } val name = rel.getName val input = transformRelation(rel.getInput) if (input.isStreaming || executeHolderOpt.isEmpty) { - CollectMetrics(name, metrics.map(_.named), transformRelation(rel.getInput), planId) + CollectMetrics(name, metrics.map(_.named), input, planId) 
} else { // TODO this might be too complex for no good reason. It might // be easier to inspect the plan after it completes. val observation = Observation(name) session.observationManager.register(observation, planId) executeHolderOpt.get.addObservation(name, observation) - CollectMetrics(name, metrics.map(_.named), transformRelation(rel.getInput), planId) + CollectMetrics(name, metrics.map(_.named), input, planId) } } @@ -1435,7 +1455,7 @@ class SparkConnectPlanner( } private def transformFilter(rel: proto.Filter): LogicalPlan = { - assert(rel.hasInput) + assertPlan(rel.hasInput) val baseRel = transformRelation(rel.getInput) val cond = rel.getCondition if (isTypedScalaUdfExpr(cond)) { @@ -1462,8 +1482,9 @@ class SparkConnectPlanner( private def transformTypedFilter( fun: proto.CommonInlineUserDefinedFunction, child: LogicalPlan): TypedFilter = { - val udf = TypedScalaUdf(fun, Some(child.output)) - TypedFilter(udf.function, child)(udf.inEnc) + val analyzed = session.sessionState.executePlan(child).analyzed + val udf = TypedScalaUdf(fun, Some(analyzed.output)) + TypedFilter(udf.function, analyzed)(udf.inEnc) } private def transformProject(rel: proto.Project): LogicalPlan = { @@ -1473,11 +1494,19 @@ class SparkConnectPlanner( logical.OneRowRelation() } + val logicalPlan = + if (rel.getExpressionsList.asScala.toSeq.exists( + _.getExprTypeCase == proto.Expression.ExprTypeCase.TYPED_AGGREGATE_EXPRESSION)) { + session.sessionState.executePlan(baseRel).analyzed + } else { + baseRel + } + val projection = rel.getExpressionsList.asScala.toSeq - .map(transformExpression(_, Some(baseRel))) + .map(transformExpression(_, Some(logicalPlan))) .map(toNamedExpression) - logical.Project(projectList = projection, child = baseRel) + logical.Project(projectList = projection, child = logicalPlan) } /** @@ -1561,6 +1590,10 @@ class SparkConnectPlanner( transformMergeAction(exp.getMergeAction) case proto.Expression.ExprTypeCase.TYPED_AGGREGATE_EXPRESSION => 
transformTypedAggregateExpression(exp.getTypedAggregateExpression, baseRelationOpt) + case proto.Expression.ExprTypeCase.LAZY_EXPRESSION => + transformLazyExpression(exp.getLazyExpression) + case proto.Expression.ExprTypeCase.SUBQUERY_EXPRESSION => + transformSubqueryExpression(exp.getSubqueryExpression) case _ => throw InvalidPlanInput( s"Expression with ID: ${exp.getExprTypeCase.getNumber} is not supported") @@ -1634,14 +1667,18 @@ class SparkConnectPlanner( fun.getArgumentsList.asScala.map(transformExpression).toSeq, isDistinct = fun.getIsDistinct) } else { - // Spark Connect historically used the global namespace to lookup a couple of internal - // functions (e.g. product, collect_top_k, unwrap_udt, ...). In Spark 4 we moved these - // functions to a dedicated namespace, however in order to stay backwards compatible we still - // need to allow connect to use the global namespace. Here we check if a function is - // registered in the internal function registry, and we reroute the lookup to the internal - // registry. val name = fun.getFunctionName - val internal = FunctionRegistry.internal.functionExists(FunctionIdentifier(name)) + val internal = if (fun.hasIsInternal) { + fun.getIsInternal + } else { + // Spark Connect historically used the global namespace to look up a couple of internal + // functions (e.g. product, collect_top_k, unwrap_udt, ...). In Spark 4 we moved these + // functions to a dedicated namespace, however in order to stay backwards compatible we + // still need to allow Connect to use the global namespace. Here we check if a function is + // registered in the internal function registry, and we reroute the lookup to the internal + // registry. 
+ FunctionRegistry.internal.functionExists(FunctionIdentifier(name)) + } UnresolvedFunction( name :: Nil, fun.getArgumentsList.asScala.map(transformExpression).toSeq, @@ -1723,40 +1760,42 @@ class SparkConnectPlanner( } /** - * Translates a Scala user-defined function from proto to the Catalyst expression. + * Translates a Scala user-defined function or aggregator from proto to the corresponding + * Catalyst expression. * * @param fun - * Proto representation of the Scala user-defined function. + * Proto representation of the Scala user-defined function or aggregator. * @return - * ScalaUDF. + * An expression, either a ScalaUDF or a ScalaAggregator. */ private def transformScalaUDF(fun: proto.CommonInlineUserDefinedFunction): Expression = { - val udf = fun.getScalarScalaUdf - val udfPacket = unpackUdf(fun) - if (udf.getAggregate) { - ScalaAggregator( - transformScalaFunction(fun).asInstanceOf[UserDefinedAggregator[Any, Any, Any]], - fun.getArgumentsList.asScala.map(transformExpression).toSeq) - .toAggregateExpression() - } else { - ScalaUDF( - function = udfPacket.function, - dataType = transformDataType(udf.getOutputType), - children = fun.getArgumentsList.asScala.map(transformExpression).toSeq, - inputEncoders = udfPacket.inputEncoders.map(e => Try(ExpressionEncoder(e)).toOption), - outputEncoder = Option(ExpressionEncoder(udfPacket.outputEncoder)), - udfName = Option(fun.getFunctionName), - nullable = udf.getNullable, - udfDeterministic = fun.getDeterministic) + val children = fun.getArgumentsList.asScala.map(transformExpression).toSeq + transformScalaFunction(fun) match { + case udf: SparkUserDefinedFunction => + UserDefinedFunctionUtils.toScalaUDF(udf, children) + case uda: UserDefinedAggregator[_, _, _] => + ScalaAggregator(uda, children).toAggregateExpression() + case other => + throw InvalidPlanInput( + s"Unsupported UserDefinedFunction implementation: ${other.getClass}") } } + /** + * Translates a Scala user-defined function or aggregator. 
from proto to a UserDefinedFunction. + * + * @param fun + * Proto representation of the Scala user-defined function or aggregator. + * @return + * A concrete UserDefinedFunction implementation, either a SparkUserDefinedFunction or a + * UserDefinedAggregator. + */ private def transformScalaFunction( fun: proto.CommonInlineUserDefinedFunction): UserDefinedFunction = { val udf = fun.getScalarScalaUdf val udfPacket = unpackUdf(fun) if (udf.getAggregate) { - assert(udfPacket.inputEncoders.size == 1, "UDAF should have exactly one input encoder") + assertPlan(udfPacket.inputEncoders.size == 1, "UDAF should have exactly one input encoder") UserDefinedAggregator( aggregator = udfPacket.function.asInstanceOf[Aggregator[Any, Any, Any]], inputEncoder = ExpressionEncoder(udfPacket.inputEncoders.head), @@ -2074,7 +2113,7 @@ class SparkConnectPlanner( } private def transformJoin(rel: proto.Join): LogicalPlan = { - assert(rel.hasLeft && rel.hasRight, "Both join sides must be present") + assertPlan(rel.hasLeft && rel.hasRight, "Both join sides must be present") if (rel.hasJoinCondition && rel.getUsingColumnsCount > 0) { throw InvalidPlanInput( s"Using columns or join conditions cannot be set at the same time in Join") @@ -2112,10 +2151,10 @@ class SparkConnectPlanner( private def transformAsOfJoin(rel: proto.AsOfJoin): LogicalPlan = { val left = Dataset.ofRows(session, transformRelation(rel.getLeft)) val right = Dataset.ofRows(session, transformRelation(rel.getRight)) - val leftAsOf = column(transformExpression(rel.getLeftAsOf)) - val rightAsOf = column(transformExpression(rel.getRightAsOf)) + val leftAsOf = Column(transformExpression(rel.getLeftAsOf)) + val rightAsOf = Column(transformExpression(rel.getRightAsOf)) val joinType = rel.getJoinType - val tolerance = if (rel.hasTolerance) column(transformExpression(rel.getTolerance)) else null + val tolerance = if (rel.hasTolerance) Column(transformExpression(rel.getTolerance)) else null val allowExactMatches = 
rel.getAllowExactMatches val direction = rel.getDirection @@ -2131,7 +2170,7 @@ class SparkConnectPlanner( allowExactMatches = allowExactMatches, direction = direction) } else { - val joinExprs = if (rel.hasJoinExpr) column(transformExpression(rel.getJoinExpr)) else null + val joinExprs = if (rel.hasJoinExpr) Column(transformExpression(rel.getJoinExpr)) else null left.joinAsOf( other = right, leftAsOf = leftAsOf, @@ -2145,8 +2184,21 @@ class SparkConnectPlanner( joined.logicalPlan } + private def transformLateralJoin(rel: proto.LateralJoin): LogicalPlan = { + assertPlan(rel.hasLeft && rel.hasRight, "Both join sides must be present") + val joinCondition = + if (rel.hasJoinCondition) Some(transformExpression(rel.getJoinCondition)) else None + val joinType = transformJoinType( + if (rel.getJoinType != null) rel.getJoinType else proto.Join.JoinType.JOIN_TYPE_INNER) + logical.LateralJoin( + left = transformRelation(rel.getLeft), + right = LateralSubquery(transformRelation(rel.getRight)), + joinType = joinType, + condition = joinCondition) + } + private def transformSort(sort: proto.Sort): LogicalPlan = { - assert(sort.getOrderCount > 0, "'order' must be present and contain elements.") + assertPlan(sort.getOrderCount > 0, "'order' must be present and contain elements.") logical.Sort( child = transformRelation(sort.getInput), global = sort.getIsGlobal, @@ -2172,7 +2224,7 @@ class SparkConnectPlanner( private def transformDrop(rel: proto.Drop): LogicalPlan = { var output = Dataset.ofRows(session, transformRelation(rel.getInput)) if (rel.getColumnsCount > 0) { - val cols = rel.getColumnsList.asScala.toSeq.map(expr => column(transformExpression(expr))) + val cols = rel.getColumnsList.asScala.toSeq.map(expr => Column(transformExpression(expr))) output = output.drop(cols.head, cols.tail: _*) } if (rel.getColumnNamesCount > 0) { @@ -2202,7 +2254,7 @@ class SparkConnectPlanner( val keyColumn = TypedAggUtils.aggKeyColumn(ds.kEncoder, ds.groupingAttributes) val namedColumns = 
rel.getAggregateExpressionsList.asScala.toSeq - .map(expr => transformExpressionWithTypedReduceExpression(expr, input)) + .map(expr => transformExpressionWithTypedReduceExpression(expr, ds.analyzedData)) .map(toNamedExpression) logical.Aggregate(ds.groupingAttributes, keyColumn +: namedColumns, ds.analyzed) } @@ -2213,9 +2265,17 @@ class SparkConnectPlanner( } val input = transformRelation(rel.getInput) + val logicalPlan = + if (rel.getAggregateExpressionsList.asScala.toSeq.exists( + _.getExprTypeCase == proto.Expression.ExprTypeCase.TYPED_AGGREGATE_EXPRESSION)) { + session.sessionState.executePlan(input).analyzed + } else { + input + } + val groupingExprs = rel.getGroupingExpressionsList.asScala.toSeq.map(transformExpression) val aggExprs = rel.getAggregateExpressionsList.asScala.toSeq - .map(expr => transformExpressionWithTypedReduceExpression(expr, input)) + .map(expr => transformExpressionWithTypedReduceExpression(expr, logicalPlan)) val aliasedAgg = (groupingExprs ++ aggExprs).map(toNamedExpression) rel.getGroupType match { @@ -2223,19 +2283,19 @@ class SparkConnectPlanner( logical.Aggregate( groupingExpressions = groupingExprs, aggregateExpressions = aliasedAgg, - child = input) + child = logicalPlan) case proto.Aggregate.GroupType.GROUP_TYPE_ROLLUP => logical.Aggregate( groupingExpressions = Seq(Rollup(groupingExprs.map(Seq(_)))), aggregateExpressions = aliasedAgg, - child = input) + child = logicalPlan) case proto.Aggregate.GroupType.GROUP_TYPE_CUBE => logical.Aggregate( groupingExpressions = Seq(Cube(groupingExprs.map(Seq(_)))), aggregateExpressions = aliasedAgg, - child = input) + child = logicalPlan) case proto.Aggregate.GroupType.GROUP_TYPE_PIVOT => if (!rel.hasPivot) { @@ -2247,7 +2307,7 @@ class SparkConnectPlanner( rel.getPivot.getValuesList.asScala.toSeq.map(transformLiteral) } else { RelationalGroupedDataset - .collectPivotValues(Dataset.ofRows(session, input), column(pivotExpr)) + .collectPivotValues(Dataset.ofRows(session, logicalPlan), 
Column(pivotExpr)) .map(expressions.Literal.apply) } logical.Pivot( @@ -2255,7 +2315,7 @@ class SparkConnectPlanner( pivotColumn = pivotExpr, pivotValues = valueExprs, aggregates = aggExprs, - child = input) + child = logicalPlan) case proto.Aggregate.GroupType.GROUP_TYPE_GROUPING_SETS => val groupingSetsExprs = rel.getGroupingSetsList.asScala.toSeq.map { getGroupingSets => @@ -2267,7 +2327,7 @@ class SparkConnectPlanner( groupingSets = groupingSetsExprs, userGivenGroupByExprs = groupingExprs)), aggregateExpressions = aliasedAgg, - child = input) + child = logicalPlan) case other => throw InvalidPlanInput(s"Unknown Group Type $other") } @@ -2276,10 +2336,8 @@ class SparkConnectPlanner( private def transformTypedReduceExpression( fun: proto.Expression.UnresolvedFunction, dataAttributes: Seq[Attribute]): Expression = { - assert(fun.getFunctionName == "reduce") - if (fun.getArgumentsCount != 1) { - throw InvalidPlanInput("reduce requires single child expression") - } + assertPlan(fun.getFunctionName == "reduce") + assertPlan(fun.getArgumentsCount == 1, "reduce requires single child expression") val udf = fun.getArgumentsList.asScala match { case collection.Seq(e) if e.hasCommonInlineUserDefinedFunction && @@ -2309,10 +2367,10 @@ class SparkConnectPlanner( expr: proto.TypedAggregateExpression, baseRelationOpt: Option[LogicalPlan]): AggregateExpression = { val udf = expr.getScalarScalaUdf - assert(udf.getAggregate) + assertPlan(udf.getAggregate) val udfPacket = unpackScalaUDF[UdfPacket](udf) - assert(udfPacket.inputEncoders.size == 1, "UDAF should have exactly one input encoder") + assertPlan(udfPacket.inputEncoders.size == 1, "UDAF should have exactly one input encoder") val aggregator = udfPacket.function.asInstanceOf[Aggregator[Any, Any, Any]] val tae = @@ -2345,17 +2403,17 @@ class SparkConnectPlanner( }.toSeq action.getActionType match { case proto.MergeAction.ActionType.ACTION_TYPE_DELETE => - assert(assignments.isEmpty, "Delete action should not have 
assignment.") + assertPlan(assignments.isEmpty, "Delete action should not have assignment.") DeleteAction(condition) case proto.MergeAction.ActionType.ACTION_TYPE_INSERT => InsertAction(condition, assignments) case proto.MergeAction.ActionType.ACTION_TYPE_INSERT_STAR => - assert(assignments.isEmpty, "InsertStar action should not have assignment.") + assertPlan(assignments.isEmpty, "InsertStar action should not have assignment.") InsertStarAction(condition) case proto.MergeAction.ActionType.ACTION_TYPE_UPDATE => UpdateAction(condition, assignments) case proto.MergeAction.ActionType.ACTION_TYPE_UPDATE_STAR => - assert(assignments.isEmpty, "UpdateStar action should not have assignment.") + assertPlan(assignments.isEmpty, "UpdateStar action should not have assignment.") UpdateStarAction(condition) case _ => throw InvalidPlanInput(s"Unsupported merge action type ${action.getActionType}.") @@ -2407,11 +2465,27 @@ class SparkConnectPlanner( handleRemoveCachedRemoteRelationCommand(command.getRemoveCachedRemoteRelationCommand) case proto.Command.CommandTypeCase.MERGE_INTO_TABLE_COMMAND => handleMergeIntoTableCommand(command.getMergeIntoTableCommand) + case proto.Command.CommandTypeCase.ML_COMMAND => + handleMlCommand(command.getMlCommand, responseObserver) case _ => throw new UnsupportedOperationException(s"$command not supported.") } } + private def handleMlCommand( + command: proto.MlCommand, + responseObserver: StreamObserver[proto.ExecutePlanResponse]): Unit = { + val result = MLHandler.handleMlCommand(sessionHolder, command) + executeHolder.eventsManager.postFinished() + responseObserver.onNext( + proto.ExecutePlanResponse + .newBuilder() + .setSessionId(sessionId) + .setServerSideSessionId(sessionHolder.serverSessionId) + .setMlCommandResult(result) + .build()) + } + private def handleSqlCommand( command: SqlCommand, responseObserver: StreamObserver[ExecutePlanResponse]): Unit = { @@ -2574,12 +2648,12 @@ class SparkConnectPlanner( if (!namedArguments.isEmpty) { 
session.sql( sql.getQuery, - namedArguments.asScala.toMap.transform((_, e) => column(transformExpression(e))), + namedArguments.asScala.toMap.transform((_, e) => Column(transformExpression(e))), tracker) } else if (!posArguments.isEmpty) { session.sql( sql.getQuery, - posArguments.asScala.map(e => column(transformExpression(e))).toArray, + posArguments.asScala.map(e => Column(transformExpression(e))).toArray, tracker) } else if (!args.isEmpty) { session.sql( @@ -2710,6 +2784,7 @@ class SparkConnectPlanner( name = tableIdentifier, userSpecifiedColumns = Nil, comment = None, + collation = None, properties = Map.empty, originalText = None, plan = transformRelation(createView.getInput), @@ -2830,7 +2905,7 @@ class SparkConnectPlanner( if (writeOperation.getPartitioningColumnsCount > 0) { val names = writeOperation.getPartitioningColumnsList.asScala .map(transformExpression) - .map(column) + .map(Column(_)) .toSeq w.partitionedBy(names.head, names.tail: _*) } @@ -2848,7 +2923,7 @@ class SparkConnectPlanner( w.create() } case proto.WriteOperationV2.Mode.MODE_OVERWRITE => - w.overwrite(column(transformExpression(writeOperation.getOverwriteCondition))) + w.overwrite(Column(transformExpression(writeOperation.getOverwriteCondition))) case proto.WriteOperationV2.Mode.MODE_OVERWRITE_PARTITIONS => w.overwritePartitions() case proto.WriteOperationV2.Mode.MODE_APPEND => @@ -2946,10 +3021,9 @@ class SparkConnectPlanner( fn case StreamingForeachFunction.FunctionCase.SCALA_FUNCTION => - val scalaFn = Utils.deserialize[StreamingForeachBatchHelper.ForeachBatchFnType]( + StreamingForeachBatchHelper.scalaForeachBatchWrapper( writeOp.getForeachBatch.getScalaFunction.getPayload.toByteArray, - Utils.getContextOrSparkClassLoader) - StreamingForeachBatchHelper.scalaForeachBatchWrapper(scalaFn, sessionHolder) + sessionHolder) case StreamingForeachFunction.FunctionCase.FUNCTION_NOT_SET => throw InvalidPlanInput("Unexpected foreachBatch function") // Unreachable @@ -3410,7 +3484,7 @@ class 
SparkConnectPlanner( val sourceDs = Dataset.ofRows(session, transformRelation(cmd.getSourceTablePlan)) val mergeInto = sourceDs - .mergeInto(cmd.getTargetTableName, column(transformExpression(cmd.getMergeCondition))) + .mergeInto(cmd.getTargetTableName, Column(transformExpression(cmd.getMergeCondition))) .asInstanceOf[MergeIntoWriterImpl[Row]] mergeInto.matchedActions ++= matchedActions mergeInto.notMatchedActions ++= notMatchedActions @@ -3567,7 +3641,7 @@ class SparkConnectPlanner( getCreateExternalTable: proto.CreateExternalTable): LogicalPlan = { val schema = if (getCreateExternalTable.hasSchema) { val struct = transformDataType(getCreateExternalTable.getSchema) - assert(struct.isInstanceOf[StructType]) + assertPlan(struct.isInstanceOf[StructType]) struct.asInstanceOf[StructType] } else { new StructType @@ -3597,7 +3671,7 @@ class SparkConnectPlanner( private def transformCreateTable(getCreateTable: proto.CreateTable): LogicalPlan = { val schema = if (getCreateTable.hasSchema) { val struct = transformDataType(getCreateTable.getSchema) - assert(struct.isInstanceOf[StructType]) + assertPlan(struct.isInstanceOf[StructType]) struct.asInstanceOf[StructType] } else { new StructType @@ -3709,4 +3783,61 @@ class SparkConnectPlanner( session.catalog.listCatalogs().logicalPlan } } + + private def transformLazyExpression(getLazyExpression: proto.LazyExpression): Expression = { + LazyExpression(transformExpression(getLazyExpression.getChild)) + } + + private def transformSubqueryExpression( + getSubqueryExpression: proto.SubqueryExpression): Expression = { + val planId = getSubqueryExpression.getPlanId + getSubqueryExpression.getSubqueryType match { + case proto.SubqueryExpression.SubqueryType.SUBQUERY_TYPE_SCALAR => + UnresolvedScalarSubqueryPlanId(planId) + case proto.SubqueryExpression.SubqueryType.SUBQUERY_TYPE_EXISTS => + UnresolvedExistsPlanId(planId) + case other => throw InvalidPlanInput(s"Unknown SubqueryType $other") + } + } + + private def 
transformWithRelations(getWithRelations: proto.WithRelations): LogicalPlan = { + if (isValidSQLWithRefs(getWithRelations)) { + transformSqlWithRefs(getWithRelations) + } else { + // Wrap the plan to keep the original planId. + val plan = Project(Seq(UnresolvedStar(None)), transformRelation(getWithRelations.getRoot)) + + val relations = getWithRelations.getReferencesList.asScala.map { ref => + if (ref.hasCommon && ref.getCommon.hasPlanId) { + val planId = ref.getCommon.getPlanId + val plan = transformRelation(ref) + planId -> plan + } else { + throw InvalidPlanInput("Invalid WithRelation reference") + } + }.toMap + + val missingPlanIds = mutable.Set.empty[Long] + val withRelations = plan + .transformAllExpressionsWithPruning(_.containsPattern(TreePattern.UNRESOLVED_PLAN_ID)) { + case u: UnresolvedPlanId => + if (relations.contains(u.planId)) { + u.withPlan(relations(u.planId)) + } else { + missingPlanIds += u.planId + u + } + } + assertPlan( + missingPlanIds.isEmpty, + "Missing relation in WithRelations: " + + s"${missingPlanIds.mkString("(", ", ", ")")} not in " + + s"${relations.keys.mkString("(", ", ", ")")}") + withRelations + } + } + + private def assertPlan(assertion: Boolean, message: => String = ""): Unit = { + if (!assertion) throw InvalidPlanInput(message) + } } diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala index df883a5c86814..ab6bed7152c09 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala @@ -27,12 +27,15 @@ import scala.util.control.NonFatal import org.apache.spark.SparkException import org.apache.spark.api.python.{PythonException, PythonWorkerUtils, SimplePythonFunction, SpecialLengths, 
StreamingPythonRunner} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, QUERY_ID, RUN_ID, SESSION_ID} -import org.apache.spark.sql.DataFrame +import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, QUERY_ID, RUN_ID_STRING, SESSION_ID} +import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, AgnosticEncoders} +import org.apache.spark.sql.connect.common.ForeachWriterPacket import org.apache.spark.sql.connect.service.SessionHolder import org.apache.spark.sql.connect.service.SparkConnectService import org.apache.spark.sql.streaming.StreamingQuery import org.apache.spark.sql.streaming.StreamingQueryListener +import org.apache.spark.util.Utils /** * A helper class for handling ForeachBatch related functionality in Spark Connect servers @@ -88,13 +91,31 @@ object StreamingForeachBatchHelper extends Logging { * DataFrame, so the user code actually runs with legacy DataFrame and session.. */ def scalaForeachBatchWrapper( - fn: ForeachBatchFnType, + payloadBytes: Array[Byte], sessionHolder: SessionHolder): ForeachBatchFnType = { + val foreachBatchPkt = + Utils.deserialize[ForeachWriterPacket](payloadBytes, Utils.getContextOrSparkClassLoader) + val fn = foreachBatchPkt.foreachWriter.asInstanceOf[(Dataset[Any], Long) => Unit] + val encoder = foreachBatchPkt.datasetEncoder.asInstanceOf[AgnosticEncoder[Any]] // TODO(SPARK-44462): Set up Spark Connect session. // Do we actually need this for the first version? dataFrameCachingWrapper( (args: FnArgsWithId) => { - fn(args.df, args.batchId) // dfId is not used, see hack comment above. + // dfId is not used, see hack comment above. + try { + val ds = if (AgnosticEncoders.UnboundRowEncoder == encoder) { + // When the dataset is a DataFrame (Dataset[Row]). + args.df.asInstanceOf[Dataset[Any]] + } else { + // Recover the Dataset from the DataFrame using the encoder. 
+ Dataset.apply(args.df.sparkSession, args.df.logicalPlan)(encoder) + } + fn(ds, args.batchId) + } catch { + case t: Throwable => + logError(s"Calling foreachBatch fn failed", t) + throw t + } }, sessionHolder) } @@ -203,7 +224,7 @@ object StreamingForeachBatchHelper extends Logging { Option(cleanerCache.remove(key)).foreach { cleaner => logInfo( log"Cleaning up runner for queryId ${MDC(QUERY_ID, key.queryId)} " + - log"runId ${MDC(RUN_ID, key.runId)}.") + log"runId ${MDC(RUN_ID_STRING, key.runId)}.") cleaner.close() } } diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala index faa7582d169f1..61cd95621d156 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteEventsManager.scala @@ -145,13 +145,19 @@ case class ExecuteEventsManager(executeHolder: ExecuteHolder, clock: Clock) { * * @param analyzedPlan * The analyzed plan generated by the Connect request plan. None when the request does not - * generate a plan. + * generate a Spark plan or analysis fails. + * @param parsedPlan + * The parsed plan generated by the Connect request plan. None when the request does not + * generate a Spark plan or analysis succeeds. 
*/ - def postAnalyzed(analyzedPlan: Option[LogicalPlan] = None): Unit = { + def postAnalyzed( + analyzedPlan: Option[LogicalPlan] = None, + parsedPlan: Option[LogicalPlan] = None): Unit = { assertStatus(List(ExecuteStatus.Started, ExecuteStatus.Analyzed), ExecuteStatus.Analyzed) val event = SparkListenerConnectOperationAnalyzed(jobTag, operationId, clock.getTimeMillis()) event.analyzedPlan = analyzedPlan + event.parsedPlan = parsedPlan listenerBus.post(event) } @@ -251,6 +257,12 @@ case class ExecuteEventsManager(executeHolder: ExecuteHolder, clock: Clock) { postAnalyzed(Some(analyzedPlan)) } + override def analysisFailed( + tracker: QueryPlanningTracker, + parsedPlan: LogicalPlan): Unit = { + postAnalyzed(parsedPlan = Some(parsedPlan)) + } + def readyForExecution(tracker: QueryPlanningTracker): Unit = postReadyForExecution() })) } @@ -341,9 +353,15 @@ case class SparkListenerConnectOperationAnalyzed( extraTags: Map[String, String] = Map.empty) extends SparkListenerEvent { + /** + * Parsed Spark plan generated by the Connect request. None when the Connect request does not + * generate a Spark plan or analysis succeeds. + */ + @JsonIgnore var parsedPlan: Option[LogicalPlan] = None + /** + * Analyzed Spark plan generated by the Connect request. None when the Connect request does not - * generate a Spark plan. + * generate a Spark plan or analysis fails. 
*/ @JsonIgnore var analyzedPlan: Option[LogicalPlan] = None } diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 821ddb2c85d58..94638151f7f18 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -22,6 +22,8 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.collection.mutable import scala.jdk.CollectionConverters._ +import com.google.protobuf.GeneratedMessage + import org.apache.spark.SparkEnv import org.apache.spark.connect.proto import org.apache.spark.internal.Logging @@ -81,6 +83,10 @@ private[connect] class ExecuteHolder( val observations: mutable.Map[String, Observation] = mutable.Map.empty + lazy val allObservationAndPlanIds: Map[String, Long] = { + ExecuteHolder.collectAllObservationAndPlanIds(request.getPlan).toMap + } + private val runner: ExecuteThreadRunner = new ExecuteThreadRunner(this) /** System.currentTimeMillis when this ExecuteHolder was created. 
*/ @@ -289,6 +295,26 @@ private[connect] class ExecuteHolder( def operationId: String = key.operationId } +private object ExecuteHolder { + private def collectAllObservationAndPlanIds( + planOrMessage: GeneratedMessage, + collected: mutable.Map[String, Long] = mutable.Map.empty): mutable.Map[String, Long] = { + planOrMessage match { + case relation: proto.Relation if relation.hasCollectMetrics => + collected += relation.getCollectMetrics.getName -> relation.getCommon.getPlanId + collectAllObservationAndPlanIds(relation.getCollectMetrics.getInput, collected) + case _ => + planOrMessage.getAllFields.values().asScala.foreach { + case message: GeneratedMessage => + collectAllObservationAndPlanIds(message, collected) + case _ => + // not a message (probably a primitive type), do nothing + } + } + collected + } +} + /** Used to identify ExecuteHolder jobTag among SparkContext.SPARK_JOB_TAGS. */ object ExecuteJobTag { private val prefix = "SparkConnect_OperationTag" diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala index 5dced7acfb0d2..5b56b7079a897 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connect.common.InvalidPlanInput import org.apache.spark.sql.connect.config.Connect +import org.apache.spark.sql.connect.ml.MLCache import org.apache.spark.sql.connect.planner.PythonStreamingQueryListener import org.apache.spark.sql.connect.planner.StreamingForeachBatchHelper import org.apache.spark.sql.connect.service.ExecuteKey @@ -111,6 +112,9 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio 
private[spark] lazy val dataFrameCache: ConcurrentMap[String, DataFrame] = new ConcurrentHashMap() + // ML model cache + private[connect] lazy val mlCache = new MLCache() + // Mapping from id to StreamingQueryListener. Used for methods like removeListener() in // StreamingQueryManager. private lazy val listenerCache: ConcurrentMap[String, StreamingQueryListener] = @@ -301,7 +305,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio // Clean up all artifacts. // Note: there can be concurrent AddArtifact calls still adding something. - artifactManager.cleanUpResources() + artifactManager.close() // Clean up running streaming queries. // Note: there can be concurrent streaming queries being started. @@ -322,6 +326,8 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio // remove all executions and no new executions will be added in the meanwhile. SparkConnectService.executionManager.removeAllExecutionsForSession(this.key) + mlCache.clear() + eventManager.postClosed() } diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAddArtifactsHandler.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAddArtifactsHandler.scala index 72403016404c8..3ba79402e99ef 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAddArtifactsHandler.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAddArtifactsHandler.scala @@ -87,8 +87,8 @@ class SparkConnectAddArtifactsHandler(val responseObserver: StreamObserver[AddAr ErrorUtils.handleError( "addArtifacts.onNext", responseObserver, - holder.userId, - holder.sessionId, + req.getUserContext.getUserId, + req.getSessionId, None, false, Some(() => { diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAnalyzeHandler.scala 
b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAnalyzeHandler.scala index 6c5d95ac67d3d..8ca021c5be39e 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAnalyzeHandler.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectAnalyzeHandler.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.Dataset import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, InvalidPlanInput, StorageLevelProtoConverter} import org.apache.spark.sql.connect.planner.SparkConnectPlanner import org.apache.spark.sql.execution.{CodegenMode, CostMode, ExtendedMode, FormattedMode, SimpleMode} +import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.util.ArrayImplicits._ private[connect] class SparkConnectAnalyzeHandler( @@ -206,6 +207,17 @@ private[connect] class SparkConnectAnalyzeHandler( .setStorageLevel(StorageLevelProtoConverter.toConnectProtoType(storageLevel)) .build()) + case proto.AnalyzePlanRequest.AnalyzeCase.JSON_TO_DDL => + val ddl = DataType + .fromJson(request.getJsonToDdl.getJsonString) + .asInstanceOf[StructType] + .toDDL + builder.setJsonToDdl( + proto.AnalyzePlanResponse.JsonToDDL + .newBuilder() + .setDdlString(ddl) + .build()) + case other => throw InvalidPlanInput(s"Unknown Analyze Method $other!") } diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectConfigHandler.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectConfigHandler.scala index c5e484e022bc4..06bc24b6ccae6 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectConfigHandler.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectConfigHandler.scala @@ -73,11 +73,21 @@ class SparkConnectConfigHandler(responseObserver: StreamObserver[proto.ConfigRes private def handleSet( operation: 
proto.ConfigRequest.Set, conf: RuntimeConfig): proto.ConfigResponse.Builder = { + val silent = operation.hasSilent && operation.getSilent val builder = proto.ConfigResponse.newBuilder() operation.getPairsList.asScala.iterator.foreach { pair => val (key, value) = SparkConnectConfigHandler.toKeyValue(pair) - conf.set(key, value.orNull) - getWarning(key).foreach(builder.addWarnings) + try { + conf.set(key, value.orNull) + getWarning(key).foreach(builder.addWarnings) + } catch { + case e: Throwable => + if (silent) { + builder.addWarnings(s"Failed to set $key to $value due to ${e.getMessage}") + } else { + throw e + } + } } builder } diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReleaseSessionHandler.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReleaseSessionHandler.scala index ec7a7f3bd242c..c36f07fc67f8f 100644 --- a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReleaseSessionHandler.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReleaseSessionHandler.scala @@ -37,7 +37,8 @@ class SparkConnectReleaseSessionHandler( val maybeSession = SparkConnectService.sessionManager.getIsolatedSessionIfPresent(key) maybeSession.foreach(f => responseBuilder.setServerSideSessionId(f.serverSessionId)) - SparkConnectService.sessionManager.closeSession(key) + val allowReconnect = v.getAllowReconnect + SparkConnectService.sessionManager.closeSession(key, allowReconnect) responseObserver.onNext(responseBuilder.build()) responseObserver.onCompleted() diff --git a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectSessionManager.scala b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectSessionManager.scala index a306856efa33c..c59fd02a829ae 100644 --- 
a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectSessionManager.scala +++ b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectSessionManager.scala @@ -134,7 +134,9 @@ class SparkConnectSessionManager extends Logging { } // Removes session from sessionStore and returns it. - private def removeSessionHolder(key: SessionKey): Option[SessionHolder] = { + private def removeSessionHolder( + key: SessionKey, + allowReconnect: Boolean = false): Option[SessionHolder] = { var sessionHolder: Option[SessionHolder] = None // The session holder should remain in the session store until it is added to the closed session @@ -144,9 +146,11 @@ class SparkConnectSessionManager extends Logging { sessionHolder = Option(sessionStore.get(key)) sessionHolder.foreach { s => - // Put into closedSessionsCache to prevent the same session from being recreated by - // getOrCreateIsolatedSession. - closedSessionsCache.put(s.key, s.getSessionHolderInfo) + if (!allowReconnect) { + // Put into closedSessionsCache to prevent the same session from being recreated by + // getOrCreateIsolatedSession when reconnection isn't allowed. + closedSessionsCache.put(s.key, s.getSessionHolderInfo) + } // Then, remove the session holder from the session store. sessionStore.remove(key) @@ -154,17 +158,21 @@ class SparkConnectSessionManager extends Logging { sessionHolder } - // Shut downs the session after removing. - private def shutdownSessionHolder(sessionHolder: SessionHolder): Unit = { + // Shuts down the session after removing. + private def shutdownSessionHolder( + sessionHolder: SessionHolder, + allowReconnect: Boolean = false): Unit = { sessionHolder.close() - // Update in closedSessionsCache: above it wasn't updated with closedTime etc. yet. - closedSessionsCache.put(sessionHolder.key, sessionHolder.getSessionHolderInfo) + if (!allowReconnect) { + // Update in closedSessionsCache: above it wasn't updated with closedTime etc. yet. 
+ closedSessionsCache.put(sessionHolder.key, sessionHolder.getSessionHolderInfo) + } } - def closeSession(key: SessionKey): Unit = { - val sessionHolder = removeSessionHolder(key) + def closeSession(key: SessionKey, allowReconnect: Boolean = false): Unit = { + val sessionHolder = removeSessionHolder(key, allowReconnect) // Rest of the cleanup: the session cannot be accessed anymore by getOrCreateIsolatedSession. - sessionHolder.foreach(shutdownSessionHolder(_)) + sessionHolder.foreach(shutdownSessionHolder(_, allowReconnect)) } private[connect] def shutdown(): Unit = { @@ -289,8 +297,10 @@ class SparkConnectSessionManager extends Logging { * Used for testing */ private[connect] def invalidateAllSessions(): Unit = { - periodicMaintenance(defaultInactiveTimeoutMs = 0L, ignoreCustomTimeout = true) - assert(sessionStore.isEmpty) + sessionStore.forEach((key, sessionHolder) => { + removeSessionHolder(key) + shutdownSessionHolder(sessionHolder) + }) closedSessionsCache.invalidateAll() } diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala index b04c42a730785..3c857554dc756 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala @@ -128,6 +128,17 @@ trait SparkConnectServerTest extends SharedSparkSession { req.build() } + protected def buildReleaseSessionRequest( + sessionId: String = defaultSessionId, + allowReconnect: Boolean = false) = { + proto.ReleaseSessionRequest + .newBuilder() + .setUserContext(userContext) + .setSessionId(sessionId) + .setAllowReconnect(allowReconnect) + .build() + } + protected def buildPlan(query: String) = { proto.Plan.newBuilder().setRoot(dsl.sql(query)).build() } diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ml/MLSuite.scala 
b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ml/MLSuite.scala new file mode 100644 index 0000000000000..bea7072b2034e --- /dev/null +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/ml/MLSuite.scala @@ -0,0 +1,390 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connect.ml + +import java.io.File + +import org.apache.spark.SparkFunSuite +import org.apache.spark.connect.proto +import org.apache.spark.ml.classification.LogisticRegressionModel +import org.apache.spark.ml.linalg.{Vectors, VectorUDT} +import org.apache.spark.ml.param._ +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.UnsafeProjection +import org.apache.spark.sql.catalyst.types.DataTypeUtils +import org.apache.spark.sql.connect.SparkConnectTestUtils +import org.apache.spark.sql.connect.planner.SparkConnectPlanTest +import org.apache.spark.sql.types.{FloatType, Metadata, StructField, StructType} +import org.apache.spark.util.Utils + +trait FakeArrayParams extends Params { + final val arrayString: StringArrayParam = + new StringArrayParam(this, "arrayString", "array string") + + final def getArrayString: Array[String] = $(arrayString) + + final val arrayDouble: DoubleArrayParam = + new DoubleArrayParam(this, "arrayDouble", "array double") + + final def getArrayDouble: Array[Double] = $(arrayDouble) + + final val arrayInt: IntArrayParam = new IntArrayParam(this, "arrayInt", "array int") + + final def getArrayInt: Array[Int] = $(arrayInt) + + final val int: IntParam = new IntParam(this, "int", "int") + + final def getInt: Int = $(int) + + final val float: FloatParam = new FloatParam(this, "float", "float") + + final def getFloat: Float = $(float) + + final val boolean: BooleanParam = new BooleanParam(this, "boolean", "boolean") + + final def getBoolean: Boolean = $(boolean) + + final val double: DoubleParam = new DoubleParam(this, "double", "double") + + final def getDouble: Double = $(double) +} + +class FakedML(override val uid: String) extends FakeArrayParams { + def this() = this(Identifiable.randomUID("FakedML")) + + override def copy(extra: ParamMap): Params = this +} + +class MLSuite extends SparkFunSuite with 
SparkConnectPlanTest { + + def createLocalRelationProto: proto.Relation = { + val udt = new VectorUDT() + val rows = Seq( + InternalRow(1.0f, udt.serialize(Vectors.dense(Array(1.0, 2.0)))), + InternalRow(1.0f, udt.serialize(Vectors.dense(Array(2.0, -1.0)))), + InternalRow(0.0f, udt.serialize(Vectors.dense(Array(-3.0, -2.0)))), + InternalRow(0.0f, udt.serialize(Vectors.dense(Array(-1.0, -2.0))))) + + val schema = StructType( + Seq( + StructField("label", FloatType), + StructField("features", new VectorUDT(), false, Metadata.empty))) + + val inputRows = rows.map { row => + val proj = UnsafeProjection.create(schema) + proj(row).copy() + } + createLocalRelationProto(DataTypeUtils.toAttributes(schema), inputRows, "UTC", Some(schema)) + } + + test("reconcileParam") { + val fakedML = new FakedML + val params = proto.MlParams + .newBuilder() + .putParams( + "boolean", + proto.Param + .newBuilder() + .setLiteral(proto.Expression.Literal.newBuilder().setBoolean(true)) + .build()) + .putParams( + "double", + proto.Param + .newBuilder() + .setLiteral(proto.Expression.Literal.newBuilder().setDouble(1.0)) + .build()) + .putParams( + "int", + proto.Param + .newBuilder() + .setLiteral(proto.Expression.Literal.newBuilder().setInteger(10)) + .build()) + .putParams( + "float", + proto.Param + .newBuilder() + .setLiteral(proto.Expression.Literal.newBuilder().setFloat(10.0f)) + .build()) + .putParams( + "arrayString", + proto.Param + .newBuilder() + .setLiteral( + proto.Expression.Literal + .newBuilder() + .setArray( + proto.Expression.Literal.Array + .newBuilder() + .setElementType(proto.DataType + .newBuilder() + .setString(proto.DataType.String.getDefaultInstance) + .build()) + .addElements(proto.Expression.Literal.newBuilder().setString("hello")) + .addElements(proto.Expression.Literal.newBuilder().setString("world")) + .build()) + .build()) + .build()) + .putParams( + "arrayInt", + proto.Param + .newBuilder() + .setLiteral( + proto.Expression.Literal + .newBuilder() + .setArray( + 
proto.Expression.Literal.Array + .newBuilder() + .setElementType(proto.DataType + .newBuilder() + .setInteger(proto.DataType.Integer.getDefaultInstance) + .build()) + .addElements(proto.Expression.Literal.newBuilder().setInteger(1)) + .addElements(proto.Expression.Literal.newBuilder().setInteger(2)) + .build()) + .build()) + .build()) + .putParams( + "arrayDouble", + proto.Param + .newBuilder() + .setLiteral( + proto.Expression.Literal + .newBuilder() + .setArray( + proto.Expression.Literal.Array + .newBuilder() + .setElementType(proto.DataType + .newBuilder() + .setDouble(proto.DataType.Double.getDefaultInstance) + .build()) + .addElements(proto.Expression.Literal.newBuilder().setDouble(11.0)) + .addElements(proto.Expression.Literal.newBuilder().setDouble(12.0)) + .build()) + .build()) + .build()) + .build() + MLUtils.setInstanceParams(fakedML, params) + assert(fakedML.getInt === 10) + assert(fakedML.getFloat === 10.0) + assert(fakedML.getArrayInt === Array(1, 2)) + assert(fakedML.getArrayDouble === Array(11.0, 12.0)) + assert(fakedML.getArrayString === Array("hello", "world")) + assert(fakedML.getBoolean === true) + assert(fakedML.getDouble === 1.0) + } + + test("LogisticRegression works") { + val sessionHolder = SparkConnectTestUtils.createDummySessionHolder(spark) + + def verifyModel(modelId: String, hasSummary: Boolean = false): Unit = { + val model = sessionHolder.mlCache.get(modelId) + // Model is cached + assert(model != null) + assert(model.isInstanceOf[LogisticRegressionModel]) + val lrModel = model.asInstanceOf[LogisticRegressionModel] + assert(lrModel.getMaxIter === 2) + + // Fetch double attribute + val interceptCommand = proto.MlCommand + .newBuilder() + .setFetch( + proto.Fetch + .newBuilder() + .setObjRef(proto.ObjectRef.newBuilder().setId(modelId)) + .addMethods(proto.Fetch.Method.newBuilder().setMethod("intercept"))) + .build() + val interceptResult = MLHandler.handleMlCommand(sessionHolder, interceptCommand) + 
assert(interceptResult.getParam.getLiteral.getDouble === lrModel.intercept) + + // Fetch Vector attribute + val coefficientsCommand = proto.MlCommand + .newBuilder() + .setFetch( + proto.Fetch + .newBuilder() + .setObjRef(proto.ObjectRef.newBuilder().setId(modelId)) + .addMethods(proto.Fetch.Method.newBuilder().setMethod("coefficients"))) + .build() + val coefficientsResult = MLHandler.handleMlCommand(sessionHolder, coefficientsCommand) + val deserializedCoefficients = + MLUtils.deserializeVector(coefficientsResult.getParam.getVector) + assert(deserializedCoefficients === lrModel.coefficients) + + // Fetch Matrix attribute + val coefficientsMatrixCommand = proto.MlCommand + .newBuilder() + .setFetch( + proto.Fetch + .newBuilder() + .setObjRef(proto.ObjectRef.newBuilder().setId(modelId)) + .addMethods(proto.Fetch.Method.newBuilder().setMethod("coefficientMatrix"))) + .build() + val coefficientsMatrixResult = + MLHandler.handleMlCommand(sessionHolder, coefficientsMatrixCommand) + val deserializedCoefficientsMatrix = + MLUtils.deserializeMatrix(coefficientsMatrixResult.getParam.getMatrix) + assert(lrModel.coefficientMatrix === deserializedCoefficientsMatrix) + + // Predict with sparse vector + val sparseVector = Vectors.dense(Array(0.0, 2.0)).toSparse + val predictCommand = proto.MlCommand + .newBuilder() + .setFetch( + proto.Fetch + .newBuilder() + .setObjRef(proto.ObjectRef.newBuilder().setId(modelId)) + .addMethods( + proto.Fetch.Method + .newBuilder() + .setMethod("predict") + .addArgs(proto.Fetch.Method.Args + .newBuilder() + .setParam(Serializer.serializeParam(sparseVector))))) + .build() + val predictResult = MLHandler.handleMlCommand(sessionHolder, predictCommand) + val predictValue = predictResult.getParam.getLiteral.getDouble + assert(lrModel.predict(sparseVector) === predictValue) + + // The loaded model doesn't have summary + if (hasSummary) { + // Fetch summary attribute + val accuracyCommand = proto.MlCommand + .newBuilder() + .setFetch( + proto.Fetch + 
.newBuilder() + .setObjRef(proto.ObjectRef.newBuilder().setId(modelId)) + .addMethods(proto.Fetch.Method.newBuilder().setMethod("summary")) + .addMethods(proto.Fetch.Method.newBuilder().setMethod("accuracy"))) + .build() + val accuracyResult = MLHandler.handleMlCommand(sessionHolder, accuracyCommand) + assert(lrModel.summary.accuracy === accuracyResult.getParam.getLiteral.getDouble) + + val weightedFMeasureCommand = proto.MlCommand + .newBuilder() + .setFetch( + proto.Fetch + .newBuilder() + .setObjRef(proto.ObjectRef.newBuilder().setId(modelId)) + .addMethods(proto.Fetch.Method.newBuilder().setMethod("summary")) + .addMethods( + proto.Fetch.Method + .newBuilder() + .setMethod("weightedFMeasure") + .addArgs(proto.Fetch.Method.Args + .newBuilder() + .setParam(Serializer.serializeParam(2.5))))) + .build() + val weightedFMeasureResult = + MLHandler.handleMlCommand(sessionHolder, weightedFMeasureCommand) + assert( + lrModel.summary.weightedFMeasure(2.5) === + weightedFMeasureResult.getParam.getLiteral.getDouble) + } + } + + try { + val fitCommand = proto.MlCommand + .newBuilder() + .setFit( + proto.MlCommand.Fit + .newBuilder() + .setDataset(createLocalRelationProto) + .setEstimator( + proto.MlOperator + .newBuilder() + .setName("org.apache.spark.ml.classification.LogisticRegression") + .setUid("LogisticRegression") + .setType(proto.MlOperator.OperatorType.ESTIMATOR)) + .setParams( + proto.MlParams + .newBuilder() + .putParams( + "maxIter", + proto.Param + .newBuilder() + .setLiteral(proto.Expression.Literal + .newBuilder() + .setInteger(2)) + .build()))) + .build() + val fitResult = MLHandler.handleMlCommand(sessionHolder, fitCommand) + val modelId = fitResult.getOperatorInfo.getObjRef.getId + + verifyModel(modelId, true) + + // read/write + val tempDir = Utils.createTempDir(namePrefix = this.getClass.getName) + try { + val path = new File(tempDir, Identifiable.randomUID("LogisticRegression")).getPath + val writeCmd = proto.MlCommand + .newBuilder() + .setWrite( + 
proto.MlCommand.Write + .newBuilder() + .setPath(path) + .setObjRef(proto.ObjectRef.newBuilder().setId(modelId))) + .build() + MLHandler.handleMlCommand(sessionHolder, writeCmd) + + val readCmd = proto.MlCommand + .newBuilder() + .setRead( + proto.MlCommand.Read + .newBuilder() + .setOperator( + proto.MlOperator + .newBuilder() + .setName("org.apache.spark.ml.classification.LogisticRegressionModel") + .setType(proto.MlOperator.OperatorType.MODEL)) + .setPath(path)) + .build() + + val readResult = MLHandler.handleMlCommand(sessionHolder, readCmd) + verifyModel(readResult.getOperatorInfo.getObjRef.getId) + + } finally { + Utils.deleteRecursively(tempDir) + } + + } finally { + sessionHolder.mlCache.clear() + } + } + + test("Exception: Unsupported ML operator") { + intercept[MlUnsupportedException] { + val sessionHolder = SparkConnectTestUtils.createDummySessionHolder(spark) + val command = proto.MlCommand + .newBuilder() + .setFit( + proto.MlCommand.Fit + .newBuilder() + .setDataset(createLocalRelationProto) + .setEstimator( + proto.MlOperator + .newBuilder() + .setName("org.apache.spark.ml.NotExistingML") + .setUid("FakedUid") + .setType(proto.MlOperator.OperatorType.ESTIMATOR))) + .build() + MLHandler.handleMlCommand(sessionHolder, command) + } + } +} diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala index e44d3eacc66df..55c492f511049 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala @@ -26,9 +26,10 @@ import org.apache.spark.connect.proto import org.apache.spark.connect.proto.Expression.{Alias, ExpressionString, UnresolvedStar} import org.apache.spark.sql.{AnalysisException, Dataset, Row} import 
org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedFunction, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, UnsafeProjection} import org.apache.spark.sql.catalyst.plans.logical +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.connect.SparkConnectTestUtils import org.apache.spark.sql.connect.common.InvalidPlanInput @@ -88,7 +89,8 @@ trait SparkConnectPlanTest extends SharedSparkSession { def createLocalRelationProto( attrs: Seq[AttributeReference], data: Seq[InternalRow], - timeZoneId: String = "UTC"): proto.Relation = { + timeZoneId: String = "UTC", + schema: Option[StructType] = None): proto.Relation = { val localRelationBuilder = proto.LocalRelation.newBuilder() val bytes = ArrowConverters @@ -102,6 +104,7 @@ trait SparkConnectPlanTest extends SharedSparkSession { .next() localRelationBuilder.setData(ByteString.copyFrom(bytes)) + schema.foreach(s => localRelationBuilder.setSchema(s.json)) proto.Relation.newBuilder().setLocalRelation(localRelationBuilder.build()).build() } } @@ -274,7 +277,7 @@ class SparkConnectPlannerSuite extends SparkFunSuite with SparkConnectPlanTest { test("Simple Join") { val incompleteJoin = proto.Relation.newBuilder.setJoin(proto.Join.newBuilder.setLeft(readRel)).build() - intercept[AssertionError](transform(incompleteJoin)) + intercept[InvalidPlanInput](transform(incompleteJoin)) // Join type JOIN_TYPE_UNSPECIFIED is not supported. 
intercept[InvalidPlanInput] { @@ -503,26 +506,27 @@ class SparkConnectPlannerSuite extends SparkFunSuite with SparkConnectPlanTest { } test("Test duplicated names in WithColumns") { - intercept[AnalysisException] { - transform( - proto.Relation - .newBuilder() - .setWithColumns( - proto.WithColumns - .newBuilder() - .setInput(readRel) - .addAliases(proto.Expression.Alias + val logical = transform( + proto.Relation + .newBuilder() + .setWithColumns( + proto.WithColumns + .newBuilder() + .setInput(readRel) + .addAliases( + proto.Expression.Alias .newBuilder() .addName("test") .setExpr(proto.Expression.newBuilder .setLiteral(proto.Expression.Literal.newBuilder.setInteger(32)))) - .addAliases(proto.Expression.Alias - .newBuilder() - .addName("test") - .setExpr(proto.Expression.newBuilder - .setLiteral(proto.Expression.Literal.newBuilder.setInteger(32))))) - .build()) - } + .addAliases(proto.Expression.Alias + .newBuilder() + .addName("test") + .setExpr(proto.Expression.newBuilder + .setLiteral(proto.Expression.Literal.newBuilder.setInteger(32))))) + .build()) + + intercept[AnalysisException](Dataset.ofRows(spark, logical)) } test("Test multi nameparts for column names in WithColumns") { @@ -884,4 +888,36 @@ class SparkConnectPlannerSuite extends SparkFunSuite with SparkConnectPlanTest { intercept[AnalysisException](Dataset.ofRows(spark, logical)) } + + test("Internal functions") { + def getProjectRelationWithFn(name: String, isInternal: Option[Boolean]): proto.Relation = { + val fn = proto.Expression.UnresolvedFunction.newBuilder.setFunctionName(name) + isInternal.foreach(fn.setIsInternal) + val proj = proto.Project.newBuilder + .setInput(readRel) + .addExpressions(proto.Expression.newBuilder.setUnresolvedFunction(fn)) + proto.Relation.newBuilder.setProject(proj).build() + } + + def getUnresolvedFunction(plan: LogicalPlan): UnresolvedFunction = + plan.expressions.head.asInstanceOf[UnresolvedAlias].child.asInstanceOf[UnresolvedFunction] + + // "bloom_filter_agg" is an 
internal function. + val plan1 = transform(getProjectRelationWithFn("bloom_filter_agg", isInternal = None)) + val fn1 = getUnresolvedFunction(plan1) + assert(fn1.nameParts.head == "bloom_filter_agg") + assert(fn1.isInternal) + + // "abcde" is not an internal function. + val plan2 = transform(getProjectRelationWithFn("abcde", isInternal = None)) + val fn2 = getUnresolvedFunction(plan2) + assert(fn2.nameParts.head == "abcde") + assert(!fn2.isInternal) + + // "abcde" is not an internal function but we could set it to be internal. + val plan3 = transform(getProjectRelationWithFn("abcde", isInternal = Some(true))) + val fn3 = getUnresolvedFunction(plan3) + assert(fn3.nameParts.head == "abcde") + assert(fn3.isInternal) + } } diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala index cad7fe6370827..1a86ced3a2ac9 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GenericInt import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner, LeftAnti, LeftOuter, LeftSemi, PlanTest, RightOuter} import org.apache.spark.sql.catalyst.plans.logical.{CollectMetrics, Distinct, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.types.DataTypeUtils -import org.apache.spark.sql.connect.common.InvalidPlanInput +import org.apache.spark.sql.connect.common.{InvalidCommandInput, InvalidPlanInput} import org.apache.spark.sql.connect.common.LiteralValueProtoConverter.toLiteralProto import org.apache.spark.sql.connect.dsl.MockRemoteSession import org.apache.spark.sql.connect.dsl.commands._ diff --git 
a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala index d6d137e6d91aa..5e88725691656 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala @@ -919,7 +919,8 @@ class SparkConnectServiceSuite } class MockSparkListener() extends SparkListener { val semaphoreStarted = new Semaphore(0) - var executeHolder = Option.empty[ExecuteHolder] + // Accessed by multiple threads in parallel. + @volatile var executeHolder = Option.empty[ExecuteHolder] override def onOtherEvent(event: SparkListenerEvent): Unit = { event match { case e: SparkListenerConnectOperationStarted => diff --git a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectServiceE2ESuite.scala b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectServiceE2ESuite.scala index f86298a8b5b98..f24560259a883 100644 --- a/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectServiceE2ESuite.scala +++ b/sql/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectServiceE2ESuite.scala @@ -245,4 +245,26 @@ class SparkConnectServiceE2ESuite extends SparkConnectServerTest { assert(queryError.getMessage.contains("INVALID_HANDLE.SESSION_CHANGED")) } } + + test("Client is allowed to reconnect to released session if allow_reconnect is set") { + withRawBlockingStub { stub => + val sessionId = UUID.randomUUID.toString() + val iter = + stub.executePlan( + buildExecutePlanRequest( + buildPlan("select * from range(1000000)"), + sessionId = sessionId)) + iter.hasNext // guarantees the request was received by server. 
+ + stub.releaseSession(buildReleaseSessionRequest(sessionId, allowReconnect = true)) + + val iter2 = + stub.executePlan( + buildExecutePlanRequest( + buildPlan("select * from range(1000000)"), + sessionId = sessionId)) + // guarantees the request was received by server. No exception should be thrown on reuse + iter2.hasNext + } + } } diff --git a/sql/connect/shims/src/main/scala/org/apache/spark/shims.scala b/sql/connect/shims/src/main/scala/org/apache/spark/shims.scala index ad8771a03b287..9c5fb515580a7 100644 --- a/sql/connect/shims/src/main/scala/org/apache/spark/shims.scala +++ b/sql/connect/shims/src/main/scala/org/apache/spark/shims.scala @@ -32,7 +32,6 @@ package rdd { package sql { class ExperimentalMethods class SparkSessionExtensions - class SQLContext package execution { class QueryExecution diff --git a/sql/core/benchmarks/AggregateBenchmark-jdk21-results.txt b/sql/core/benchmarks/AggregateBenchmark-jdk21-results.txt index 50a31e7e73bb1..0c14099f23b73 100644 --- a/sql/core/benchmarks/AggregateBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/AggregateBenchmark-jdk21-results.txt @@ -2,147 +2,147 @@ aggregate without grouping ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor agg w/o group: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -agg w/o group wholestage off 35098 35975 1240 59.8 16.7 1.0X -agg w/o group wholestage on 2835 2844 9 739.9 1.4 12.4X +agg w/o group wholestage off 35412 35848 616 59.2 16.9 1.0X +agg w/o group wholestage on 2849 2853 5 736.1 1.4 12.4X ================================================================================================ stat functions 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor stddev: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -stddev wholestage off 4102 4138 51 25.6 39.1 1.0X -stddev wholestage on 974 983 6 107.6 9.3 4.2X +stddev wholestage off 4533 4585 74 23.1 43.2 1.0X +stddev wholestage on 987 991 4 106.3 9.4 4.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor kurtosis: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -kurtosis wholestage off 21188 21367 253 4.9 202.1 1.0X -kurtosis wholestage on 992 993 2 105.7 9.5 21.4X +kurtosis wholestage off 21221 21417 277 4.9 202.4 1.0X +kurtosis wholestage on 995 1000 4 105.4 9.5 21.3X ================================================================================================ aggregate with linear keys ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 6757 6835 110 12.4 80.5 1.0X -codegen = T, hashmap = F 3850 4003 160 21.8 45.9 1.8X -codegen = T, row-based hashmap = T 1222 1238 15 68.6 14.6 5.5X -codegen = T, 
vectorized hashmap = T 804 814 9 104.3 9.6 8.4X +codegen = F 6703 6748 64 12.5 79.9 1.0X +codegen = T, hashmap = F 3746 3777 36 22.4 44.7 1.8X +codegen = T, row-based hashmap = T 1226 1233 5 68.4 14.6 5.5X +codegen = T, vectorized hashmap = T 812 820 12 103.4 9.7 8.3X ================================================================================================ aggregate with randomized keys ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 7331 7374 60 11.4 87.4 1.0X -codegen = T, hashmap = F 4664 4687 24 18.0 55.6 1.6X -codegen = T, row-based hashmap = T 1620 1627 7 51.8 19.3 4.5X -codegen = T, vectorized hashmap = T 1113 1171 72 75.4 13.3 6.6X +codegen = F 7454 7484 42 11.3 88.9 1.0X +codegen = T, hashmap = F 4485 4585 87 18.7 53.5 1.7X +codegen = T, row-based hashmap = T 1676 1685 9 50.1 20.0 4.4X +codegen = T, vectorized hashmap = T 1061 1110 85 79.0 12.7 7.0X ================================================================================================ aggregate with string key ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w string key: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 2485 2510 35 8.4 118.5 1.0X -codegen = T, hashmap = F 1519 1529 10 13.8 72.4 
1.6X -codegen = T, row-based hashmap = T 994 1010 16 21.1 47.4 2.5X -codegen = T, vectorized hashmap = T 804 815 11 26.1 38.3 3.1X +codegen = F 2296 2326 43 9.1 109.5 1.0X +codegen = T, hashmap = F 1499 1512 12 14.0 71.5 1.5X +codegen = T, row-based hashmap = T 996 1013 21 21.1 47.5 2.3X +codegen = T, vectorized hashmap = T 798 801 2 26.3 38.1 2.9X ================================================================================================ aggregate with decimal key ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w decimal key: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 2018 2050 46 10.4 96.2 1.0X -codegen = T, hashmap = F 1305 1318 18 16.1 62.2 1.5X -codegen = T, row-based hashmap = T 499 505 6 42.0 23.8 4.0X -codegen = T, vectorized hashmap = T 313 317 4 67.0 14.9 6.4X +codegen = F 2042 2096 76 10.3 97.4 1.0X +codegen = T, hashmap = F 1362 1374 18 15.4 64.9 1.5X +codegen = T, row-based hashmap = T 479 501 17 43.8 22.8 4.3X +codegen = T, vectorized hashmap = T 312 326 13 67.3 14.9 6.6X ================================================================================================ aggregate with multiple key types ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w multiple keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-codegen = F 4453 4457 6 4.7 212.3 1.0X -codegen = T, hashmap = F 2320 2333 20 9.0 110.6 1.9X -codegen = T, row-based hashmap = T 1821 1826 6 11.5 86.8 2.4X -codegen = T, vectorized hashmap = T 1600 1652 74 13.1 76.3 2.8X +codegen = F 4229 4241 16 5.0 201.7 1.0X +codegen = T, hashmap = F 2316 2320 6 9.1 110.4 1.8X +codegen = T, row-based hashmap = T 1819 1827 11 11.5 86.8 2.3X +codegen = T, vectorized hashmap = T 1518 1519 1 13.8 72.4 2.8X ================================================================================================ max function bytecode size of wholestagecodegen ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor max function bytecode size: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 358 384 22 1.8 545.8 1.0X -codegen = T, hugeMethodLimit = 10000 134 160 24 4.9 204.1 2.7X -codegen = T, hugeMethodLimit = 1500 129 145 16 5.1 196.1 2.8X +codegen = F 368 389 23 1.8 561.7 1.0X +codegen = T, hugeMethodLimit = 10000 143 162 16 4.6 218.9 2.6X +codegen = T, hugeMethodLimit = 1500 140 154 12 4.7 214.1 2.6X ================================================================================================ cube ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cube: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cube wholestage off 1962 1973 16 2.7 374.2 
1.0X -cube wholestage on 1054 1075 24 5.0 201.0 1.9X +cube wholestage off 2051 2074 33 2.6 391.2 1.0X +cube wholestage on 1065 1078 10 4.9 203.1 1.9X ================================================================================================ hash and BytesToBytesMap ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor BytesToBytesMap: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeRowhash 146 147 3 143.7 7.0 1.0X -murmur3 hash 53 54 1 392.4 2.5 2.7X -fast hash 24 24 0 887.7 1.1 6.2X -arrayEqual 136 136 0 153.9 6.5 1.1X -Java HashMap (Long) 62 72 8 338.3 3.0 2.4X -Java HashMap (two ints) 85 88 2 245.8 4.1 1.7X -Java HashMap (UnsafeRow) 492 495 2 42.6 23.5 0.3X -LongToUnsafeRowMap (opt=false) 350 354 3 59.9 16.7 0.4X -LongToUnsafeRowMap (opt=true) 79 82 5 263.9 3.8 1.8X -BytesToBytesMap (off Heap) 459 471 12 45.7 21.9 0.3X -BytesToBytesMap (on Heap) 466 468 2 45.0 22.2 0.3X -Aggregate HashMap 30 30 2 697.8 1.4 4.9X +UnsafeRowhash 146 147 1 143.2 7.0 1.0X +murmur3 hash 54 55 4 390.7 2.6 2.7X +fast hash 24 24 0 883.4 1.1 6.2X +arrayEqual 137 137 0 153.2 6.5 1.1X +Java HashMap (Long) 61 67 7 344.8 2.9 2.4X +Java HashMap (two ints) 76 79 2 275.1 3.6 1.9X +Java HashMap (UnsafeRow) 531 533 2 39.5 25.3 0.3X +LongToUnsafeRowMap (opt=false) 346 349 4 60.6 16.5 0.4X +LongToUnsafeRowMap (opt=true) 80 80 1 262.5 3.8 1.8X +BytesToBytesMap (off Heap) 440 443 4 47.6 21.0 0.3X +BytesToBytesMap (on Heap) 456 459 4 46.0 21.7 0.3X +Aggregate HashMap 30 31 0 689.1 1.5 4.8X diff --git a/sql/core/benchmarks/AggregateBenchmark-results.txt b/sql/core/benchmarks/AggregateBenchmark-results.txt index f1118da89122d..73ca9abe75a6b 100644 --- 
a/sql/core/benchmarks/AggregateBenchmark-results.txt +++ b/sql/core/benchmarks/AggregateBenchmark-results.txt @@ -2,147 +2,147 @@ aggregate without grouping ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor agg w/o group: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -agg w/o group wholestage off 37435 38685 1769 56.0 17.9 1.0X -agg w/o group wholestage on 3364 3369 3 623.4 1.6 11.1X +agg w/o group wholestage off 39769 40576 1142 52.7 19.0 1.0X +agg w/o group wholestage on 3381 3388 13 620.3 1.6 11.8X ================================================================================================ stat functions ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor stddev: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -stddev wholestage off 4461 4505 63 23.5 42.5 1.0X -stddev wholestage on 976 980 3 107.5 9.3 4.6X +stddev wholestage off 4522 4526 7 23.2 43.1 1.0X +stddev wholestage on 987 992 7 106.2 9.4 4.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor kurtosis: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-kurtosis wholestage off 20698 20799 143 5.1 197.4 1.0X -kurtosis wholestage on 990 992 2 105.9 9.4 20.9X +kurtosis wholestage off 21070 21074 5 5.0 200.9 1.0X +kurtosis wholestage on 994 998 4 105.5 9.5 21.2X ================================================================================================ aggregate with linear keys ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 6646 6677 43 12.6 79.2 1.0X -codegen = T, hashmap = F 4024 4116 118 20.8 48.0 1.7X -codegen = T, row-based hashmap = T 1240 1255 13 67.7 14.8 5.4X -codegen = T, vectorized hashmap = T 816 838 14 102.8 9.7 8.1X +codegen = F 7074 7120 64 11.9 84.3 1.0X +codegen = T, hashmap = F 3968 4028 63 21.1 47.3 1.8X +codegen = T, row-based hashmap = T 1211 1214 3 69.3 14.4 5.8X +codegen = T, vectorized hashmap = T 827 846 15 101.5 9.9 8.6X ================================================================================================ aggregate with randomized keys ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 7348 7361 18 11.4 87.6 1.0X -codegen = T, hashmap = F 4766 4799 30 17.6 56.8 1.5X -codegen = T, row-based hashmap = T 1712 1734 23 49.0 20.4 
4.3X -codegen = T, vectorized hashmap = T 1052 1057 5 79.7 12.5 7.0X +codegen = F 7673 7686 18 10.9 91.5 1.0X +codegen = T, hashmap = F 4857 4875 20 17.3 57.9 1.6X +codegen = T, row-based hashmap = T 1702 1710 14 49.3 20.3 4.5X +codegen = T, vectorized hashmap = T 1077 1115 48 77.9 12.8 7.1X ================================================================================================ aggregate with string key ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w string key: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 2303 2306 3 9.1 109.8 1.0X -codegen = T, hashmap = F 1467 1472 7 14.3 70.0 1.6X -codegen = T, row-based hashmap = T 989 998 9 21.2 47.2 2.3X -codegen = T, vectorized hashmap = T 794 799 4 26.4 37.9 2.9X +codegen = F 2478 2506 40 8.5 118.2 1.0X +codegen = T, hashmap = F 1510 1517 10 13.9 72.0 1.6X +codegen = T, row-based hashmap = T 1008 1019 19 20.8 48.1 2.5X +codegen = T, vectorized hashmap = T 779 790 8 26.9 37.1 3.2X ================================================================================================ aggregate with decimal key ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w decimal key: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 2026 2039 18 10.4 96.6 1.0X -codegen = T, hashmap = F 1349 
1352 4 15.5 64.3 1.5X -codegen = T, row-based hashmap = T 437 445 6 47.9 20.9 4.6X -codegen = T, vectorized hashmap = T 316 322 5 66.4 15.1 6.4X +codegen = F 2118 2182 92 9.9 101.0 1.0X +codegen = T, hashmap = F 1307 1334 38 16.0 62.3 1.6X +codegen = T, row-based hashmap = T 433 436 2 48.4 20.7 4.9X +codegen = T, vectorized hashmap = T 320 327 7 65.6 15.2 6.6X ================================================================================================ aggregate with multiple key types ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Aggregate w multiple keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 4168 4201 47 5.0 198.7 1.0X -codegen = T, hashmap = F 2412 2418 8 8.7 115.0 1.7X -codegen = T, row-based hashmap = T 1661 1663 2 12.6 79.2 2.5X -codegen = T, vectorized hashmap = T 1606 1610 5 13.1 76.6 2.6X +codegen = F 4200 4204 7 5.0 200.3 1.0X +codegen = T, hashmap = F 2346 2384 53 8.9 111.9 1.8X +codegen = T, row-based hashmap = T 1869 1875 9 11.2 89.1 2.2X +codegen = T, vectorized hashmap = T 1608 1612 6 13.0 76.7 2.6X ================================================================================================ max function bytecode size of wholestagecodegen ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor max function bytecode size: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -codegen = F 385 401 10 1.7 587.4 1.0X -codegen = T, hugeMethodLimit = 10000 141 157 12 4.7 214.5 2.7X -codegen = T, hugeMethodLimit = 1500 127 141 11 5.2 193.9 3.0X +codegen = F 394 430 27 1.7 600.5 1.0X +codegen = T, hugeMethodLimit = 10000 140 156 15 4.7 213.5 2.8X +codegen = T, hugeMethodLimit = 1500 136 144 6 4.8 207.1 2.9X ================================================================================================ cube ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cube: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cube wholestage off 1943 1948 7 2.7 370.6 1.0X -cube wholestage on 1110 1130 17 4.7 211.7 1.8X +cube wholestage off 1971 2004 47 2.7 376.0 1.0X +cube wholestage on 1129 1158 25 4.6 215.3 1.7X ================================================================================================ hash and BytesToBytesMap ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor BytesToBytesMap: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeRowhash 203 204 2 103.4 9.7 1.0X -murmur3 hash 68 69 1 308.6 3.2 3.0X -fast hash 71 71 0 296.3 3.4 2.9X -arrayEqual 144 145 1 145.7 6.9 1.4X -Java HashMap (Long) 66 69 4 318.6 3.1 3.1X -Java HashMap 
(two ints) 80 84 10 263.7 3.8 2.5X -Java HashMap (UnsafeRow) 532 536 3 39.4 25.4 0.4X -LongToUnsafeRowMap (opt=false) 335 337 1 62.6 16.0 0.6X -LongToUnsafeRowMap (opt=true) 78 78 1 269.7 3.7 2.6X -BytesToBytesMap (off Heap) 484 487 3 43.4 23.1 0.4X -BytesToBytesMap (on Heap) 484 491 5 43.4 23.1 0.4X -Aggregate HashMap 30 31 1 690.1 1.4 6.7X +UnsafeRowhash 204 204 1 102.9 9.7 1.0X +murmur3 hash 69 70 0 301.7 3.3 2.9X +fast hash 71 72 1 294.3 3.4 2.9X +arrayEqual 144 145 1 145.1 6.9 1.4X +Java HashMap (Long) 66 69 5 319.8 3.1 3.1X +Java HashMap (two ints) 85 87 2 247.5 4.0 2.4X +Java HashMap (UnsafeRow) 547 554 12 38.3 26.1 0.4X +LongToUnsafeRowMap (opt=false) 347 348 1 60.4 16.6 0.6X +LongToUnsafeRowMap (opt=true) 74 74 1 285.3 3.5 2.8X +BytesToBytesMap (off Heap) 487 490 2 43.0 23.2 0.4X +BytesToBytesMap (on Heap) 511 514 2 41.0 24.4 0.4X +Aggregate HashMap 30 30 0 703.2 1.4 6.8X diff --git a/sql/core/benchmarks/AnsiIntervalSortBenchmark-jdk21-results.txt b/sql/core/benchmarks/AnsiIntervalSortBenchmark-jdk21-results.txt index dd6aabd2695fd..5bbbe6c90a83d 100644 --- a/sql/core/benchmarks/AnsiIntervalSortBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/AnsiIntervalSortBenchmark-jdk21-results.txt @@ -1,28 +1,28 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor year month interval one column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -year month interval one column enable radix 23157 23546 354 4.3 231.6 1.0X -year month interval one column disable radix 33035 33049 14 3.0 330.3 0.7X +year month interval one column enable radix 23762 24352 802 4.2 237.6 1.0X +year month interval one column disable radix 33034 33062 30 3.0 330.3 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor year month interval two columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -year month interval two columns enable radix 33726 33825 89 3.0 337.3 1.0X -year month interval two columns disable radix 33759 34063 472 3.0 337.6 1.0X +year month interval two columns enable radix 33122 33540 609 3.0 331.2 1.0X +year month interval two columns disable radix 34128 34610 427 2.9 341.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor day time interval one columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -day time interval one columns enable radix 23123 23169 61 4.3 231.2 1.0X -day time interval one columns disable radix 34121 34201 96 2.9 341.2 0.7X +day time interval one columns enable radix 21937 22337 373 4.6 219.4 1.0X +day time interval one columns disable radix 32194 32530 557 3.1 321.9 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor day time interval two columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -day time interval two columns enable radix 35022 35250 212 2.9 350.2 1.0X -day time interval two columns disable radix 35240 35498 224 2.8 352.4 1.0X +day time interval two columns enable radix 33037 33149 97 3.0 330.4 1.0X +day time interval two columns disable radix 32944 33036 103 3.0 
329.4 1.0X diff --git a/sql/core/benchmarks/AnsiIntervalSortBenchmark-results.txt b/sql/core/benchmarks/AnsiIntervalSortBenchmark-results.txt index e8aadd025df2d..67acf452919e7 100644 --- a/sql/core/benchmarks/AnsiIntervalSortBenchmark-results.txt +++ b/sql/core/benchmarks/AnsiIntervalSortBenchmark-results.txt @@ -1,28 +1,28 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor year month interval one column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -year month interval one column enable radix 22561 22685 121 4.4 225.6 1.0X -year month interval one column disable radix 32247 32353 132 3.1 322.5 0.7X +year month interval one column enable radix 22822 22918 117 4.4 228.2 1.0X +year month interval one column disable radix 32739 33177 697 3.1 327.4 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor year month interval two columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -year month interval two columns enable radix 33236 33446 207 3.0 332.4 1.0X -year month interval two columns disable radix 34800 34873 63 2.9 348.0 1.0X +year month interval two columns enable radix 33341 33730 549 3.0 333.4 1.0X +year month interval two columns disable radix 33557 33961 565 3.0 335.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor day time interval one columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------- -day time interval one columns enable radix 21978 22020 66 4.5 219.8 1.0X -day time interval one columns disable radix 33183 33211 38 3.0 331.8 0.7X +day time interval one columns enable radix 21059 21193 133 4.7 210.6 1.0X +day time interval one columns disable radix 32196 32724 660 3.1 322.0 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor day time interval two columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -day time interval two columns enable radix 34526 34670 185 2.9 345.3 1.0X -day time interval two columns disable radix 35632 35826 191 2.8 356.3 1.0X +day time interval two columns enable radix 33887 34787 793 3.0 338.9 1.0X +day time interval two columns disable radix 35163 35274 181 2.8 351.6 1.0X diff --git a/sql/core/benchmarks/Base64Benchmark-jdk21-results.txt b/sql/core/benchmarks/Base64Benchmark-jdk21-results.txt index 52092328fd576..ab6e5283dba89 100644 --- a/sql/core/benchmarks/Base64Benchmark-jdk21-results.txt +++ b/sql/core/benchmarks/Base64Benchmark-jdk21-results.txt @@ -1,56 +1,56 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 1974 2002 47 10.1 98.7 1.0X -apache 10784 10862 90 1.9 539.2 0.2X +java 2144 2180 49 9.3 107.2 1.0X +apache 11251 11315 62 1.8 562.6 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 2431 2445 12 8.2 121.6 1.0X -apache 12049 12094 41 1.7 602.5 0.2X +java 2446 2450 4 8.2 122.3 1.0X +apache 12449 12535 76 1.6 622.4 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 2857 2867 14 7.0 142.8 1.0X -apache 13281 13344 56 1.5 664.0 0.2X +java 2873 2875 3 7.0 143.7 1.0X +apache 13571 13613 41 1.5 678.5 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 2943 2973 50 6.8 147.1 1.0X -apache 14384 14421 32 1.4 719.2 0.2X +java 2910 2918 9 6.9 145.5 1.0X +apache 14577 14593 25 1.4 728.9 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor decode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3435 3439 4 5.8 171.7 1.0X -apache 12572 12615 40 1.6 628.6 0.3X +java 3169 3171 2 6.3 158.5 1.0X +apache 12500 12611 100 1.6 625.0 0.3X -OpenJDK 64-Bit Server VM 
21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor decode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 4040 4052 11 5.0 202.0 1.0X -apache 14274 14363 120 1.4 713.7 0.3X +java 4313 4314 2 4.6 215.7 1.0X +apache 14491 14571 70 1.4 724.5 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor decode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 4756 4767 17 4.2 237.8 1.0X -apache 16291 16304 20 1.2 814.6 0.3X +java 4932 4951 24 4.1 246.6 1.0X +apache 15728 15782 49 1.3 786.4 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor decode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 5200 5209 8 3.8 260.0 1.0X -apache 17434 17540 101 1.1 871.7 0.3X +java 5328 5330 2 3.8 266.4 1.0X +apache 17182 17223 36 1.2 859.1 0.3X diff --git a/sql/core/benchmarks/Base64Benchmark-results.txt b/sql/core/benchmarks/Base64Benchmark-results.txt index 3e8d7e2727c34..12f54feeed1d5 100644 --- a/sql/core/benchmarks/Base64Benchmark-results.txt +++ b/sql/core/benchmarks/Base64Benchmark-results.txt @@ -1,56 +1,56 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 2287 2338 50 8.7 114.3 1.0X -apache 10870 10993 126 1.8 543.5 0.2X +java 2097 2144 64 9.5 104.9 1.0X +apache 11350 11380 29 1.8 567.5 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 2930 2945 21 6.8 146.5 1.0X -apache 12069 12172 108 1.7 603.4 0.2X +java 2624 2631 11 7.6 131.2 1.0X +apache 12395 12421 40 1.6 619.7 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3403 3410 7 5.9 170.1 1.0X -apache 13236 13327 139 1.5 661.8 0.3X +java 3196 3212 14 6.3 159.8 1.0X +apache 13591 13708 167 1.5 679.6 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3914 3947 29 5.1 195.7 1.0X -apache 14411 14441 27 1.4 720.6 0.3X +java 3665 3679 12 5.5 183.3 1.0X +apache 14907 14947 42 1.3 745.4 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 
decode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3572 3580 9 5.6 178.6 1.0X -apache 12652 12656 7 1.6 632.6 0.3X +java 3319 3362 37 6.0 166.0 1.0X +apache 12471 12532 64 1.6 623.5 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor decode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 4918 4919 2 4.1 245.9 1.0X -apache 14579 14601 20 1.4 728.9 0.3X +java 4068 4068 0 4.9 203.4 1.0X +apache 14651 14708 53 1.4 732.5 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor decode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 5939 5949 10 3.4 296.9 1.0X -apache 16626 16675 58 1.2 831.3 0.4X +java 5456 5463 12 3.7 272.8 1.0X +apache 16405 16460 69 1.2 820.2 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor decode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 6666 6672 5 3.0 333.3 1.0X -apache 18901 18922 24 1.1 945.0 0.4X +java 6111 6119 8 3.3 305.5 1.0X +apache 17824 17959 117 1.1 891.2 0.3X diff --git a/sql/core/benchmarks/BloomFilterBenchmark-jdk21-results.txt 
b/sql/core/benchmarks/BloomFilterBenchmark-jdk21-results.txt index 5cf56352fa761..2d1b73ac4e241 100644 --- a/sql/core/benchmarks/BloomFilterBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/BloomFilterBenchmark-jdk21-results.txt @@ -2,195 +2,195 @@ ORC Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 8070 8132 88 12.4 80.7 1.0X -With bloom filter 10025 10082 81 10.0 100.2 0.8X +Without bloom filter 7949 7971 31 12.6 79.5 1.0X +With bloom filter 9864 9897 47 10.1 98.6 0.8X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 882 890 7 113.4 8.8 1.0X -With bloom filter, blocksize: 2097152 567 577 10 176.4 5.7 1.6X +Without bloom filter, blocksize: 2097152 895 941 40 111.7 9.0 1.0X +With bloom filter, blocksize: 2097152 838 876 34 119.3 8.4 1.1X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit 
Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 810 836 22 123.4 8.1 1.0X -With bloom filter, blocksize: 4194304 550 568 22 181.8 5.5 1.5X +Without bloom filter, blocksize: 4194304 1345 1348 4 74.3 13.5 1.0X +With bloom filter, blocksize: 4194304 835 842 6 119.7 8.4 1.6X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 823 836 11 121.5 8.2 1.0X -With bloom filter, blocksize: 6291456 540 563 17 185.3 5.4 1.5X +Without bloom filter, blocksize: 6291456 1344 1351 11 74.4 13.4 1.0X +With bloom filter, blocksize: 6291456 814 827 12 122.8 8.1 1.7X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 797 821 21 125.5 8.0 1.0X -With bloom filter, blocksize: 8388608 533 553 23 187.5 5.3 1.5X +Without bloom filter, blocksize: 8388608 1352 1363 16 74.0 13.5 1.0X +With bloom filter, blocksize: 8388608 812 819 11 123.1 8.1 1.7X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 859 876 15 116.4 8.6 1.0X -With bloom filter, blocksize: 12582912 545 576 22 183.4 5.5 1.6X +Without bloom filter, blocksize: 12582912 1347 1356 13 74.3 13.5 1.0X +With bloom filter, blocksize: 12582912 816 830 20 122.6 8.2 1.7X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 810 841 26 123.4 8.1 1.0X -With bloom filter, blocksize: 16777216 554 575 15 180.5 5.5 1.5X +Without bloom filter, blocksize: 
16777216 1322 1322 0 75.7 13.2 1.0X +With bloom filter, blocksize: 16777216 793 803 10 126.1 7.9 1.7X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 845 852 7 118.4 8.4 1.0X -With bloom filter, blocksize: 33554432 545 564 16 183.4 5.5 1.5X +Without bloom filter, blocksize: 33554432 1315 1324 12 76.0 13.2 1.0X +With bloom filter, blocksize: 33554432 790 810 32 126.6 7.9 1.7X ================================================================================================ Parquet Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Without bloom filter 12141 12156 21 8.2 121.4 1.0X -With bloom filter 21175 21296 172 4.7 211.7 0.6X -With adaptive bloom filter & 3 candidates 20846 20897 71 4.8 208.5 0.6X -With adaptive bloom filter & 5 candidates 20731 20989 365 4.8 207.3 0.6X -With adaptive bloom filter & 9 candidates 23208 23264 79 4.3 232.1 0.5X -With adaptive bloom filter & 15 candidates 23293 23349 78 4.3 232.9 0.5X +Without bloom filter 10145 10239 133 9.9 101.4 1.0X +With bloom 
filter 21381 21403 32 4.7 213.8 0.5X +With adaptive bloom filter & 3 candidates 21248 21357 154 4.7 212.5 0.5X +With adaptive bloom filter & 5 candidates 21353 21396 60 4.7 213.5 0.5X +With adaptive bloom filter & 9 candidates 21141 21175 48 4.7 211.4 0.5X +With adaptive bloom filter & 15 candidates 21062 21121 83 4.7 210.6 0.5X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 451 502 37 221.9 4.5 1.0X -With bloom filter, blocksize: 2097152 174 186 12 573.8 1.7 2.6X +Without bloom filter, blocksize: 2097152 426 455 27 234.8 4.3 1.0X +With bloom filter, blocksize: 2097152 182 188 6 550.1 1.8 2.3X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 404 409 4 247.6 4.0 1.0X -With bloom filter, blocksize: 4194304 139 150 7 719.2 1.4 2.9X +Without bloom filter, blocksize: 4194304 406 416 9 246.5 4.1 1.0X +With bloom filter, blocksize: 
4194304 129 135 4 772.6 1.3 3.1X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 416 423 7 240.5 4.2 1.0X -With bloom filter, blocksize: 6291456 141 152 10 709.9 1.4 3.0X +Without bloom filter, blocksize: 6291456 405 409 3 247.0 4.0 1.0X +With bloom filter, blocksize: 6291456 133 142 7 749.6 1.3 3.0X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 419 432 10 238.6 4.2 1.0X -With bloom filter, blocksize: 8388608 210 223 7 476.2 2.1 2.0X +Without bloom filter, blocksize: 8388608 413 423 9 242.3 4.1 1.0X +With bloom filter, blocksize: 8388608 162 169 5 616.2 1.6 2.5X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 422 430 9 236.8 4.2 1.0X -With bloom filter, blocksize: 12582912 325 330 4 307.2 3.3 1.3X +Without bloom filter, blocksize: 12582912 419 436 24 238.5 4.2 1.0X +With bloom filter, blocksize: 12582912 345 355 7 289.7 3.5 1.2X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 420 436 22 238.3 4.2 1.0X -With bloom filter, blocksize: 16777216 398 428 29 251.2 4.0 1.1X +Without bloom filter, blocksize: 16777216 455 469 12 219.8 4.5 1.0X +With bloom filter, blocksize: 16777216 353 425 44 283.5 3.5 1.3X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 428 439 9 233.5 4.3 1.0X -With bloom filter, blocksize: 33554432 430 441 15 232.4 4.3 1.0X +Without bloom filter, blocksize: 33554432 448 459 7 223.4 4.5 1.0X +With bloom filter, blocksize: 33554432 419 429 8 238.5 4.2 1.1X diff --git a/sql/core/benchmarks/BloomFilterBenchmark-results.txt b/sql/core/benchmarks/BloomFilterBenchmark-results.txt index 286df98479f97..f01ad4e47f807 100644 --- a/sql/core/benchmarks/BloomFilterBenchmark-results.txt +++ b/sql/core/benchmarks/BloomFilterBenchmark-results.txt @@ -2,195 +2,195 @@ ORC Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 8021 8137 165 12.5 80.2 1.0X -With bloom filter 10132 10186 76 9.9 101.3 0.8X +Without bloom filter 7507 7623 165 13.3 75.1 1.0X +With bloom filter 9512 9543 45 10.5 95.1 0.8X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 876 
940 61 114.2 8.8 1.0X -With bloom filter, blocksize: 2097152 588 618 21 169.9 5.9 1.5X +Without bloom filter, blocksize: 2097152 867 880 11 115.3 8.7 1.0X +With bloom filter, blocksize: 2097152 604 641 38 165.5 6.0 1.4X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 837 839 2 119.4 8.4 1.0X -With bloom filter, blocksize: 4194304 579 601 34 172.7 5.8 1.4X +Without bloom filter, blocksize: 4194304 823 839 21 121.5 8.2 1.0X +With bloom filter, blocksize: 4194304 558 564 8 179.1 5.6 1.5X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 787 797 9 127.0 7.9 1.0X -With bloom filter, blocksize: 6291456 532 548 12 188.1 5.3 1.5X +Without bloom filter, blocksize: 6291456 810 813 3 123.5 8.1 1.0X +With bloom filter, blocksize: 6291456 524 550 20 190.7 5.2 1.5X 
================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 796 799 4 125.7 8.0 1.0X -With bloom filter, blocksize: 8388608 534 548 10 187.1 5.3 1.5X +Without bloom filter, blocksize: 8388608 798 803 7 125.4 8.0 1.0X +With bloom filter, blocksize: 8388608 560 604 32 178.7 5.6 1.4X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 836 839 3 119.7 8.4 1.0X -With bloom filter, blocksize: 12582912 517 544 19 193.4 5.2 1.6X +Without bloom filter, blocksize: 12582912 837 843 8 119.5 8.4 1.0X +With bloom filter, blocksize: 12582912 537 548 10 186.1 5.4 1.6X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 793 796 4 126.1 7.9 1.0X -With bloom filter, blocksize: 16777216 570 574 5 175.3 5.7 1.4X +Without bloom filter, blocksize: 16777216 804 806 4 124.5 8.0 1.0X +With bloom filter, blocksize: 16777216 514 526 9 194.6 5.1 1.6X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 784 794 12 127.5 7.8 1.0X -With bloom filter, blocksize: 33554432 565 587 27 177.1 5.6 1.4X +Without bloom filter, blocksize: 33554432 801 807 7 124.8 8.0 1.0X +With bloom filter, blocksize: 33554432 520 551 28 192.5 5.2 1.5X ================================================================================================ Parquet Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- 
-Without bloom filter 11173 11180 11 9.0 111.7 1.0X -With bloom filter 19387 19485 138 5.2 193.9 0.6X -With adaptive bloom filter & 3 candidates 19252 19395 202 5.2 192.5 0.6X -With adaptive bloom filter & 5 candidates 19204 19337 188 5.2 192.0 0.6X -With adaptive bloom filter & 9 candidates 19267 19380 160 5.2 192.7 0.6X -With adaptive bloom filter & 15 candidates 19144 19184 57 5.2 191.4 0.6X +Without bloom filter 10073 10250 250 9.9 100.7 1.0X +With bloom filter 13981 14127 206 7.2 139.8 0.7X +With adaptive bloom filter & 3 candidates 13992 14059 94 7.1 139.9 0.7X +With adaptive bloom filter & 5 candidates 14691 14804 160 6.8 146.9 0.7X +With adaptive bloom filter & 9 candidates 14634 14805 242 6.8 146.3 0.7X +With adaptive bloom filter & 15 candidates 14698 14727 41 6.8 147.0 0.7X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 447 476 24 223.6 4.5 1.0X -With bloom filter, blocksize: 2097152 177 185 5 565.6 1.8 2.5X +Without bloom filter, blocksize: 2097152 438 457 17 228.1 4.4 1.0X +With bloom filter, blocksize: 2097152 173 182 9 576.9 1.7 2.5X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 
7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 424 440 14 236.0 4.2 1.0X -With bloom filter, blocksize: 4194304 127 135 7 790.4 1.3 3.3X +Without bloom filter, blocksize: 4194304 416 423 7 240.4 4.2 1.0X +With bloom filter, blocksize: 4194304 116 124 7 864.6 1.2 3.6X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 423 439 16 236.2 4.2 1.0X -With bloom filter, blocksize: 6291456 130 139 9 768.6 1.3 3.3X +Without bloom filter, blocksize: 6291456 412 421 13 242.9 4.1 1.0X +With bloom filter, blocksize: 6291456 145 153 6 687.4 1.5 2.8X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 426 435 7 
235.0 4.3 1.0X -With bloom filter, blocksize: 8388608 204 214 6 489.3 2.0 2.1X +Without bloom filter, blocksize: 8388608 417 423 5 240.0 4.2 1.0X +With bloom filter, blocksize: 8388608 158 164 5 634.4 1.6 2.6X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 426 447 23 234.5 4.3 1.0X -With bloom filter, blocksize: 12582912 295 306 8 339.2 2.9 1.4X +Without bloom filter, blocksize: 12582912 413 414 2 242.4 4.1 1.0X +With bloom filter, blocksize: 12582912 312 318 5 320.5 3.1 1.3X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 427 441 9 234.0 4.3 1.0X -With bloom filter, blocksize: 16777216 372 392 12 268.5 3.7 1.1X +Without bloom filter, blocksize: 16777216 418 424 4 239.1 4.2 1.0X +With bloom filter, blocksize: 16777216 368 417 62 271.5 3.7 1.1X 
================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 508 524 14 197.0 5.1 1.0X -With bloom filter, blocksize: 33554432 439 463 31 227.7 4.4 1.2X +Without bloom filter, blocksize: 33554432 485 503 21 206.3 4.8 1.0X +With bloom filter, blocksize: 33554432 429 457 41 233.0 4.3 1.1X diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt index ac33c0edbcd24..3bb4debe8d59f 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt @@ -2,69 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1630 1688 82 9.7 103.6 1.0X -Output Single Double Column 1848 1854 10 8.5 117.5 0.9X -Output Int and String Column 4604 4635 44 3.4 292.7 0.4X -Output Partitions 3399 3432 46 4.6 216.1 0.5X -Output Buckets 4919 4925 9 3.2 312.7 0.3X 
+Output Single Int Column 1793 1815 30 8.8 114.0 1.0X +Output Single Double Column 1935 2005 99 8.1 123.0 0.9X +Output Int and String Column 4319 4531 299 3.6 274.6 0.4X +Output Partitions 3240 3261 29 4.9 206.0 0.6X +Output Buckets 4415 4427 17 3.6 280.7 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1917 1930 19 8.2 121.9 1.0X -Output Single Double Column 1739 1765 35 9.0 110.6 1.1X -Output Int and String Column 5231 5240 13 3.0 332.6 0.4X -Output Partitions 3531 3537 9 4.5 224.5 0.5X -Output Buckets 4815 4816 1 3.3 306.1 0.4X +Output Single Int Column 2013 2013 1 7.8 128.0 1.0X +Output Single Double Column 1892 1899 9 8.3 120.3 1.1X +Output Int and String Column 5133 5137 6 3.1 326.3 0.4X +Output Partitions 3584 3586 3 4.4 227.8 0.6X +Output Buckets 4639 4641 4 3.4 294.9 0.4X ================================================================================================ ORC writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1145 1150 8 13.7 72.8 1.0X -Output Single Double Column 1775 1788 18 8.9 112.8 0.6X -Output Int and String Column 4092 4104 17 3.8 260.2 0.3X -Output Partitions 2516 2532 22 6.3 160.0 0.5X -Output Buckets 3555 3574 26 
4.4 226.0 0.3X +Output Single Int Column 1036 1039 4 15.2 65.9 1.0X +Output Single Double Column 1709 1719 14 9.2 108.6 0.6X +Output Int and String Column 3780 3818 54 4.2 240.3 0.3X +Output Partitions 2598 2600 3 6.1 165.2 0.4X +Output Buckets 3551 3566 21 4.4 225.8 0.3X ================================================================================================ JSON writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1710 1721 15 9.2 108.7 1.0X -Output Single Double Column 2405 2421 22 6.5 152.9 0.7X -Output Int and String Column 4262 4274 18 3.7 271.0 0.4X -Output Partitions 3190 3211 30 4.9 202.8 0.5X -Output Buckets 4134 4160 36 3.8 262.8 0.4X +Output Single Int Column 1586 1590 6 9.9 100.9 1.0X +Output Single Double Column 2260 2270 13 7.0 143.7 0.7X +Output Int and String Column 4163 4179 23 3.8 264.7 0.4X +Output Partitions 3109 3131 31 5.1 197.7 0.5X +Output Buckets 4002 4011 12 3.9 254.5 0.4X ================================================================================================ CSV writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 
3871 3895 34 4.1 246.1 1.0X -Output Single Double Column 4379 4382 4 3.6 278.4 0.9X -Output Int and String Column 6820 6835 21 2.3 433.6 0.6X -Output Partitions 5555 5573 26 2.8 353.2 0.7X -Output Buckets 6679 6696 24 2.4 424.6 0.6X +Output Single Int Column 3556 3577 28 4.4 226.1 1.0X +Output Single Double Column 4048 4062 21 3.9 257.3 0.9X +Output Int and String Column 6714 6719 7 2.3 426.9 0.5X +Output Partitions 5340 5353 19 2.9 339.5 0.7X +Output Buckets 6447 6466 26 2.4 409.9 0.6X diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt index 56c83e0940856..5c3d4bad772a9 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt @@ -2,69 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1736 1765 40 9.1 110.4 1.0X -Output Single Double Column 1840 1879 56 8.6 117.0 0.9X -Output Int and String Column 4395 4435 57 3.6 279.4 0.4X -Output Partitions 3279 3373 132 4.8 208.5 0.5X -Output Buckets 4598 4602 6 3.4 292.3 0.4X +Output Single Int Column 1738 1772 48 9.1 110.5 1.0X +Output Single Double Column 1821 1838 25 8.6 115.8 1.0X +Output Int and String Column 4749 4776 39 3.3 301.9 0.4X +Output Partitions 3238 3272 48 4.9 205.9 0.5X +Output Buckets 4428 4461 46 3.6 281.5 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1790 1801 15 8.8 113.8 1.0X -Output Single Double Column 1857 1868 17 8.5 118.0 1.0X -Output Int and String Column 4717 4735 26 3.3 299.9 0.4X -Output Partitions 3187 3212 35 4.9 202.6 0.6X -Output Buckets 4353 4358 6 3.6 276.8 0.4X +Output Single Int Column 1961 1992 44 8.0 124.7 1.0X +Output Single Double Column 1876 1878 2 8.4 119.3 1.0X +Output Int and String Column 4831 4837 8 3.3 307.2 0.4X +Output Partitions 3430 3431 2 4.6 218.1 0.6X +Output Buckets 4226 4231 6 3.7 268.7 0.5X ================================================================================================ ORC writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 941 953 20 16.7 59.8 1.0X -Output Single Double Column 1563 1569 8 10.1 99.4 0.6X -Output Int and String Column 3838 3868 43 4.1 244.0 0.2X -Output Partitions 2514 2542 40 6.3 159.8 0.4X -Output Buckets 3554 3555 2 4.4 225.9 0.3X +Output Single Int Column 1076 1084 12 14.6 68.4 1.0X +Output Single Double Column 1808 1811 4 8.7 114.9 0.6X +Output Int and String Column 3981 4036 78 4.0 253.1 0.3X +Output Partitions 2561 2602 59 6.1 162.8 0.4X +Output Buckets 3521 3557 51 4.5 223.9 0.3X ================================================================================================ JSON 
writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1606 1613 10 9.8 102.1 1.0X -Output Single Double Column 2245 2257 17 7.0 142.7 0.7X -Output Int and String Column 3818 3837 26 4.1 242.7 0.4X -Output Partitions 3154 3181 38 5.0 200.5 0.5X -Output Buckets 4123 4132 12 3.8 262.1 0.4X +Output Single Int Column 1624 1643 27 9.7 103.2 1.0X +Output Single Double Column 2272 2275 4 6.9 144.5 0.7X +Output Int and String Column 3996 4048 73 3.9 254.1 0.4X +Output Partitions 3045 3054 13 5.2 193.6 0.5X +Output Buckets 3899 3903 6 4.0 247.9 0.4X ================================================================================================ CSV writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 3260 3286 36 4.8 207.3 1.0X -Output Single Double Column 4065 4076 15 3.9 258.4 0.8X -Output Int and String Column 6295 6310 21 2.5 400.2 0.5X -Output Partitions 5151 5177 37 3.1 327.5 0.6X -Output Buckets 6173 6209 51 2.5 392.5 0.5X +Output Single Int Column 3445 3446 2 4.6 219.0 1.0X +Output Single Double Column 3674 3717 61 4.3 233.6 0.9X +Output Int and String Column 
6085 6090 7 2.6 386.9 0.6X +Output Partitions 5107 5123 22 3.1 324.7 0.7X +Output Buckets 6098 6123 35 2.6 387.7 0.6X diff --git a/sql/core/benchmarks/ByteArrayBenchmark-jdk21-results.txt b/sql/core/benchmarks/ByteArrayBenchmark-jdk21-results.txt index c650aa1efbb32..9320ff01601c4 100644 --- a/sql/core/benchmarks/ByteArrayBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ByteArrayBenchmark-jdk21-results.txt @@ -2,26 +2,26 @@ byte array comparisons ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Byte Array compareTo: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -2-7 byte 254 257 1 257.8 3.9 1.0X -8-16 byte 409 437 37 160.3 6.2 0.6X -16-32 byte 415 416 1 158.0 6.3 0.6X -512-1024 byte 540 542 1 121.3 8.2 0.5X -512 byte slow 1524 1553 23 43.0 23.3 0.2X -2-7 byte 313 314 1 209.5 4.8 0.8X +2-7 byte 256 259 2 256.2 3.9 1.0X +8-16 byte 412 438 34 159.1 6.3 0.6X +16-32 byte 409 411 3 160.1 6.2 0.6X +512-1024 byte 544 546 1 120.6 8.3 0.5X +512 byte slow 1543 1570 23 42.5 23.5 0.2X +2-7 byte 315 316 1 208.2 4.8 0.8X ================================================================================================ byte array equals ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Byte Array equals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Byte Array equals 538 541 8 297.6 
3.4 1.0X +Byte Array equals 548 551 1 291.9 3.4 1.0X diff --git a/sql/core/benchmarks/ByteArrayBenchmark-results.txt b/sql/core/benchmarks/ByteArrayBenchmark-results.txt index 723af23b06a3f..d76d86ce54c31 100644 --- a/sql/core/benchmarks/ByteArrayBenchmark-results.txt +++ b/sql/core/benchmarks/ByteArrayBenchmark-results.txt @@ -2,26 +2,26 @@ byte array comparisons ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Byte Array compareTo: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -2-7 byte 258 259 1 254.2 3.9 1.0X -8-16 byte 392 402 11 167.4 6.0 0.7X -16-32 byte 396 398 1 165.4 6.0 0.7X -512-1024 byte 519 523 2 126.4 7.9 0.5X -512 byte slow 3255 3273 13 20.1 49.7 0.1X -2-7 byte 249 250 1 263.0 3.8 1.0X +2-7 byte 259 264 3 253.3 3.9 1.0X +8-16 byte 427 462 23 153.5 6.5 0.6X +16-32 byte 485 486 1 135.3 7.4 0.5X +512-1024 byte 609 612 3 107.7 9.3 0.4X +512 byte slow 1485 1518 30 44.1 22.7 0.2X +2-7 byte 299 301 1 218.8 4.6 0.9X ================================================================================================ byte array equals ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Byte Array equals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Byte Array equals 555 556 1 288.3 3.5 1.0X +Byte Array equals 550 552 3 290.8 3.4 1.0X diff --git 
a/sql/core/benchmarks/CSVBenchmark-jdk21-results.txt b/sql/core/benchmarks/CSVBenchmark-jdk21-results.txt index cc0b3cdaffd11..8ef55135b58f9 100644 --- a/sql/core/benchmarks/CSVBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CSVBenchmark-jdk21-results.txt @@ -2,76 +2,76 @@ Benchmark to measure CSV read/write performance ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parsing quoted values: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -One quoted string 25656 25710 55 0.0 513115.4 1.0X +One quoted string 24592 24650 50 0.0 491842.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Wide rows with 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 1000 columns 59317 59851 631 0.0 59316.9 1.0X -Select 100 columns 22419 22524 133 0.0 22419.0 2.6X -Select one column 18736 18821 95 0.1 18736.0 3.2X -count() 4289 4377 88 0.2 4289.5 13.8X -Select 100 columns, one bad input field 27081 27108 26 0.0 27080.9 2.2X -Select 100 columns, corrupt record field 30668 30949 319 0.0 30668.3 1.9X +Select 1000 columns 58745 59095 571 0.0 58745.1 1.0X +Select 100 columns 21111 21163 55 0.0 21111.4 2.8X +Select one column 17328 17405 112 0.1 17328.0 3.4X +count() 3655 4076 698 0.3 3654.7 16.1X +Select 100 columns, one bad input field 25285 25302 16 0.0 25284.7 2.3X +Select 100 columns, corrupt record field 28990 29050 52 0.0 28989.8 2.0X -OpenJDK 64-Bit Server 
VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Count a dataset with 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns + count() 10795 10819 21 0.9 1079.5 1.0X -Select 1 column + count() 7409 7416 8 1.3 740.9 1.5X -count() 1712 1714 1 5.8 171.2 6.3X +Select 10 columns + count() 11213 11266 46 0.9 1121.3 1.0X +Select 1 column + count() 7751 7770 18 1.3 775.1 1.4X +count() 1657 1659 2 6.0 165.7 6.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 859 861 2 11.6 85.9 1.0X -to_csv(timestamp) 6073 6115 62 1.6 607.3 0.1X -write timestamps to files 6478 6487 7 1.5 647.8 0.1X -Create a dataset of dates 974 981 11 10.3 97.4 0.9X -to_csv(date) 4516 4523 9 2.2 451.6 0.2X -write dates to files 4714 4723 9 2.1 471.4 0.2X +Create a dataset of timestamps 877 888 17 11.4 87.7 1.0X +to_csv(timestamp) 5444 5471 34 1.8 544.4 0.2X +write timestamps to files 6094 6122 40 1.6 609.4 0.1X +Create a dataset of dates 1067 1076 14 9.4 106.7 0.8X +to_csv(date) 4115 4127 21 2.4 411.5 0.2X +write dates to files 4389 4456 59 2.3 438.9 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 1167 1177 11 8.6 116.7 1.0X -read timestamps from files 9490 9517 29 1.1 949.0 0.1X -infer timestamps from files 19176 19254 112 0.5 1917.6 0.1X -read date text from files 1133 1149 23 8.8 113.3 1.0X -read date from files 8327 8344 30 1.2 832.7 0.1X -infer date from files 17583 17672 77 0.6 1758.3 0.1X -timestamp strings 1310 1318 7 7.6 131.0 0.9X -parse timestamps from Dataset[String] 11767 11853 85 0.8 1176.7 0.1X -infer timestamps from Dataset[String] 21178 21486 268 0.5 2117.8 0.1X -date strings 1602 1610 8 6.2 160.2 0.7X -parse dates from Dataset[String] 10041 10114 112 1.0 1004.1 0.1X -from_csv(timestamp) 10377 10493 115 1.0 1037.7 0.1X -from_csv(date) 9618 9622 3 1.0 961.8 0.1X -infer error timestamps from Dataset[String] with default format 11925 11968 40 0.8 1192.5 0.1X -infer error timestamps from Dataset[String] with user-provided format 11724 11807 72 0.9 1172.4 0.1X -infer error timestamps from Dataset[String] with legacy format 11781 11879 86 0.8 1178.1 0.1X +read timestamp text from files 1210 1214 4 8.3 121.0 1.0X +read timestamps from files 12528 12534 9 0.8 1252.8 0.1X +infer timestamps from files 24564 24614 48 0.4 2456.4 0.0X +read date text from files 1120 1125 6 8.9 112.0 1.1X +read date from files 11502 11540 35 0.9 1150.2 0.1X +infer date from files 23415 23704 263 0.4 2341.5 0.1X +timestamp strings 1205 1208 3 8.3 120.5 1.0X +parse timestamps from Dataset[String] 13589 13639 48 0.7 1358.9 0.1X +infer timestamps from Dataset[String] 25468 25568 115 0.4 2546.8 0.0X +date strings 1561 1565 4 6.4 156.1 0.8X +parse dates from Dataset[String] 12235 12255 25 0.8 1223.5 0.1X +from_csv(timestamp) 11514 11596 71 0.9 1151.4 0.1X +from_csv(date) 10604 10621 26 0.9 1060.4 0.1X +infer error timestamps from Dataset[String] with default format 14746 14761 14 0.7 1474.6 0.1X 
+infer error timestamps from Dataset[String] with user-provided format 14743 14787 56 0.7 1474.3 0.1X +infer error timestamps from Dataset[String] with legacy format 14690 14731 56 0.7 1469.0 0.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 4681 4704 32 0.0 46811.8 1.0X -pushdown disabled 4660 4679 28 0.0 46601.3 1.0X -w/ filters 762 778 16 0.1 7623.6 6.1X +w/o filters 4813 4844 53 0.0 48133.6 1.0X +pushdown disabled 4615 4624 16 0.0 46145.3 1.0X +w/ filters 804 806 3 0.1 8035.4 6.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Read as Intervals 781 785 7 0.4 2602.2 1.0X -Read Raw Strings 291 294 3 1.0 969.3 2.7X +Read as Intervals 815 816 1 0.4 2716.7 1.0X +Read Raw Strings 331 337 6 0.9 1104.2 2.5X diff --git a/sql/core/benchmarks/CSVBenchmark-results.txt b/sql/core/benchmarks/CSVBenchmark-results.txt index 5626bbfb08fbd..e96f233c77793 100644 --- a/sql/core/benchmarks/CSVBenchmark-results.txt +++ b/sql/core/benchmarks/CSVBenchmark-results.txt @@ -2,76 +2,76 @@ Benchmark to measure CSV read/write performance ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parsing quoted values: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -One quoted string 25766 25929 155 0.0 515313.0 1.0X +One quoted string 24513 24556 44 0.0 490253.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Wide rows with 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 1000 columns 51465 51922 628 0.0 51465.3 1.0X -Select 100 columns 21796 21940 152 0.0 21796.0 2.4X -Select one column 18651 18703 52 0.1 18651.2 2.8X -count() 3342 3448 103 0.3 3341.9 15.4X -Select 100 columns, one bad input field 27416 27481 60 0.0 27416.2 1.9X -Select 100 columns, corrupt record field 30540 30699 138 0.0 30539.8 1.7X +Select 1000 columns 55661 56102 621 0.0 55661.4 1.0X +Select 100 columns 21761 22015 246 0.0 21761.5 2.6X +Select one column 18450 18504 72 0.1 18449.8 3.0X +count() 3329 3412 72 0.3 3329.0 16.7X +Select 100 columns, one bad input field 27253 27287 48 0.0 27252.5 2.0X +Select 100 columns, corrupt record field 30624 30679 90 0.0 30624.5 1.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Count a dataset with 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns + count() 9495 9525 26 1.1 949.5 1.0X -Select 1 column + count() 6922 6961 52 1.4 692.2 1.4X -count() 1742 1752 9 5.7 174.2 5.5X +Select 10 columns + count() 9849 9871 24 1.0 984.9 1.0X +Select 1 column + count() 7065 7075 15 1.4 706.5 1.4X +count() 
1738 1741 4 5.8 173.8 5.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 912 958 65 11.0 91.2 1.0X -to_csv(timestamp) 7089 7112 31 1.4 708.9 0.1X -write timestamps to files 7242 7267 22 1.4 724.2 0.1X -Create a dataset of dates 1157 1185 38 8.6 115.7 0.8X -to_csv(date) 5034 5080 65 2.0 503.4 0.2X -write dates to files 5089 5107 29 2.0 508.9 0.2X +Create a dataset of timestamps 821 825 5 12.2 82.1 1.0X +to_csv(timestamp) 6711 6729 20 1.5 671.1 0.1X +write timestamps to files 6843 6858 25 1.5 684.3 0.1X +Create a dataset of dates 939 942 3 10.6 93.9 0.9X +to_csv(date) 4684 4697 21 2.1 468.4 0.2X +write dates to files 4479 4495 13 2.2 447.9 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 1228 1233 4 8.1 122.8 1.0X -read timestamps from files 10598 10626 30 0.9 1059.8 0.1X -infer timestamps from files 21159 21181 19 0.5 2115.9 0.1X -read date text from files 1148 1151 3 8.7 114.8 1.1X -read date from files 10147 10180 35 1.0 1014.7 0.1X -infer date from files 21078 21110 47 0.5 2107.8 0.1X -timestamp strings 1354 1366 21 7.4 135.4 0.9X -parse timestamps from Dataset[String] 12127 12153 23 0.8 1212.7 0.1X -infer timestamps from Dataset[String] 22539 22566 27 0.4 2253.9 0.1X -date strings 1857 1862 5 5.4 185.7 0.7X 
-parse dates from Dataset[String] 11906 11931 30 0.8 1190.6 0.1X -from_csv(timestamp) 10716 10744 37 0.9 1071.6 0.1X -from_csv(date) 11123 11140 15 0.9 1112.3 0.1X -infer error timestamps from Dataset[String] with default format 12274 12281 9 0.8 1227.4 0.1X -infer error timestamps from Dataset[String] with user-provided format 12281 12304 26 0.8 1228.1 0.1X -infer error timestamps from Dataset[String] with legacy format 12300 12307 9 0.8 1230.0 0.1X +read timestamp text from files 1190 1197 6 8.4 119.0 1.0X +read timestamps from files 10627 10667 52 0.9 1062.7 0.1X +infer timestamps from files 21086 21135 69 0.5 2108.6 0.1X +read date text from files 1081 1084 5 9.2 108.1 1.1X +read date from files 10254 10265 12 1.0 1025.4 0.1X +infer date from files 20908 20924 18 0.5 2090.8 0.1X +timestamp strings 1173 1175 3 8.5 117.3 1.0X +parse timestamps from Dataset[String] 12413 12473 57 0.8 1241.3 0.1X +infer timestamps from Dataset[String] 22801 22829 42 0.4 2280.1 0.1X +date strings 1653 1657 4 6.1 165.3 0.7X +parse dates from Dataset[String] 12033 12057 25 0.8 1203.3 0.1X +from_csv(timestamp) 10339 10355 18 1.0 1033.9 0.1X +from_csv(date) 10554 10561 11 0.9 1055.4 0.1X +infer error timestamps from Dataset[String] with default format 12871 12878 10 0.8 1287.1 0.1X +infer error timestamps from Dataset[String] with user-provided format 12845 12867 33 0.8 1284.5 0.1X +infer error timestamps from Dataset[String] with legacy format 12872 12904 37 0.8 1287.2 0.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 4058 4061 2 0.0 40583.1 1.0X -pushdown disabled 4092 4099 10 0.0 40924.1 1.0X -w/ filters 699 705 8 0.1 6990.7 5.8X +w/o filters 4281 
4288 7 0.0 42807.9 1.0X +pushdown disabled 4059 4067 10 0.0 40590.4 1.1X +w/ filters 764 775 13 0.1 7640.9 5.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Read as Intervals 737 742 9 0.4 2456.8 1.0X -Read Raw Strings 294 300 10 1.0 979.5 2.5X +Read as Intervals 709 716 8 0.4 2364.5 1.0X +Read Raw Strings 295 298 3 1.0 984.4 2.4X diff --git a/sql/core/benchmarks/CharVarcharBenchmark-jdk21-results.txt b/sql/core/benchmarks/CharVarcharBenchmark-jdk21-results.txt index 47781a2cc6e1f..2093ce53a24d3 100644 --- a/sql/core/benchmarks/CharVarcharBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CharVarcharBenchmark-jdk21-results.txt @@ -2,121 +2,121 @@ Char Varchar Write Side Perf w/o Tailing Spaces ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 5 6905 7223 322 5.8 172.6 1.0X -write char with length 5 10769 10842 66 3.7 269.2 0.6X -write varchar with length 5 7615 7654 35 5.3 190.4 0.9X +write string with length 5 7118 7215 151 5.6 178.0 1.0X +write char with length 5 12527 12590 97 3.2 313.2 0.6X +write varchar with length 5 7976 8046 82 5.0 199.4 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 
Write with length 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 10 3624 3637 21 5.5 181.2 1.0X -write char with length 10 6455 6488 33 3.1 322.7 0.6X -write varchar with length 10 3802 3861 79 5.3 190.1 1.0X +write string with length 10 3745 3751 6 5.3 187.2 1.0X +write char with length 10 6606 6702 83 3.0 330.3 0.6X +write varchar with length 10 3782 3810 25 5.3 189.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 20 1770 1784 17 5.6 177.0 1.0X -write char with length 20 4741 4751 13 2.1 474.1 0.4X -write varchar with length 20 1921 1926 6 5.2 192.1 0.9X +write string with length 20 1774 1777 5 5.6 177.4 1.0X +write char with length 20 4778 4829 46 2.1 477.8 0.4X +write varchar with length 20 2047 2048 1 4.9 204.7 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 40: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 40 945 955 13 5.3 189.0 1.0X -write char with length 40 3669 3697 26 1.4 733.7 0.3X -write varchar with length 40 1024 1029 5 4.9 204.7 0.9X +write string with length 40 931 950 17 5.4 186.3 1.0X +write char with length 40 3709 3713 3 1.3 741.9 0.3X +write varchar with length 40 1065 1068 2 4.7 213.0 0.9X -OpenJDK 64-Bit Server VM 
21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 60: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 60 648 671 29 5.1 194.5 1.0X -write char with length 60 3258 3278 17 1.0 977.5 0.2X -write varchar with length 60 726 738 12 4.6 217.8 0.9X +write string with length 60 662 675 14 5.0 198.5 1.0X +write char with length 60 3355 3360 5 1.0 1006.5 0.2X +write varchar with length 60 680 691 11 4.9 204.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 80: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 80 522 526 6 4.8 208.7 1.0X -write char with length 80 3151 3173 31 0.8 1260.3 0.2X -write varchar with length 80 555 564 8 4.5 222.2 0.9X +write string with length 80 500 505 5 5.0 200.1 1.0X +write char with length 80 3147 3173 22 0.8 1258.9 0.2X +write varchar with length 80 541 549 9 4.6 216.3 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 100: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 100 423 450 26 4.7 211.3 1.0X -write char with length 100 3057 3067 14 0.7 1528.3 0.1X -write varchar with length 100 472 478 6 4.2 235.9 0.9X +write string with length 100 403 417 13 5.0 201.6 1.0X +write char 
with length 100 3107 3113 10 0.6 1553.5 0.1X +write varchar with length 100 440 447 8 4.5 219.8 0.9X ================================================================================================ Char Varchar Write Side Perf w/ Tailing Spaces ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 5 10664 10748 114 3.8 266.6 1.0X -write char with length 5 13099 13173 91 3.1 327.5 0.8X -write varchar with length 5 12595 12606 10 3.2 314.9 0.8X +write string with length 5 10431 10499 102 3.8 260.8 1.0X +write char with length 5 12396 12404 9 3.2 309.9 0.8X +write varchar with length 5 12858 12889 49 3.1 321.5 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 10 5412 5423 14 3.7 270.6 1.0X -write char with length 10 8402 8405 5 2.4 420.1 0.6X -write varchar with length 10 8000 8031 31 2.5 400.0 0.7X +write string with length 10 5537 5556 18 3.6 276.8 1.0X +write char with length 10 8103 8104 2 2.5 405.1 0.7X +write varchar with length 10 8414 8427 12 2.4 420.7 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 20: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 20 3326 3331 5 3.0 332.6 1.0X -write char with length 20 5576 5586 9 1.8 557.6 0.6X -write varchar with length 20 5699 5708 9 1.8 569.9 0.6X +write string with length 20 3560 3566 7 2.8 356.0 1.0X +write char with length 20 5738 5741 3 1.7 573.8 0.6X +write varchar with length 20 5787 5803 22 1.7 578.7 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 40: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 40 2210 2214 4 2.3 441.9 1.0X -write char with length 40 4306 4306 0 1.2 861.2 0.5X -write varchar with length 40 4509 4524 16 1.1 901.8 0.5X +write string with length 40 2352 2364 11 2.1 470.4 1.0X +write char with length 40 4431 4449 27 1.1 886.2 0.5X +write varchar with length 40 4461 4473 11 1.1 892.2 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 60: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 60 1894 1901 6 1.8 568.2 1.0X -write char with length 60 4088 4093 5 0.8 1226.4 0.5X -write varchar with length 60 3982 3987 5 0.8 1194.5 0.5X +write string with length 60 1923 1928 4 1.7 577.0 1.0X +write char with length 60 4005 4013 12 0.8 1201.6 0.5X +write varchar with length 60 4028 4033 5 0.8 1208.3 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server 
VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 80: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 80 1785 1793 6 1.4 714.2 1.0X -write char with length 80 3937 3952 14 0.6 1574.6 0.5X -write varchar with length 80 3942 3959 24 0.6 1576.9 0.5X +write string with length 80 1969 1986 15 1.3 787.5 1.0X +write char with length 80 3999 4009 16 0.6 1599.5 0.5X +write varchar with length 80 4030 4040 9 0.6 1612.1 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 100: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 100 1629 1640 10 1.2 814.6 1.0X -write char with length 100 3686 3693 12 0.5 1842.9 0.4X -write varchar with length 100 3905 3921 15 0.5 1952.6 0.4X +write string with length 100 1616 1626 15 1.2 808.2 1.0X +write char with length 100 3851 3852 1 0.5 1925.7 0.4X +write varchar with length 100 3841 3858 18 0.5 1920.7 0.4X diff --git a/sql/core/benchmarks/CharVarcharBenchmark-results.txt b/sql/core/benchmarks/CharVarcharBenchmark-results.txt index 03a64c6904e9c..8b8a7fe89d469 100644 --- a/sql/core/benchmarks/CharVarcharBenchmark-results.txt +++ b/sql/core/benchmarks/CharVarcharBenchmark-results.txt @@ -2,121 +2,121 @@ Char Varchar Write Side Perf w/o Tailing Spaces ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 5: Best Time(ms) 
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 5 6760 7092 292 5.9 169.0 1.0X -write char with length 5 9848 9929 87 4.1 246.2 0.7X -write varchar with length 5 7633 7676 37 5.2 190.8 0.9X +write string with length 5 6706 6744 33 6.0 167.7 1.0X +write char with length 5 10593 10612 31 3.8 264.8 0.6X +write varchar with length 5 8187 8238 84 4.9 204.7 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 10 3611 3629 27 5.5 180.5 1.0X -write char with length 10 6130 6165 50 3.3 306.5 0.6X -write varchar with length 10 3742 3772 26 5.3 187.1 1.0X +write string with length 10 3968 3985 21 5.0 198.4 1.0X +write char with length 10 6729 6805 67 3.0 336.5 0.6X +write varchar with length 10 3987 4047 101 5.0 199.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 20 1775 1786 11 5.6 177.5 1.0X -write char with length 20 4560 4562 2 2.2 456.0 0.4X -write varchar with length 20 1923 1933 8 5.2 192.3 0.9X +write string with length 20 1968 1985 18 5.1 196.8 1.0X +write char with length 20 4800 4832 36 2.1 480.0 0.4X +write varchar with length 20 2016 2025 13 5.0 201.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 40: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 40 935 958 30 5.3 187.1 1.0X -write char with length 40 3475 3480 4 1.4 695.0 0.3X -write varchar with length 40 1019 1038 18 4.9 203.9 0.9X +write string with length 40 1042 1044 2 4.8 208.4 1.0X +write char with length 40 3794 3804 13 1.3 758.9 0.3X +write varchar with length 40 1124 1129 7 4.4 224.8 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 60: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 60 663 666 4 5.0 199.0 1.0X -write char with length 60 3240 3248 10 1.0 972.1 0.2X -write varchar with length 60 711 715 5 4.7 213.2 0.9X +write string with length 60 690 696 6 4.8 207.1 1.0X +write char with length 60 3430 3443 18 1.0 1029.0 0.2X +write varchar with length 60 759 770 9 4.4 227.8 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 80: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 80 533 544 10 4.7 213.2 1.0X -write char with length 80 3024 3028 5 0.8 1209.6 0.2X -write varchar with length 80 560 561 1 4.5 223.9 1.0X +write string with length 80 560 569 11 4.5 223.8 1.0X +write char with length 80 3212 3222 
11 0.8 1284.8 0.2X +write varchar with length 80 607 612 6 4.1 242.7 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 100: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 100 460 464 4 4.3 230.0 1.0X -write char with length 100 2973 2975 1 0.7 1486.7 0.2X -write varchar with length 100 483 486 3 4.1 241.4 1.0X +write string with length 100 437 444 11 4.6 218.4 1.0X +write char with length 100 3106 3109 5 0.6 1552.8 0.1X +write varchar with length 100 483 494 13 4.1 241.5 0.9X ================================================================================================ Char Varchar Write Side Perf w/ Tailing Spaces ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 5 8798 8827 25 4.5 219.9 1.0X -write char with length 5 11984 11999 19 3.3 299.6 0.7X -write varchar with length 5 12379 12401 20 3.2 309.5 0.7X +write string with length 5 9537 9571 32 4.2 238.4 1.0X +write char with length 5 12811 12868 70 3.1 320.3 0.7X +write varchar with length 5 12857 12877 24 3.1 321.4 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -write string with length 10 5290 5307 19 3.8 264.5 1.0X -write char with length 10 7536 7538 2 2.7 376.8 0.7X -write varchar with length 10 7489 7519 39 2.7 374.5 0.7X +write string with length 10 5556 5559 3 3.6 277.8 1.0X +write char with length 10 8349 8354 6 2.4 417.5 0.7X +write varchar with length 10 7827 7844 23 2.6 391.4 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 20 3213 3218 6 3.1 321.3 1.0X -write char with length 20 5570 5578 7 1.8 557.0 0.6X -write varchar with length 20 5245 5261 15 1.9 524.5 0.6X +write string with length 20 3488 3499 10 2.9 348.8 1.0X +write char with length 20 5847 5854 7 1.7 584.7 0.6X +write varchar with length 20 5408 5409 1 1.8 540.8 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 40: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 40 2121 2125 7 2.4 424.2 1.0X -write char with length 40 4399 4419 17 1.1 879.8 0.5X -write varchar with length 40 4118 4124 5 1.2 823.7 0.5X +write string with length 40 2430 2434 6 2.1 486.1 1.0X +write char with length 40 4492 4494 3 1.1 898.4 0.5X +write varchar with length 40 4131 4140 8 1.2 826.1 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 60: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 60 1884 1891 6 1.8 565.1 1.0X -write char with length 60 3939 3941 4 0.8 1181.6 0.5X -write varchar with length 60 3584 3591 5 0.9 1075.3 0.5X +write string with length 60 1900 1906 5 1.8 570.0 1.0X +write char with length 60 4153 4155 3 0.8 1245.8 0.5X +write varchar with length 60 3659 3660 2 0.9 1097.7 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 80: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 80 1838 1842 3 1.4 735.4 1.0X -write char with length 80 3823 3835 13 0.7 1529.1 0.5X -write varchar with length 80 3454 3456 2 0.7 1381.8 0.5X +write string with length 80 1816 1821 5 1.4 726.5 1.0X +write char with length 80 4030 4050 18 0.6 1611.9 0.5X +write varchar with length 80 3744 3758 12 0.7 1497.6 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write with length 100: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 100 1635 1643 10 1.2 817.3 1.0X -write char with length 100 3697 3704 6 0.5 1848.3 0.4X -write varchar with length 100 3355 3375 18 0.6 1677.3 0.5X +write string with length 100 1674 1687 14 1.2 836.9 1.0X +write char with length 100 3922 3927 7 0.5 1961.0 0.4X +write varchar with 
length 100 3503 3505 3 0.6 1751.4 0.5X diff --git a/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt b/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt index 88db9ebfa1e34..8a1599b3cfe42 100644 --- a/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt @@ -1,88 +1,88 @@ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 1193 1194 1 0.1 11929.0 1.0X -UTF8_LCASE 2717 2721 6 0.0 27168.5 2.3X -UNICODE 17991 17993 2 0.0 179913.6 15.1X -UNICODE_CI 17837 17842 7 0.0 178369.9 15.0X +UTF8_BINARY 1360 1360 1 0.1 13597.4 1.0X +UTF8_LCASE 2411 2417 9 0.0 24106.7 1.8X +UNICODE 16945 16969 34 0.0 169452.6 12.5X +UNICODE_CI 16645 16671 36 0.0 166452.8 12.2X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time --------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 1523 1523 0 0.1 15233.9 1.0X -UTF8_LCASE 2441 2441 0 0.0 24407.9 1.6X -UNICODE 17875 17884 13 0.0 178749.6 11.7X -UNICODE_CI 17701 17703 2 0.0 177013.8 11.6X +UTF8_BINARY 1751 1753 2 0.1 17513.9 1.0X +UTF8_LCASE 2571 2573 3 0.0 25712.7 1.5X +UNICODE 16594 16625 44 0.0 165935.1 9.5X +UNICODE_CI 16422 16423 3 0.0 164215.1 9.4X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 2660 2666 9 0.0 26601.1 1.0X -UTF8_LCASE 5013 5016 3 0.0 50134.0 1.9X -UNICODE 75622 75623 1 0.0 756217.3 28.4X -UNICODE_CI 63036 63042 9 0.0 630360.9 23.7X +UTF8_BINARY 2817 2824 9 0.0 28170.1 1.0X +UTF8_LCASE 5427 5428 1 0.0 54268.5 1.9X +UNICODE 70045 70096 72 0.0 700450.7 24.9X +UNICODE_CI 56364 56433 97 0.0 563641.8 20.0X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 2121 2122 0 0.0 21214.2 1.0X -UTF8_LCASE 27635 27636 1 0.0 276347.7 13.0X -UNICODE 523746 524012 376 0.0 5237460.5 246.9X -UNICODE_CI 520134 520227 131 0.0 5201343.3 245.2X +UTF8_BINARY 1644 1645 1 0.1 16440.3 1.0X +UTF8_LCASE 14804 14846 59 0.0 148037.2 9.0X +UNICODE 308825 309294 663 0.0 3088250.5 187.8X +UNICODE_CI 310637 312537 2688 0.0 3106367.6 188.9X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ 
-UTF8_BINARY 2767 2769 4 0.0 27666.3 1.0X -UTF8_LCASE 26861 26861 1 0.0 268606.4 9.7X -UNICODE 518540 518815 389 0.0 5185401.3 187.4X -UNICODE_CI 521156 521261 148 0.0 5211559.5 188.4X +UTF8_BINARY 1941 1942 1 0.1 19412.9 1.0X +UTF8_LCASE 10354 10409 78 0.0 103535.9 5.3X +UNICODE 309786 310124 478 0.0 3097864.6 159.6X +UNICODE_CI 313038 313960 1303 0.0 3130382.9 161.3X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 2919 2921 3 0.0 29190.2 1.0X -UTF8_LCASE 26862 26862 1 0.0 268618.0 9.2X -UNICODE 504534 504927 556 0.0 5045340.3 172.8X -UNICODE_CI 506542 506565 32 0.0 5065423.0 173.5X +UTF8_BINARY 1958 1961 4 0.1 19579.3 1.0X +UTF8_LCASE 10329 10332 5 0.0 103285.8 5.3X +UNICODE 323944 328005 5743 0.0 3239437.8 165.5X +UNICODE_CI 332646 333139 697 0.0 3326457.7 169.9X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execICU: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------- -UNICODE 419 425 5 0.2 4189.2 1.0X -UNICODE_CI 416 426 6 0.2 4163.2 1.0X +UNICODE 370 371 1 0.3 3698.1 1.0X +UNICODE_CI 370 370 1 0.3 3696.5 1.0X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 
64-Core Processor collation unit benchmarks - initCap using impl execBinaryICU: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 575 576 0 0.2 5754.0 1.0X -UTF8_LCASE 575 576 1 0.2 5747.8 1.0X -UNICODE 576 576 0 0.2 5761.5 1.0X -UNICODE_CI 576 578 2 0.2 5758.0 1.0X +UTF8_BINARY 592 593 1 0.2 5915.6 1.0X +UTF8_LCASE 593 593 1 0.2 5926.8 1.0X +UNICODE 591 593 1 0.2 5912.9 1.0X +UNICODE_CI 593 594 1 0.2 5934.1 1.0X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execBinary: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ----------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 159 159 1 0.6 1587.6 1.0X -UTF8_LCASE 159 159 0 0.6 1586.6 1.0X -UNICODE 158 159 1 0.6 1584.9 1.0X -UNICODE_CI 159 160 1 0.6 1586.1 1.0X +UTF8_BINARY 105 109 10 0.9 1054.8 1.0X +UTF8_LCASE 105 106 1 0.9 1053.3 1.0X +UNICODE 105 106 1 0.9 1054.2 1.0X +UNICODE_CI 105 106 0 1.0 1051.1 1.0X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execLowercase: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 397 405 5 0.3 3974.4 1.0X -UTF8_LCASE 401 405 5 0.2 4009.5 1.0X -UNICODE 395 399 3 0.3 3953.9 1.0X -UNICODE_CI 395 400 3 0.3 
3952.0 1.0X +UTF8_BINARY 370 371 1 0.3 3698.3 1.0X +UTF8_LCASE 370 371 1 0.3 3697.7 1.0X +UNICODE 369 370 1 0.3 3692.7 1.0X +UNICODE_CI 370 371 1 0.3 3697.2 1.0X diff --git a/sql/core/benchmarks/CollationBenchmark-results.txt b/sql/core/benchmarks/CollationBenchmark-results.txt index 8402a2db6d869..cbd0727ce92e4 100644 --- a/sql/core/benchmarks/CollationBenchmark-results.txt +++ b/sql/core/benchmarks/CollationBenchmark-results.txt @@ -1,88 +1,88 @@ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 1223 1224 1 0.1 12231.5 1.0X -UTF8_LCASE 3280 3281 1 0.0 32803.3 2.7X -UNICODE 17207 17207 0 0.0 172065.7 14.1X -UNICODE_CI 16560 16565 7 0.0 165604.3 13.5X +UTF8_BINARY 1380 1381 1 0.1 13801.3 1.0X +UTF8_LCASE 3334 3336 3 0.0 33337.1 2.4X +UNICODE 19004 19005 1 0.0 190039.6 13.8X +UNICODE_CI 18686 18699 18 0.0 186856.4 13.5X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time --------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 1656 1657 0 0.1 16564.0 1.0X -UTF8_LCASE 3320 3321 0 0.0 33203.0 2.0X -UNICODE 16392 16393 2 0.0 163921.3 9.9X -UNICODE_CI 16314 16319 6 0.0 163143.3 9.8X +UTF8_BINARY 1739 1739 0 0.1 17392.0 1.0X +UTF8_LCASE 4175 4175 0 0.0 41745.1 2.4X +UNICODE 20212 20220 11 0.0 202124.2 11.6X 
+UNICODE_CI 20078 20086 11 0.0 200782.6 11.5X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 2812 2813 1 0.0 28119.0 1.0X -UTF8_LCASE 5682 5685 4 0.0 56823.2 2.0X -UNICODE 71678 71685 10 0.0 716777.4 25.5X -UNICODE_CI 60660 60670 15 0.0 606597.4 21.6X +UTF8_BINARY 3112 3115 4 0.0 31119.4 1.0X +UTF8_LCASE 6348 6354 9 0.0 63477.9 2.0X +UNICODE 67421 67436 22 0.0 674208.7 21.7X +UNICODE_CI 54039 54056 24 0.0 540394.5 17.4X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 2528 2528 1 0.0 25276.8 1.0X -UTF8_LCASE 28034 28050 24 0.0 280335.5 11.1X -UNICODE 521518 521690 242 0.0 5215184.7 206.3X -UNICODE_CI 508188 508312 176 0.0 5081880.5 201.0X +UTF8_BINARY 1677 1678 1 0.1 16768.6 1.0X +UTF8_LCASE 17476 17480 5 0.0 174760.6 10.4X +UNICODE 324829 324937 153 0.0 3248290.7 193.7X +UNICODE_CI 317534 317742 294 0.0 3175340.9 189.4X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time 
------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 2772 2774 4 0.0 27715.0 1.0X -UTF8_LCASE 27387 27390 4 0.0 273872.8 9.9X -UNICODE 501025 501076 72 0.0 5010249.5 180.8X -UNICODE_CI 506654 506666 16 0.0 5066544.6 182.8X +UTF8_BINARY 2040 2041 1 0.0 20400.9 1.0X +UTF8_LCASE 17099 17100 1 0.0 170991.2 8.4X +UNICODE 314251 314484 330 0.0 3142508.7 154.0X +UNICODE_CI 319313 319690 533 0.0 3193131.6 156.5X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 2886 2888 3 0.0 28858.9 1.0X -UTF8_LCASE 27433 27445 17 0.0 274326.2 9.5X -UNICODE 501068 501186 168 0.0 5010676.2 173.6X -UNICODE_CI 506619 506655 52 0.0 5066185.6 175.6X +UTF8_BINARY 2077 2077 1 0.0 20765.6 1.0X +UTF8_LCASE 16903 16905 2 0.0 169034.4 8.1X +UNICODE 326824 328355 2165 0.0 3268239.0 157.4X +UNICODE_CI 334072 334237 233 0.0 3340722.1 160.9X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execICU: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------- -UNICODE 407 411 4 0.2 4065.4 1.0X -UNICODE_CI 419 423 3 0.2 4194.1 1.0X +UNICODE 301 301 0 0.3 3006.6 1.0X +UNICODE_CI 300 301 1 0.3 3003.5 1.0X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws 
-Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execBinaryICU: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 564 565 2 0.2 5639.2 1.0X -UTF8_LCASE 563 563 0 0.2 5629.0 1.0X -UNICODE 563 565 2 0.2 5634.3 1.0X -UNICODE_CI 564 564 0 0.2 5640.9 1.0X +UTF8_BINARY 599 600 1 0.2 5992.2 1.0X +UTF8_LCASE 599 600 1 0.2 5994.4 1.0X +UNICODE 599 600 1 0.2 5985.1 1.0X +UNICODE_CI 597 598 1 0.2 5971.7 1.0X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execBinary: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ----------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 165 166 1 0.6 1647.3 1.0X -UTF8_LCASE 165 165 1 0.6 1646.7 1.0X -UNICODE 165 165 1 0.6 1646.5 1.0X -UNICODE_CI 165 166 1 0.6 1648.7 1.0X +UTF8_BINARY 184 185 1 0.5 1844.8 1.0X +UTF8_LCASE 185 185 0 0.5 1847.3 1.0X +UNICODE 184 185 1 0.5 1844.9 1.0X +UNICODE_CI 185 185 0 0.5 1845.6 1.0X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execLowercase: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------------- 
-UTF8_BINARY 391 399 6 0.3 3912.1 1.0X -UTF8_LCASE 389 399 7 0.3 3894.2 1.0X -UNICODE 383 391 6 0.3 3828.6 1.0X -UNICODE_CI 383 387 2 0.3 3833.0 1.0X +UTF8_BINARY 324 325 1 0.3 3242.0 1.0X +UTF8_LCASE 325 326 2 0.3 3251.5 1.0X +UNICODE 325 326 1 0.3 3251.9 1.0X +UNICODE_CI 324 326 1 0.3 3242.6 1.0X diff --git a/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt b/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt index 4da64ade11d68..ffdd34f6aaa8c 100644 --- a/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt @@ -1,88 +1,88 @@ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 156 156 0 0.3 3887.8 1.0X -UTF8_LCASE 9717 9729 18 0.0 242914.7 62.5X -UNICODE 5026 5027 2 0.0 125640.1 32.3X -UNICODE_CI 4969 4972 4 0.0 124224.9 32.0X +UTF8_BINARY 171 172 1 0.2 4282.8 1.0X +UTF8_LCASE 7012 7018 9 0.0 175288.2 40.9X +UNICODE 5206 5207 0 0.0 130157.7 30.4X +UNICODE_CI 5220 5220 0 0.0 130499.0 30.5X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time --------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 279 279 0 0.1 6969.5 1.0X -UTF8_LCASE 9624 9628 5 0.0 240611.6 34.5X -UNICODE 5243 5244 0 0.0 131080.1 
18.8X -UNICODE_CI 5173 5173 0 0.0 129322.8 18.6X +UTF8_BINARY 315 316 1 0.1 7871.4 1.0X +UTF8_LCASE 7036 7038 4 0.0 175888.2 22.3X +UNICODE 5343 5344 1 0.0 133571.1 17.0X +UNICODE_CI 5284 5284 0 0.0 132104.2 16.8X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 383 383 0 0.1 9576.7 1.0X -UTF8_LCASE 4927 4931 6 0.0 123170.3 12.9X -UNICODE 17244 17261 24 0.0 431096.6 45.0X -UNICODE_CI 12968 12970 3 0.0 324194.1 33.9X +UTF8_BINARY 382 383 1 0.1 9557.2 1.0X +UTF8_LCASE 3587 3592 6 0.0 89683.4 9.4X +UNICODE 15310 15322 16 0.0 382753.7 40.0X +UNICODE_CI 12531 12543 17 0.0 313269.1 32.8X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 535 536 2 0.1 13371.6 1.0X -UTF8_LCASE 9479 9480 2 0.0 236964.5 17.7X -UNICODE 93629 93676 66 0.0 2340726.5 175.1X -UNICODE_CI 93222 93309 124 0.0 2330541.2 174.3X +UTF8_BINARY 350 350 0 0.1 8742.0 1.0X +UTF8_LCASE 9013 9020 11 0.0 225317.3 25.8X +UNICODE 58338 58368 43 0.0 1458444.2 166.8X +UNICODE_CI 58821 58928 152 0.0 1470530.7 168.2X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core 
Processor collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 430 431 1 0.1 10755.8 1.0X -UTF8_LCASE 6550 6551 2 0.0 163753.7 15.2X -UNICODE 87435 87467 45 0.0 2185886.8 203.2X -UNICODE_CI 90113 90255 201 0.0 2252836.0 209.5X +UTF8_BINARY 290 291 1 0.1 7250.9 1.0X +UTF8_LCASE 5323 5325 3 0.0 133079.8 18.4X +UNICODE 57879 57976 138 0.0 1446968.2 199.6X +UNICODE_CI 59098 59188 127 0.0 1477459.9 203.8X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 455 456 2 0.1 11369.5 1.0X -UTF8_LCASE 7108 7115 9 0.0 177705.2 15.6X -UNICODE 101835 101866 43 0.0 2545883.9 223.9X -UNICODE_CI 100962 101026 91 0.0 2524045.2 222.0X +UTF8_BINARY 300 302 1 0.1 7507.7 1.0X +UTF8_LCASE 5310 5318 11 0.0 132754.5 17.7X +UNICODE 64787 64833 65 0.0 1619680.3 215.7X +UNICODE_CI 64384 64419 50 0.0 1609603.5 214.4X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execICU: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------- -UNICODE 254 255 1 0.2 6346.5 1.0X -UNICODE_CI 254 254 0 0.2 6348.1 1.0X +UNICODE 214 215 1 0.2 5339.5 1.0X +UNICODE_CI 214 215 0 
0.2 5355.7 1.0X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execBinaryICU: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 322 323 1 0.1 8046.3 1.0X -UTF8_LCASE 322 324 2 0.1 8059.0 1.0X -UNICODE 322 323 1 0.1 8050.7 1.0X -UNICODE_CI 322 325 4 0.1 8062.4 1.0X +UTF8_BINARY 318 318 1 0.1 7946.2 1.0X +UTF8_LCASE 318 319 1 0.1 7945.3 1.0X +UNICODE 318 319 1 0.1 7950.9 1.0X +UNICODE_CI 317 318 1 0.1 7931.1 1.0X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execBinary: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ----------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 119 120 1 0.3 2972.1 1.0X -UTF8_LCASE 119 120 1 0.3 2971.9 1.0X -UNICODE 119 120 1 0.3 2970.3 1.0X -UNICODE_CI 119 120 1 0.3 2968.6 1.0X +UTF8_BINARY 84 85 0 0.5 2101.1 1.0X +UTF8_LCASE 84 85 1 0.5 2097.7 1.0X +UNICODE 84 85 1 0.5 2106.4 1.0X +UNICODE_CI 84 85 1 0.5 2111.5 1.0X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execLowercase: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time 
-------------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 254 255 1 0.2 6345.2 1.0X -UTF8_LCASE 254 255 0 0.2 6351.8 1.0X -UNICODE 254 255 0 0.2 6352.9 1.0X -UNICODE_CI 254 254 0 0.2 6341.2 1.0X +UTF8_BINARY 214 215 2 0.2 5342.3 1.0X +UTF8_LCASE 214 215 1 0.2 5348.9 1.0X +UNICODE 214 215 1 0.2 5349.8 1.0X +UNICODE_CI 214 215 0 0.2 5354.4 1.0X diff --git a/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt b/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt index fba59f3893e22..fa21ecbbb6593 100644 --- a/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt +++ b/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt @@ -1,88 +1,88 @@ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 125 126 1 0.3 3128.6 1.0X -UTF8_LCASE 10335 10345 14 0.0 258377.4 82.6X -UNICODE 5604 5610 8 0.0 140110.8 44.8X -UNICODE_CI 5570 5577 9 0.0 139252.7 44.5X +UTF8_BINARY 141 146 3 0.3 3523.3 1.0X +UTF8_LCASE 7725 7753 40 0.0 193120.1 54.8X +UNICODE 5788 5824 51 0.0 144696.8 41.1X +UNICODE_CI 5997 6002 7 0.0 149920.7 42.6X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time 
--------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 293 294 2 0.1 7326.8 1.0X -UTF8_LCASE 10035 10035 1 0.0 250865.2 34.2X -UNICODE 5578 5580 3 0.0 139455.8 19.0X -UNICODE_CI 5539 5541 2 0.0 138483.8 18.9X +UTF8_BINARY 337 346 5 0.1 8433.8 1.0X +UTF8_LCASE 7829 7852 33 0.0 195727.0 23.2X +UNICODE 6096 6116 29 0.0 152404.8 18.1X +UNICODE_CI 6112 6131 26 0.0 152805.7 18.1X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 388 388 0 0.1 9699.6 1.0X -UTF8_LCASE 4965 4967 3 0.0 124121.3 12.8X -UNICODE 15750 15753 5 0.0 393740.9 40.6X -UNICODE_CI 12509 12511 2 0.0 312735.5 32.2X +UTF8_BINARY 452 455 3 0.1 11306.0 1.0X +UTF8_LCASE 3968 3990 32 0.0 99194.0 8.8X +UNICODE 15247 15296 69 0.0 381186.5 33.7X +UNICODE_CI 12374 12397 32 0.0 309347.5 27.4X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 421 422 2 0.1 10512.9 1.0X -UTF8_LCASE 10793 10796 5 0.0 269819.0 25.7X -UNICODE 94324 94330 9 0.0 2358090.9 224.3X -UNICODE_CI 91647 91748 143 0.0 2291174.6 217.9X +UTF8_BINARY 435 446 7 0.1 10881.1 1.0X +UTF8_LCASE 10346 10366 29 0.0 258656.4 23.8X +UNICODE 78521 78598 110 0.0 1963015.5 
180.4X +UNICODE_CI 80810 81202 554 0.0 2020241.0 185.7X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 452 453 0 0.1 11307.9 1.0X -UTF8_LCASE 6871 6872 2 0.0 171782.0 15.2X -UNICODE 90881 90924 60 0.0 2272034.5 200.9X -UNICODE_CI 91333 91363 42 0.0 2283331.3 201.9X +UTF8_BINARY 321 324 3 0.1 8021.8 1.0X +UTF8_LCASE 5970 5976 10 0.0 149242.0 18.6X +UNICODE 86151 86522 525 0.0 2153773.0 268.5X +UNICODE_CI 89308 90327 1441 0.0 2232710.9 278.3X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY 451 452 2 0.1 11268.1 1.0X -UTF8_LCASE 6685 6686 2 0.0 167120.8 14.8X -UNICODE 99387 99484 138 0.0 2484672.5 220.5X -UNICODE_CI 98525 98597 101 0.0 2463132.9 218.6X +UTF8_BINARY 310 314 3 0.1 7741.5 1.0X +UTF8_LCASE 5707 5711 5 0.0 142683.3 18.4X +UNICODE 91242 95109 5469 0.0 2281057.2 294.7X +UNICODE_CI 91446 92305 1215 0.0 2286138.3 295.3X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execICU: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time 
-------------------------------------------------------------------------------------------------------------------------------------- -UNICODE 231 232 0 0.2 5784.5 1.0X -UNICODE_CI 231 232 1 0.2 5780.4 1.0X +UNICODE 298 300 2 0.1 7454.2 1.0X +UNICODE_CI 300 301 1 0.1 7500.2 1.0X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execBinaryICU: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 312 314 1 0.1 7811.2 1.0X -UTF8_LCASE 313 314 2 0.1 7822.9 1.0X -UNICODE 313 314 1 0.1 7815.5 1.0X -UNICODE_CI 313 315 4 0.1 7825.7 1.0X +UTF8_BINARY 343 346 3 0.1 8576.7 1.0X +UTF8_LCASE 343 345 2 0.1 8582.7 1.0X +UNICODE 344 348 2 0.1 8607.4 1.0X +UNICODE_CI 340 345 3 0.1 8493.8 1.0X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execBinary: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time ----------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 132 133 0 0.3 3302.0 1.0X -UTF8_LCASE 132 132 0 0.3 3297.5 1.0X -UNICODE 132 133 1 0.3 3296.9 1.0X -UNICODE_CI 132 132 0 0.3 3298.1 1.0X +UTF8_BINARY 130 132 1 0.3 3245.3 1.0X +UTF8_LCASE 129 132 1 0.3 3235.2 1.0X +UNICODE 129 133 1 0.3 3231.9 1.0X +UNICODE_CI 131 133 1 0.3 3274.8 1.0X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1018-aws -Intel(R) Xeon(R) Platinum 8252C CPU @ 3.80GHz +OpenJDK 64-Bit Server VM 
17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor collation unit benchmarks - initCap using impl execLowercase: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative time -------------------------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY 231 231 0 0.2 5770.4 1.0X -UTF8_LCASE 231 232 1 0.2 5776.4 1.0X -UNICODE 231 231 0 0.2 5767.5 1.0X -UNICODE_CI 231 232 1 0.2 5770.2 1.0X +UTF8_BINARY 294 296 1 0.1 7348.6 1.0X +UTF8_LCASE 296 299 2 0.1 7390.7 1.0X +UNICODE 298 300 2 0.1 7461.3 1.0X +UNICODE_CI 297 299 2 0.1 7421.1 1.0X diff --git a/sql/core/benchmarks/ColumnarBatchBenchmark-jdk21-results.txt b/sql/core/benchmarks/ColumnarBatchBenchmark-jdk21-results.txt index e6d3fa3dfbe5e..40b7cf00b6669 100644 --- a/sql/core/benchmarks/ColumnarBatchBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ColumnarBatchBenchmark-jdk21-results.txt @@ -2,58 +2,58 @@ Int Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Java Array 122 123 1 2676.8 0.4 1.0X -ByteBuffer Unsafe 194 201 8 1685.2 0.6 0.6X -ByteBuffer API 501 503 2 653.5 1.5 0.2X -DirectByteBuffer 418 419 1 784.8 1.3 0.3X -Unsafe Buffer 154 154 0 2134.6 0.5 0.8X -Column(on heap) 123 123 1 2668.6 0.4 1.0X -Column(off heap) 154 154 1 2134.0 0.5 0.8X -Column(off heap direct) 154 154 1 2128.0 0.5 0.8X -UnsafeRow (on heap) 432 433 2 758.6 1.3 0.3X -UnsafeRow (off heap) 294 295 1 1116.1 0.9 0.4X -Column On Heap Append 336 337 2 976.5 1.0 0.4X +Java Array 123 123 0 2664.8 0.4 1.0X +ByteBuffer 
Unsafe 188 194 8 1742.2 0.6 0.7X +ByteBuffer API 429 429 1 764.2 1.3 0.3X +DirectByteBuffer 420 421 2 780.9 1.3 0.3X +Unsafe Buffer 154 156 5 2124.5 0.5 0.8X +Column(on heap) 124 124 0 2646.3 0.4 1.0X +Column(off heap) 155 155 0 2117.5 0.5 0.8X +Column(off heap direct) 155 155 0 2115.9 0.5 0.8X +UnsafeRow (on heap) 452 452 0 725.7 1.4 0.3X +UnsafeRow (off heap) 296 297 0 1106.8 0.9 0.4X +Column On Heap Append 312 315 3 1048.8 1.0 0.4X ================================================================================================ Boolean Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Boolean Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Bitset 430 431 2 780.3 1.3 1.0X -Byte Array 249 250 2 1348.7 0.7 1.7X +Bitset 432 433 1 776.0 1.3 1.0X +Byte Array 250 251 1 1341.9 0.7 1.7X ================================================================================================ String Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -On Heap 121 122 1 134.9 7.4 1.0X -Off Heap 523 535 9 31.3 31.9 0.2X +On Heap 126 131 9 129.5 7.7 1.0X +Off Heap 475 495 18 34.5 29.0 0.3X ================================================================================================ Array Vector 
Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Array Vector Read: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -On Heap Read Size Only 87 88 1 1878.9 0.5 1.0X -Off Heap Read Size Only 425 425 0 385.7 2.6 0.2X -On Heap Read Elements 2464 2467 5 66.5 15.0 0.0X -Off Heap Read Elements 2409 2412 5 68.0 14.7 0.0X +On Heap Read Size Only 86 87 0 1898.0 0.5 1.0X +Off Heap Read Size Only 307 308 1 533.7 1.9 0.3X +On Heap Read Elements 2385 2398 20 68.7 14.6 0.0X +Off Heap Read Elements 2606 2608 3 62.9 15.9 0.0X diff --git a/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt b/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt index ea5edb89dcfe6..96a39bdadeeaf 100644 --- a/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt +++ b/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt @@ -2,58 +2,58 @@ Int Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Java Array 174 175 1 1883.1 0.5 1.0X -ByteBuffer Unsafe 278 282 6 1177.8 0.8 0.6X -ByteBuffer API 508 509 1 645.6 1.5 0.3X -DirectByteBuffer 468 469 1 700.7 1.4 0.4X -Unsafe Buffer 159 161 1 2057.9 0.5 1.1X -Column(on heap) 170 171 0 1923.5 0.5 1.0X -Column(off heap) 162 162 0 2023.8 0.5 1.1X -Column(off heap direct) 157 158 1 2083.7 
0.5 1.1X -UnsafeRow (on heap) 436 436 1 751.7 1.3 0.4X -UnsafeRow (off heap) 314 321 14 1042.5 1.0 0.6X -Column On Heap Append 361 362 1 906.5 1.1 0.5X +Java Array 175 177 5 1871.5 0.5 1.0X +ByteBuffer Unsafe 279 280 1 1174.5 0.9 0.6X +ByteBuffer API 510 511 1 642.1 1.6 0.3X +DirectByteBuffer 470 471 1 697.0 1.4 0.4X +Unsafe Buffer 162 163 1 2020.2 0.5 1.1X +Column(on heap) 171 172 0 1911.5 0.5 1.0X +Column(off heap) 163 163 0 2012.6 0.5 1.1X +Column(off heap direct) 158 158 0 2076.1 0.5 1.1X +UnsafeRow (on heap) 439 439 0 747.1 1.3 0.4X +UnsafeRow (off heap) 315 318 2 1040.4 1.0 0.6X +Column On Heap Append 363 364 1 901.8 1.1 0.5X ================================================================================================ Boolean Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Boolean Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Bitset 451 452 1 744.8 1.3 1.0X -Byte Array 288 289 2 1163.9 0.9 1.6X +Bitset 454 455 1 739.1 1.4 1.0X +Byte Array 263 264 1 1274.3 0.8 1.7X ================================================================================================ String Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -On Heap 193 236 33 85.0 11.8 1.0X -Off Heap 
424 440 15 38.7 25.9 0.5X +On Heap 139 141 2 118.0 8.5 1.0X +Off Heap 382 391 12 42.9 23.3 0.4X ================================================================================================ Array Vector Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Array Vector Read: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -On Heap Read Size Only 90 90 1 1826.1 0.5 1.0X -Off Heap Read Size Only 85 85 1 1927.9 0.5 1.1X -On Heap Read Elements 2177 2178 1 75.3 13.3 0.0X -Off Heap Read Elements 2732 2735 4 60.0 16.7 0.0X +On Heap Read Size Only 87 87 0 1883.4 0.5 1.0X +Off Heap Read Size Only 85 86 0 1918.9 0.5 1.0X +On Heap Read Elements 2428 2430 3 67.5 14.8 0.0X +Off Heap Read Elements 2956 2958 3 55.4 18.0 0.0X diff --git a/sql/core/benchmarks/CompressionSchemeBenchmark-jdk21-results.txt b/sql/core/benchmarks/CompressionSchemeBenchmark-jdk21-results.txt index 3338d6b4df0eb..588ce854d1858 100644 --- a/sql/core/benchmarks/CompressionSchemeBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CompressionSchemeBenchmark-jdk21-results.txt @@ -2,136 +2,136 @@ Compression Scheme Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 1 1 0 46950.3 0.0 1.0X -RunLengthEncoding(2.517) 983 989 6 
68.2 14.7 0.0X -BooleanBitSet(0.125) 233 234 1 287.8 3.5 0.0X +PassThrough(1.000) 1 1 0 46470.3 0.0 1.0X +RunLengthEncoding(2.515) 1110 1283 245 60.5 16.5 0.0X +BooleanBitSet(0.125) 285 286 1 235.2 4.3 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor BOOLEAN Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 210 211 1 319.3 3.1 1.0X -RunLengthEncoding 598 605 10 112.3 8.9 0.4X -BooleanBitSet 696 699 3 96.5 10.4 0.3X +PassThrough 211 213 1 317.5 3.1 1.0X +RunLengthEncoding 601 601 0 111.7 9.0 0.4X +BooleanBitSet 672 672 0 99.9 10.0 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 23190.9 0.0 1.0X -RunLengthEncoding(1.495) 1229 1229 1 54.6 18.3 0.0X +PassThrough(1.000) 3 3 0 23145.6 0.0 1.0X +RunLengthEncoding(1.489) 1079 1079 0 62.2 16.1 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SHORT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 667 668 1 100.6 9.9 1.0X -RunLengthEncoding 1030 1032 3 65.2 15.3 0.6X +PassThrough 795 796 2 84.5 11.8 1.0X +RunLengthEncoding 990 991 1 67.8 14.8 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 23427.9 0.0 1.0X -RunLengthEncoding(2.000) 1234 1234 0 54.4 18.4 0.0X +PassThrough(1.000) 3 3 0 23322.1 0.0 1.0X +RunLengthEncoding(2.000) 1116 1117 2 60.1 16.6 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SHORT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 665 666 1 100.9 9.9 1.0X -RunLengthEncoding 1007 1007 0 66.6 15.0 0.7X +PassThrough 796 811 16 84.3 11.9 1.0X +RunLengthEncoding 956 957 1 70.2 14.2 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 6 6 0 11347.4 0.1 1.0X -RunLengthEncoding(0.997) 1072 1073 1 62.6 16.0 0.0X -DictionaryEncoding(0.500) 378 378 0 177.5 5.6 0.0X -IntDelta(0.250) 139 141 3 481.6 2.1 0.0X +PassThrough(1.000) 6 6 0 11601.5 0.1 1.0X +RunLengthEncoding(1.004) 1011 1011 1 66.4 15.1 0.0X +DictionaryEncoding(0.500) 335 335 0 200.4 5.0 0.0X +IntDelta(0.250) 110 111 0 607.5 1.6 0.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor INT 
Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 644 646 2 104.2 9.6 1.0X -RunLengthEncoding 1181 1182 2 56.8 17.6 0.5X -DictionaryEncoding 521 522 0 128.8 7.8 1.2X -IntDelta 498 499 2 134.7 7.4 1.3X +PassThrough 647 647 0 103.7 9.6 1.0X +RunLengthEncoding 1213 1214 1 55.3 18.1 0.5X +DictionaryEncoding 526 526 0 127.7 7.8 1.2X +IntDelta 501 503 2 133.9 7.5 1.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 6 6 0 11261.2 0.1 1.0X -RunLengthEncoding(1.329) 1128 1129 2 59.5 16.8 0.0X -DictionaryEncoding(0.501) 378 379 2 177.5 5.6 0.0X -IntDelta(0.250) 125 125 0 536.8 1.9 0.0X +PassThrough(1.000) 6 6 0 11080.2 0.1 1.0X +RunLengthEncoding(1.339) 1048 1051 5 64.1 15.6 0.0X +DictionaryEncoding(0.501) 337 339 1 199.0 5.0 0.0X +IntDelta(0.250) 110 111 0 607.5 1.6 0.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor INT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 711 712 1 94.3 10.6 1.0X -RunLengthEncoding 1150 1154 5 58.4 17.1 0.6X -DictionaryEncoding 651 655 4 103.0 9.7 1.1X -IntDelta 520 573 59 129.1 7.7 1.4X +PassThrough 710 712 2 94.5 10.6 1.0X +RunLengthEncoding 1188 1190 3 56.5 17.7 0.6X +DictionaryEncoding 659 663 7 101.9 9.8 1.1X +IntDelta 524 526 3 128.1 7.8 
1.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 13 13 0 5052.7 0.2 1.0X -RunLengthEncoding(0.748) 1072 1073 0 62.6 16.0 0.0X -DictionaryEncoding(0.250) 521 521 0 128.8 7.8 0.0X -LongDelta(0.125) 110 110 0 609.1 1.6 0.1X +PassThrough(1.000) 16 19 0 4302.0 0.2 1.0X +RunLengthEncoding(0.760) 1066 1066 0 63.0 15.9 0.0X +DictionaryEncoding(0.250) 404 405 2 166.2 6.0 0.0X +LongDelta(0.125) 111 111 0 605.5 1.7 0.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LONG Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 771 774 3 87.1 11.5 1.0X -RunLengthEncoding 1232 1233 1 54.5 18.4 0.6X -DictionaryEncoding 720 724 6 93.2 10.7 1.1X -LongDelta 541 543 3 124.1 8.1 1.4X +PassThrough 774 776 2 86.7 11.5 1.0X +RunLengthEncoding 1240 1241 2 54.1 18.5 0.6X +DictionaryEncoding 714 717 4 93.9 10.6 1.1X +LongDelta 543 545 2 123.6 8.1 1.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 13 13 0 5054.5 0.2 1.0X -RunLengthEncoding(1.007) 1110 1111 1 60.4 16.5 0.0X -DictionaryEncoding(0.251) 533 534 2 126.0 7.9 
0.0X -LongDelta(0.125) 111 112 0 605.2 1.7 0.1X +PassThrough(1.000) 18 18 0 3770.3 0.3 1.0X +RunLengthEncoding(1.002) 1095 1098 4 61.3 16.3 0.0X +DictionaryEncoding(0.251) 404 405 2 166.0 6.0 0.0X +LongDelta(0.125) 111 111 0 603.7 1.7 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LONG Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 769 770 0 87.2 11.5 1.0X -RunLengthEncoding 1234 1236 4 54.4 18.4 0.6X -DictionaryEncoding 721 723 3 93.0 10.7 1.1X -LongDelta 669 672 3 100.2 10.0 1.1X +PassThrough 774 777 4 86.7 11.5 1.0X +RunLengthEncoding 1217 1218 1 55.2 18.1 0.6X +DictionaryEncoding 715 719 6 93.9 10.7 1.1X +LongDelta 671 672 2 100.1 10.0 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 20 20 0 3376.5 0.3 1.0X -RunLengthEncoding(0.892) 2013 2014 1 33.3 30.0 0.0X -DictionaryEncoding(0.167) 1687 1691 6 39.8 25.1 0.0X +PassThrough(1.000) 20 23 4 3349.0 0.3 1.0X +RunLengthEncoding(0.893) 1852 1854 3 36.2 27.6 0.0X +DictionaryEncoding(0.167) 2101 2111 14 31.9 31.3 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor STRING Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-PassThrough 1776 1776 1 37.8 26.5 1.0X -RunLengthEncoding 2518 2518 0 26.7 37.5 0.7X -DictionaryEncoding 2028 2030 4 33.1 30.2 0.9X +PassThrough 1654 1675 31 40.6 24.6 1.0X +RunLengthEncoding 2501 2505 6 26.8 37.3 0.7X +DictionaryEncoding 2028 2030 2 33.1 30.2 0.8X diff --git a/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt b/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt index c56288558bd5f..4b1206ab2e105 100644 --- a/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt +++ b/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt @@ -2,136 +2,136 @@ Compression Scheme Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 1 1 0 47046.4 0.0 1.0X -RunLengthEncoding(2.514) 882 883 0 76.0 13.1 0.0X -BooleanBitSet(0.125) 234 235 0 286.3 3.5 0.0X +PassThrough(1.000) 2 2 0 43967.6 0.0 1.0X +RunLengthEncoding(2.492) 900 901 1 74.6 13.4 0.0X +BooleanBitSet(0.125) 292 292 0 229.9 4.4 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor BOOLEAN Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 167 168 1 402.1 2.5 1.0X -RunLengthEncoding 532 534 1 126.1 7.9 0.3X -BooleanBitSet 663 665 2 101.2 9.9 0.3X +PassThrough 168 169 1 400.0 2.5 1.0X +RunLengthEncoding 551 555 5 121.7 8.2 0.3X +BooleanBitSet 639 640 1 105.0 9.5 0.3X -OpenJDK 64-Bit 
Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 23535.9 0.0 1.0X -RunLengthEncoding(1.501) 1218 1219 1 55.1 18.2 0.0X +PassThrough(1.000) 3 3 0 23004.2 0.0 1.0X +RunLengthEncoding(1.488) 1039 1040 1 64.6 15.5 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SHORT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 710 712 2 94.6 10.6 1.0X -RunLengthEncoding 1043 1055 18 64.4 15.5 0.7X +PassThrough 548 561 9 122.5 8.2 1.0X +RunLengthEncoding 970 972 2 69.2 14.5 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 24666.2 0.0 1.0X -RunLengthEncoding(2.012) 1157 1159 3 58.0 17.2 0.0X +PassThrough(1.000) 3 3 0 23244.9 0.0 1.0X +RunLengthEncoding(2.018) 1070 1070 1 62.7 15.9 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SHORT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -PassThrough 675 675 0 99.5 10.1 1.0X -RunLengthEncoding 1021 1024 4 65.7 15.2 0.7X +PassThrough 543 544 0 123.5 8.1 1.0X +RunLengthEncoding 930 931 2 72.2 13.9 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 6 6 0 11233.4 0.1 1.0X -RunLengthEncoding(1.002) 1012 1021 12 66.3 15.1 0.0X -DictionaryEncoding(0.500) 386 387 1 174.1 5.7 0.0X -IntDelta(0.250) 115 115 1 585.5 1.7 0.1X +PassThrough(1.000) 6 6 0 11412.0 0.1 1.0X +RunLengthEncoding(1.006) 997 1000 3 67.3 14.9 0.0X +DictionaryEncoding(0.500) 374 374 1 179.6 5.6 0.0X +IntDelta(0.250) 110 110 1 609.3 1.6 0.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor INT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 644 647 3 104.3 9.6 1.0X -RunLengthEncoding 1194 1194 0 56.2 17.8 0.5X -DictionaryEncoding 502 504 2 133.7 7.5 1.3X -IntDelta 457 458 1 146.9 6.8 1.4X +PassThrough 626 627 1 107.2 9.3 1.0X +RunLengthEncoding 1041 1042 2 64.5 15.5 0.6X +DictionaryEncoding 524 527 2 128.0 7.8 1.2X +IntDelta 460 460 1 146.0 6.8 1.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per 
Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 6 6 0 11739.3 0.1 1.0X -RunLengthEncoding(1.336) 1040 1040 1 64.5 15.5 0.0X -DictionaryEncoding(0.501) 387 388 1 173.2 5.8 0.0X -IntDelta(0.250) 115 115 1 585.4 1.7 0.0X +PassThrough(1.000) 6 6 0 11296.0 0.1 1.0X +RunLengthEncoding(1.338) 1018 1018 0 65.9 15.2 0.0X +DictionaryEncoding(0.501) 374 374 0 179.4 5.6 0.0X +IntDelta(0.250) 110 110 0 609.1 1.6 0.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor INT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 727 729 3 92.3 10.8 1.0X -RunLengthEncoding 1178 1182 5 57.0 17.6 0.6X -DictionaryEncoding 687 690 3 97.7 10.2 1.1X -IntDelta 480 482 2 139.7 7.2 1.5X +PassThrough 689 691 2 97.4 10.3 1.0X +RunLengthEncoding 1093 1094 1 61.4 16.3 0.6X +DictionaryEncoding 543 544 1 123.6 8.1 1.3X +IntDelta 597 599 2 112.5 8.9 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 13 13 0 5037.6 0.2 1.0X -RunLengthEncoding(0.750) 1017 1019 3 66.0 15.2 0.0X -DictionaryEncoding(0.250) 442 443 2 152.0 6.6 0.0X -LongDelta(0.125) 110 110 1 609.8 1.6 0.1X +PassThrough(1.000) 18 18 0 3771.2 0.3 1.0X +RunLengthEncoding(0.756) 1058 1059 2 63.5 15.8 0.0X +DictionaryEncoding(0.250) 441 442 1 152.2 6.6 0.0X +LongDelta(0.125) 111 111 0 604.4 1.7 0.2X -OpenJDK 
64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LONG Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 755 758 3 88.8 11.3 1.0X -RunLengthEncoding 1216 1216 0 55.2 18.1 0.6X -DictionaryEncoding 774 774 0 86.8 11.5 1.0X -LongDelta 485 488 2 138.4 7.2 1.6X +PassThrough 713 715 2 94.2 10.6 1.0X +RunLengthEncoding 1192 1192 0 56.3 17.8 0.6X +DictionaryEncoding 686 689 3 97.8 10.2 1.0X +LongDelta 523 526 3 128.4 7.8 1.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 13 13 0 5032.6 0.2 1.0X -RunLengthEncoding(1.003) 1033 1035 3 65.0 15.4 0.0X -DictionaryEncoding(0.251) 444 446 3 151.1 6.6 0.0X -LongDelta(0.125) 147 147 1 457.3 2.2 0.1X +PassThrough(1.000) 13 14 0 4998.3 0.2 1.0X +RunLengthEncoding(1.000) 1073 1076 4 62.5 16.0 0.0X +DictionaryEncoding(0.251) 442 442 0 151.8 6.6 0.0X +LongDelta(0.125) 111 112 1 602.7 1.7 0.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LONG Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 753 755 2 89.1 11.2 1.0X -RunLengthEncoding 1225 1227 3 54.8 18.3 0.6X -DictionaryEncoding 773 774 0 86.8 11.5 1.0X -LongDelta 672 675 6 
99.9 10.0 1.1X +PassThrough 712 714 2 94.3 10.6 1.0X +RunLengthEncoding 1163 1165 2 57.7 17.3 0.6X +DictionaryEncoding 685 686 2 97.9 10.2 1.0X +LongDelta 609 610 2 110.1 9.1 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 20 26 2 3350.9 0.3 1.0X -RunLengthEncoding(0.887) 1812 1813 2 37.0 27.0 0.0X -DictionaryEncoding(0.167) 2262 2263 1 29.7 33.7 0.0X +PassThrough(1.000) 27 27 0 2518.1 0.4 1.0X +RunLengthEncoding(0.892) 1819 1821 2 36.9 27.1 0.0X +DictionaryEncoding(0.167) 2071 2072 1 32.4 30.9 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor STRING Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1645 1650 7 40.8 24.5 1.0X -RunLengthEncoding 2281 2284 4 29.4 34.0 0.7X -DictionaryEncoding 1845 1847 3 36.4 27.5 0.9X +PassThrough 1448 1471 33 46.3 21.6 1.0X +RunLengthEncoding 2222 2227 6 30.2 33.1 0.7X +DictionaryEncoding 1998 2010 17 33.6 29.8 0.7X diff --git a/sql/core/benchmarks/ConstantColumnVectorBenchmark-jdk21-results.txt b/sql/core/benchmarks/ConstantColumnVectorBenchmark-jdk21-results.txt index c53ca57d7242e..d3aa5cb8235de 100644 --- a/sql/core/benchmarks/ConstantColumnVectorBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ConstantColumnVectorBenchmark-jdk21-results.txt @@ -1,280 +1,280 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor Test write with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 365410.9 0.0 1.0X -OnHeapColumnVector 3342 3368 36 122.6 8.2 0.0X -OffHeapColumnVector 5519 5519 0 74.2 13.5 0.0X +ConstantColumnVector 1 1 0 372657.0 0.0 1.0X +OnHeapColumnVector 2898 2899 1 141.3 7.1 0.0X +OffHeapColumnVector 5566 5569 4 73.6 13.6 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 399028.5 0.0 1.0X -OnHeapColumnVector 4031 4035 6 101.6 9.8 0.0X -OffHeapColumnVector 4792 4796 6 85.5 11.7 0.0X +ConstantColumnVector 1 1 0 423940.2 0.0 1.0X +OnHeapColumnVector 4102 4103 1 99.9 10.0 0.0X +OffHeapColumnVector 4885 4901 22 83.8 11.9 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 399083.0 0.0 1.0X -OnHeapColumnVector 4041 4043 4 101.4 9.9 0.0X -OffHeapColumnVector 4684 4701 25 87.5 11.4 0.0X +ConstantColumnVector 1 1 0 423996.4 0.0 1.0X +OnHeapColumnVector 4284 4291 10 95.6 10.5 0.0X +OffHeapColumnVector 5062 5071 13 80.9 12.4 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 399168.5 0.0 1.0X -OnHeapColumnVector 4762 4762 0 86.0 11.6 0.0X -OffHeapColumnVector 5314 5316 3 77.1 13.0 0.0X +ConstantColumnVector 1 1 0 423912.6 0.0 1.0X +OnHeapColumnVector 4176 4186 14 98.1 10.2 0.0X +OffHeapColumnVector 4728 4736 11 86.6 11.5 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 399059.2 0.0 1.0X -OnHeapColumnVector 8010 8011 0 51.1 19.6 0.0X -OffHeapColumnVector 5170 5183 19 79.2 12.6 0.0X +ConstantColumnVector 1 1 0 423965.7 0.0 1.0X +OnHeapColumnVector 4527 4529 2 90.5 11.1 0.0X +OffHeapColumnVector 5110 5116 10 80.2 12.5 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 399074.8 0.0 1.0X -OnHeapColumnVector 4366 4366 0 93.8 10.7 0.0X -OffHeapColumnVector 4960 4963 4 82.6 12.1 0.0X +ConstantColumnVector 1 1 0 424082.0 0.0 1.0X +OnHeapColumnVector 4366 4372 9 93.8 10.7 0.0X +OffHeapColumnVector 5147 5152 8 79.6 12.6 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS 
on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 632717.8 0.0 1.0X -OnHeapColumnVector 16 16 0 25522.9 0.0 0.0X -OffHeapColumnVector 65 65 0 6306.1 0.2 0.0X +ConstantColumnVector 1 1 0 664350.5 0.0 1.0X +OnHeapColumnVector 16 16 0 25444.2 0.0 0.0X +OffHeapColumnVector 65 65 0 6275.3 0.2 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 577697.2 0.0 1.0X -OnHeapColumnVector 33 33 0 12488.2 0.1 0.0X -OffHeapColumnVector 66 66 1 6198.2 0.2 0.0X +ConstantColumnVector 1 1 0 632713.9 0.0 1.0X +OnHeapColumnVector 33 34 0 12422.6 0.1 0.0X +OffHeapColumnVector 67 68 1 6094.1 0.2 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 442449.7 0.0 1.0X -OnHeapColumnVector 16 16 0 25047.7 0.0 0.1X -OffHeapColumnVector 127 128 0 3216.3 0.3 0.0X +ConstantColumnVector 1 1 0 457739.0 0.0 1.0X +OnHeapColumnVector 16 16 0 25107.7 0.0 0.1X +OffHeapColumnVector 129 129 0 3177.6 0.3 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit 
Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 491627.0 0.0 1.0X -OnHeapColumnVector 33 33 0 12493.3 0.1 0.0X -OffHeapColumnVector 129 129 0 3184.4 0.3 0.0X +ConstantColumnVector 1 1 0 530954.4 0.0 1.0X +OnHeapColumnVector 34 34 0 12039.3 0.1 0.0X +OffHeapColumnVector 129 129 0 3168.0 0.3 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 309 310 1 1324.1 0.8 0.0X -OffHeapColumnVector 3767 3768 1 108.7 9.2 0.0X +ConstantColumnVector 0 0 0 13274135.5 0.0 1.0X +OnHeapColumnVector 105 106 1 3884.1 0.3 0.0X +OffHeapColumnVector 6540 6543 4 62.6 16.0 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 4118 4123 7 99.5 10.1 0.0X -OffHeapColumnVector 3746 3755 13 109.3 9.1 0.0X +ConstantColumnVector 0 0 0 13274135.5 0.0 1.0X +OnHeapColumnVector 4074 4075 0 100.5 9.9 0.0X +OffHeapColumnVector 6602 6610 12 62.0 16.1 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 4114 4115 2 99.6 10.0 0.0X -OffHeapColumnVector 3744 3763 27 109.4 9.1 0.0X +ConstantColumnVector 0 0 0 13274135.5 0.0 1.0X +OnHeapColumnVector 4052 4056 6 101.1 9.9 0.0X +OffHeapColumnVector 6534 6537 5 62.7 16.0 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 4107 4122 21 99.7 10.0 0.0X -OffHeapColumnVector 3763 3779 21 108.8 9.2 0.0X +ConstantColumnVector 0 0 0 13274135.5 0.0 1.0X +OnHeapColumnVector 4056 4058 3 101.0 9.9 0.0X +OffHeapColumnVector 6536 6541 7 62.7 16.0 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 4102 4104 4 99.9 10.0 0.0X -OffHeapColumnVector 3820 3824 7 107.2 9.3 0.0X +ConstantColumnVector 0 0 0 13274135.5 0.0 1.0X +OnHeapColumnVector 4046 4053 9 101.2 9.9 0.0X +OffHeapColumnVector 6530 6531 1 62.7 
15.9 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 4246 4248 2 96.5 10.4 0.0X -OffHeapColumnVector 3743 3777 48 109.4 9.1 0.0X +ConstantColumnVector 0 0 0 13274135.5 0.0 1.0X +OnHeapColumnVector 4059 4061 3 100.9 9.9 0.0X +OffHeapColumnVector 6537 6538 2 62.7 16.0 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 26549131.4 0.0 1.0X -OnHeapColumnVector 1 1 0 492066.4 0.0 0.0X -OffHeapColumnVector 889 890 2 461.0 2.2 0.0X +ConstantColumnVector 0 0 0 13274135.5 0.0 1.0X +OnHeapColumnVector 1 1 0 474473.3 0.0 0.0X +OffHeapColumnVector 893 894 1 458.6 2.2 0.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1939 1940 2 211.3 4.7 1.0X -OnHeapColumnVector 2075 2089 19 197.4 5.1 0.9X -OffHeapColumnVector 2601 2603 2 157.5 6.3 0.7X +ConstantColumnVector 1953 1955 2 209.7 4.8 1.0X +OnHeapColumnVector 2072 2077 7 197.7 5.1 0.9X +OffHeapColumnVector 2604 
2608 5 157.3 6.4 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1846 1848 2 221.9 4.5 1.0X -OnHeapColumnVector 2099 2101 4 195.2 5.1 0.9X -OffHeapColumnVector 2613 2638 35 156.7 6.4 0.7X +ConstantColumnVector 1845 1846 2 222.0 4.5 1.0X +OnHeapColumnVector 2101 2103 4 195.0 5.1 0.9X +OffHeapColumnVector 2613 2615 3 156.8 6.4 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1986 1987 1 206.3 4.8 1.0X -OnHeapColumnVector 2120 2121 1 193.2 5.2 0.9X -OffHeapColumnVector 2753 2753 0 148.8 6.7 0.7X +ConstantColumnVector 1985 1986 1 206.3 4.8 1.0X +OnHeapColumnVector 2120 2123 4 193.2 5.2 0.9X +OffHeapColumnVector 2758 2762 5 148.5 6.7 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1943 1943 0 210.8 4.7 1.0X -OnHeapColumnVector 5899 5903 5 69.4 14.4 0.3X -OffHeapColumnVector 5086 5089 5 80.5 12.4 0.4X +ConstantColumnVector 1948 1952 5 210.2 4.8 1.0X +OnHeapColumnVector 5737 5746 13 
71.4 14.0 0.3X +OffHeapColumnVector 8493 8494 2 48.2 20.7 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1943 1943 1 210.8 4.7 1.0X -OnHeapColumnVector 5919 5922 5 69.2 14.5 0.3X -OffHeapColumnVector 5089 5096 10 80.5 12.4 0.4X +ConstantColumnVector 1950 1951 1 210.0 4.8 1.0X +OnHeapColumnVector 5657 5657 1 72.4 13.8 0.3X +OffHeapColumnVector 8500 8502 3 48.2 20.8 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1940 1946 9 211.1 4.7 1.0X -OnHeapColumnVector 5901 5907 8 69.4 14.4 0.3X -OffHeapColumnVector 5132 5142 14 79.8 12.5 0.4X +ConstantColumnVector 1948 1949 1 210.3 4.8 1.0X +OnHeapColumnVector 5765 5765 1 71.0 14.1 0.3X +OffHeapColumnVector 8512 8533 29 48.1 20.8 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1943 1944 2 210.8 4.7 1.0X -OnHeapColumnVector 5913 5914 1 69.3 14.4 0.3X 
-OffHeapColumnVector 5133 5159 37 79.8 12.5 0.4X +ConstantColumnVector 1949 1950 0 210.1 4.8 1.0X +OnHeapColumnVector 5660 5670 15 72.4 13.8 0.3X +OffHeapColumnVector 8502 8505 4 48.2 20.8 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1945 1949 6 210.6 4.7 1.0X -OnHeapColumnVector 5954 5955 2 68.8 14.5 0.3X -OffHeapColumnVector 5081 5083 3 80.6 12.4 0.4X +ConstantColumnVector 1952 1956 6 209.9 4.8 1.0X +OnHeapColumnVector 5742 5745 3 71.3 14.0 0.3X +OffHeapColumnVector 8555 8574 26 47.9 20.9 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1956 1957 2 209.4 4.8 1.0X -OnHeapColumnVector 5956 5997 58 68.8 14.5 0.3X -OffHeapColumnVector 5076 5077 1 80.7 12.4 0.4X +ConstantColumnVector 1956 1957 0 209.4 4.8 1.0X +OnHeapColumnVector 5657 5661 4 72.4 13.8 0.3X +OffHeapColumnVector 8523 8539 23 48.1 20.8 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-ConstantColumnVector 888 888 0 461.4 2.2 1.0X -OnHeapColumnVector 889 890 1 461.0 2.2 1.0X -OffHeapColumnVector 888 889 1 461.3 2.2 1.0X +ConstantColumnVector 892 892 1 459.3 2.2 1.0X +OnHeapColumnVector 1020 1021 1 401.5 2.5 0.9X +OffHeapColumnVector 892 893 1 459.0 2.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 2850 2850 0 143.7 7.0 1.0X -OnHeapColumnVector 2978 2978 1 137.6 7.3 1.0X -OffHeapColumnVector 2977 2978 1 137.6 7.3 1.0X +ConstantColumnVector 2866 2869 4 142.9 7.0 1.0X +OnHeapColumnVector 2993 2994 0 136.8 7.3 1.0X +OffHeapColumnVector 2991 2993 3 137.0 7.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 2867 2872 6 142.9 7.0 1.0X -OnHeapColumnVector 2993 2994 1 136.8 7.3 1.0X -OffHeapColumnVector 2991 2995 5 136.9 7.3 1.0X +ConstantColumnVector 2877 2892 21 142.4 7.0 1.0X +OnHeapColumnVector 3135 3136 3 130.7 7.7 0.9X +OffHeapColumnVector 3012 3013 1 136.0 7.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 3009 3011 3 136.1 7.3 1.0X -OnHeapColumnVector 3137 3139 3 130.6 7.7 1.0X -OffHeapColumnVector 3141 3142 2 130.4 7.7 1.0X +ConstantColumnVector 2381 2381 0 172.1 5.8 1.0X +OnHeapColumnVector 3157 3158 3 129.8 7.7 0.8X +OffHeapColumnVector 3148 3149 1 130.1 7.7 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with StringType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3321197.8 0.0 0.0X +OnHeapColumnVector 0 0 0 3321413.2 0.0 0.0X OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3321197.8 0.0 0.0X +OnHeapColumnVector 0 0 0 3321440.2 0.0 0.0X OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 0 0 0 
3321197.8 0.0 0.0X +OnHeapColumnVector 0 0 0 3321440.2 0.0 0.0X OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3321197.8 0.0 0.0X +OnHeapColumnVector 0 0 0 3321440.2 0.0 0.0X OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3321467.1 0.0 0.0X +OnHeapColumnVector 0 0 0 3321440.2 0.0 0.0X OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X diff --git a/sql/core/benchmarks/ConstantColumnVectorBenchmark-results.txt b/sql/core/benchmarks/ConstantColumnVectorBenchmark-results.txt index c381cbab325fc..39aedf6270830 100644 --- a/sql/core/benchmarks/ConstantColumnVectorBenchmark-results.txt +++ b/sql/core/benchmarks/ConstantColumnVectorBenchmark-results.txt @@ -1,280 +1,280 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- 
-ConstantColumnVector 1 1 0 329000.5 0.0 1.0X -OnHeapColumnVector 2882 2884 3 142.1 7.0 0.0X -OffHeapColumnVector 3380 3382 2 121.2 8.3 0.0X +ConstantColumnVector 1 1 0 324095.0 0.0 1.0X +OnHeapColumnVector 2813 2814 2 145.6 6.9 0.0X +OffHeapColumnVector 3407 3412 7 120.2 8.3 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387377.7 0.0 1.0X -OnHeapColumnVector 3661 3670 12 111.9 8.9 0.0X -OffHeapColumnVector 4386 4388 3 93.4 10.7 0.0X +ConstantColumnVector 1 1 0 382856.0 0.0 1.0X +OnHeapColumnVector 4041 4044 4 101.4 9.9 0.0X +OffHeapColumnVector 4288 4289 1 95.5 10.5 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387399.7 0.0 1.0X -OnHeapColumnVector 3915 3918 4 104.6 9.6 0.0X -OffHeapColumnVector 4559 4560 2 89.8 11.1 0.0X +ConstantColumnVector 1 1 0 383128.5 0.0 1.0X +OnHeapColumnVector 4013 4014 2 102.1 9.8 0.0X +OffHeapColumnVector 4353 4355 2 94.1 10.6 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387538.2 0.0 1.0X -OnHeapColumnVector 3628 3632 6 112.9 8.9 0.0X -OffHeapColumnVector 4489 4490 2 91.2 11.0 0.0X +ConstantColumnVector 1 1 0 383052.9 0.0 1.0X +OnHeapColumnVector 3818 3820 3 107.3 9.3 0.0X +OffHeapColumnVector 4644 4645 1 88.2 11.3 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387487.6 0.0 1.0X -OnHeapColumnVector 4219 4222 5 97.1 10.3 0.0X -OffHeapColumnVector 4701 4702 2 87.1 11.5 0.0X +ConstantColumnVector 1 1 0 383078.3 0.0 1.0X +OnHeapColumnVector 4128 4139 16 99.2 10.1 0.0X +OffHeapColumnVector 4602 4605 4 89.0 11.2 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387405.9 0.0 1.0X -OnHeapColumnVector 4336 4342 8 94.5 10.6 0.0X -OffHeapColumnVector 4376 4376 0 93.6 10.7 0.0X +ConstantColumnVector 1 1 0 383207.7 0.0 1.0X +OnHeapColumnVector 4274 4280 8 95.8 10.4 0.0X +OffHeapColumnVector 4583 4584 2 89.4 11.2 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with IntegerType: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 699323.4 0.0 1.0X -OnHeapColumnVector 16 16 0 25587.5 0.0 0.0X -OffHeapColumnVector 65 65 0 6320.1 0.2 0.0X +ConstantColumnVector 1 1 0 699303.1 0.0 1.0X +OnHeapColumnVector 16 16 0 25461.3 0.0 0.0X +OffHeapColumnVector 66 66 1 6242.8 0.2 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 664355.9 0.0 1.0X -OnHeapColumnVector 33 34 0 12331.0 0.1 0.0X -OffHeapColumnVector 67 67 0 6114.9 0.2 0.0X +ConstantColumnVector 1 1 0 664337.6 0.0 1.0X +OnHeapColumnVector 34 34 0 12100.9 0.1 0.0X +OffHeapColumnVector 68 69 0 5986.6 0.2 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 553059.9 0.0 1.0X -OnHeapColumnVector 16 16 0 25179.1 0.0 0.0X -OffHeapColumnVector 127 127 0 3217.6 0.3 0.0X +ConstantColumnVector 1 1 0 553053.1 0.0 1.0X +OnHeapColumnVector 16 16 0 25009.2 0.0 0.0X +OffHeapColumnVector 128 128 0 3191.8 0.3 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 510543.7 0.0 1.0X -OnHeapColumnVector 34 34 0 12081.9 0.1 0.0X -OffHeapColumnVector 128 129 0 3191.4 0.3 0.0X +ConstantColumnVector 1 1 0 510537.3 0.0 1.0X +OnHeapColumnVector 34 35 0 11938.3 0.1 0.0X +OffHeapColumnVector 129 130 0 3165.4 0.3 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1041 1041 1 393.6 2.5 1.0X -OnHeapColumnVector 2191 2191 0 186.9 5.3 0.5X -OffHeapColumnVector 4378 4379 1 93.6 10.7 0.2X +ConstantColumnVector 1051 1051 1 389.8 2.6 1.0X +OnHeapColumnVector 2133 2135 2 192.0 5.2 0.5X +OffHeapColumnVector 4374 4376 2 93.6 10.7 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 826 827 2 496.1 2.0 1.0X -OnHeapColumnVector 4856 4859 4 84.4 11.9 0.2X -OffHeapColumnVector 4645 4667 32 88.2 11.3 0.2X +ConstantColumnVector 836 836 0 490.1 2.0 1.0X +OnHeapColumnVector 4993 4994 0 82.0 12.2 0.2X +OffHeapColumnVector 4488 4489 1 91.3 11.0 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 10: Best Time(ms) 
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 828 828 1 494.9 2.0 1.0X -OnHeapColumnVector 4917 4918 2 83.3 12.0 0.2X -OffHeapColumnVector 4624 4631 9 88.6 11.3 0.2X +ConstantColumnVector 835 836 2 490.6 2.0 1.0X +OnHeapColumnVector 5030 5032 3 81.4 12.3 0.2X +OffHeapColumnVector 4509 4513 5 90.8 11.0 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 826 828 2 495.9 2.0 1.0X -OnHeapColumnVector 4914 4917 5 83.4 12.0 0.2X -OffHeapColumnVector 4635 4637 3 88.4 11.3 0.2X +ConstantColumnVector 838 840 3 489.1 2.0 1.0X +OnHeapColumnVector 5039 5045 8 81.3 12.3 0.2X +OffHeapColumnVector 4522 4523 2 90.6 11.0 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 827 829 2 495.1 2.0 1.0X -OnHeapColumnVector 4931 4932 1 83.1 12.0 0.2X -OffHeapColumnVector 4642 4644 2 88.2 11.3 0.2X +ConstantColumnVector 833 836 3 491.5 2.0 1.0X +OnHeapColumnVector 5044 5045 0 81.2 12.3 0.2X +OffHeapColumnVector 4500 4502 3 91.0 11.0 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test 
read with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 826 827 1 496.0 2.0 1.0X -OnHeapColumnVector 4908 4921 19 83.5 12.0 0.2X -OffHeapColumnVector 4627 4628 1 88.5 11.3 0.2X +ConstantColumnVector 835 836 1 490.4 2.0 1.0X +OnHeapColumnVector 5040 5042 3 81.3 12.3 0.2X +OffHeapColumnVector 4499 4499 1 91.0 11.0 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1811 1811 0 226.2 4.4 1.0X -OnHeapColumnVector 2128 2130 4 192.5 5.2 0.9X -OffHeapColumnVector 2340 2343 3 175.0 5.7 0.8X +ConstantColumnVector 2605 2605 1 157.3 6.4 1.0X +OnHeapColumnVector 2723 2724 2 150.4 6.6 1.0X +OffHeapColumnVector 2729 2730 1 150.1 6.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 2657221.1 0.0 1.0X -OnHeapColumnVector 0 0 0 1022070.8 0.0 0.4X -OffHeapColumnVector 691 692 1 592.6 1.7 0.0X +ConstantColumnVector 0 0 0 1476302.0 0.0 1.0X +OnHeapColumnVector 0 0 0 1022060.6 0.0 0.7X +OffHeapColumnVector 767 767 0 534.2 1.9 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core 
Processor Test read with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 2214485.0 0.0 1.0X -OnHeapColumnVector 0 1 0 949064.3 0.0 0.4X -OffHeapColumnVector 767 769 3 533.8 1.9 0.0X +ConstantColumnVector 0 0 0 1660780.7 0.0 1.0X +OnHeapColumnVector 0 0 0 1022032.6 0.0 0.6X +OffHeapColumnVector 766 767 1 534.8 1.9 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test read with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1022070.8 0.0 1.0X -OnHeapColumnVector 1 1 0 738160.3 0.0 0.7X -OffHeapColumnVector 762 762 0 537.5 1.9 0.0X +ConstantColumnVector 0 0 0 1476307.4 0.0 1.0X +OnHeapColumnVector 0 0 0 1022058.1 0.0 0.7X +OffHeapColumnVector 767 767 0 534.2 1.9 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 1660794.1 0.0 1.0X -OnHeapColumnVector 3784 3785 2 108.3 9.2 0.0X -OffHeapColumnVector 3768 3782 20 108.7 9.2 0.0X +ConstantColumnVector 0 0 0 1021777.6 0.0 1.0X +OnHeapColumnVector 3918 3923 7 104.5 9.6 0.0X +OffHeapColumnVector 3743 3752 12 109.4 9.1 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD 
EPYC 7763 64-Core Processor Test write and read with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 1660794.1 0.0 1.0X -OnHeapColumnVector 3788 3808 28 108.1 9.2 0.0X -OffHeapColumnVector 3680 3687 10 111.3 9.0 0.0X +ConstantColumnVector 0 0 0 857165.6 0.0 1.0X +OnHeapColumnVector 3933 3938 7 104.1 9.6 0.0X +OffHeapColumnVector 3737 3748 16 109.6 9.1 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1660794.1 0.0 1.0X -OnHeapColumnVector 3804 3807 5 107.7 9.3 0.0X -OffHeapColumnVector 3712 3713 1 110.3 9.1 0.0X +ConstantColumnVector 0 0 0 857165.6 0.0 1.0X +OnHeapColumnVector 3930 3930 1 104.2 9.6 0.0X +OffHeapColumnVector 3736 3736 1 109.6 9.1 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1660794.1 0.0 1.0X -OnHeapColumnVector 3801 3802 2 107.8 9.3 0.0X -OffHeapColumnVector 3704 3704 1 110.6 9.0 0.0X +ConstantColumnVector 0 0 0 857165.6 0.0 1.0X +OnHeapColumnVector 3922 3923 1 104.4 9.6 0.0X +OffHeapColumnVector 3742 3743 1 109.5 9.1 0.0X -OpenJDK 64-Bit Server VM 
17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1660794.1 0.0 1.0X -OnHeapColumnVector 3795 3797 2 107.9 9.3 0.0X -OffHeapColumnVector 3703 3715 16 110.6 9.0 0.0X +ConstantColumnVector 0 0 0 857165.6 0.0 1.0X +OnHeapColumnVector 3920 3926 8 104.5 9.6 0.0X +OffHeapColumnVector 3745 3753 12 109.4 9.1 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1660794.1 0.0 1.0X -OnHeapColumnVector 3794 3797 4 108.0 9.3 0.0X -OffHeapColumnVector 3719 3720 1 110.1 9.1 0.0X +ConstantColumnVector 0 0 0 857183.5 0.0 1.0X +OnHeapColumnVector 3920 3926 9 104.5 9.6 0.0X +OffHeapColumnVector 3723 3725 3 110.0 9.1 0.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 3673 3675 3 111.5 9.0 1.0X -OnHeapColumnVector 2448 2450 3 167.3 6.0 1.5X -OffHeapColumnVector 2585 2585 1 158.5 6.3 1.4X +ConstantColumnVector 3097 3099 3 132.3 7.6 1.0X +OnHeapColumnVector 2732 2733 1 
149.9 6.7 1.1X +OffHeapColumnVector 2741 2742 1 149.4 6.7 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 2657221.1 0.0 1.0X -OnHeapColumnVector 651 652 1 629.3 1.6 0.0X -OffHeapColumnVector 691 692 1 592.4 1.7 0.0X +ConstantColumnVector 765 766 1 535.4 1.9 1.0X +OnHeapColumnVector 774 774 1 529.3 1.9 1.0X +OffHeapColumnVector 830 831 2 493.6 2.0 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 887 888 1 461.6 2.2 1.0X -OnHeapColumnVector 764 764 0 535.9 1.9 1.2X -OffHeapColumnVector 762 763 1 537.5 1.9 1.2X +ConstantColumnVector 765 768 3 535.2 1.9 1.0X +OnHeapColumnVector 772 773 1 530.4 1.9 1.0X +OffHeapColumnVector 831 832 1 492.7 2.0 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test write and read with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 761 761 0 538.5 1.9 1.0X -OnHeapColumnVector 765 765 1 535.7 1.9 1.0X -OffHeapColumnVector 763 763 1 537.2 1.9 1.0X +ConstantColumnVector 892 893 1 459.2 2.2 1.0X +OnHeapColumnVector 774 775 1 
528.9 1.9 1.2X +OffHeapColumnVector 831 831 0 493.0 2.0 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with StringType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X -OnHeapColumnVector 0 0 0 2211973.6 0.0 0.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X OffHeapColumnVector 0 0 0 409190809.2 0.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X -OnHeapColumnVector 0 0 0 2211985.5 0.0 0.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X OffHeapColumnVector 0 0 0 409190809.2 0.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X -OnHeapColumnVector 0 0 0 2211985.5 0.0 0.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X OffHeapColumnVector 0 0 0 409190809.2 0.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X -OnHeapColumnVector 0 0 0 2211985.5 0.0 0.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X OffHeapColumnVector 0 0 0 409190809.2 0.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Test isNull with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X -OnHeapColumnVector 0 0 0 2211985.5 0.0 0.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X OffHeapColumnVector 0 0 0 409190809.2 0.0 1.0X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt index bdc453db1735d..ea578d9f6d8aa 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt @@ -2,437 +2,437 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10214 10246 45 1.5 649.4 1.0X -SQL Json 7831 7865 48 2.0 497.9 1.3X -SQL Json with UnsafeRow 8565 8571 8 1.8 544.6 1.2X -SQL Parquet Vectorized: DataPageV1 81 96 11 193.3 5.2 125.6X -SQL Parquet Vectorized: DataPageV2 201 210 8 78.4 
12.8 50.9X -SQL Parquet MR: DataPageV1 1794 1818 34 8.8 114.1 5.7X -SQL Parquet MR: DataPageV2 1650 1651 1 9.5 104.9 6.2X -SQL ORC Vectorized 120 132 8 130.5 7.7 84.8X -SQL ORC MR 1447 1453 9 10.9 92.0 7.1X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 10281 10314 46 1.5 653.6 1.0X +SQL Json 7952 8108 220 2.0 505.6 1.3X +SQL Json with UnsafeRow 9090 9092 3 1.7 577.9 1.1X +SQL Parquet Vectorized: DataPageV1 82 94 10 192.5 5.2 125.8X +SQL Parquet Vectorized: DataPageV2 92 99 8 171.9 5.8 112.3X +SQL Parquet MR: DataPageV1 1701 1728 38 9.2 108.2 6.0X +SQL Parquet MR: DataPageV2 1594 1607 19 9.9 101.3 6.5X +SQL ORC Vectorized 137 142 6 114.9 8.7 75.1X +SQL ORC MR 1464 1465 2 10.7 93.1 7.0X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 84 86 1 187.3 5.3 1.0X -ParquetReader Vectorized: DataPageV2 208 211 4 75.7 13.2 0.4X -ParquetReader Vectorized -> Row: DataPageV1 72 73 1 219.2 4.6 1.2X -ParquetReader Vectorized -> Row: DataPageV2 199 201 4 79.2 12.6 0.4X +ParquetReader Vectorized: DataPageV1 84 86 2 186.8 5.4 1.0X +ParquetReader Vectorized: DataPageV2 100 101 1 157.9 6.3 0.8X +ParquetReader Vectorized -> Row: DataPageV1 73 74 1 216.3 4.6 1.2X +ParquetReader Vectorized -> Row: DataPageV2 90 91 1 175.2 5.7 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9574 9607 46 1.6 608.7 
1.0X -SQL Json 8719 8757 55 1.8 554.3 1.1X -SQL Json with UnsafeRow 9120 9130 13 1.7 579.9 1.0X -SQL Parquet Vectorized: DataPageV1 95 101 5 164.9 6.1 100.4X -SQL Parquet Vectorized: DataPageV2 95 104 8 165.3 6.0 100.6X -SQL Parquet MR: DataPageV1 1927 1938 15 8.2 122.5 5.0X -SQL Parquet MR: DataPageV2 1792 1851 84 8.8 114.0 5.3X -SQL ORC Vectorized 110 118 7 143.1 7.0 87.1X -SQL ORC MR 1579 1582 4 10.0 100.4 6.1X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 9866 9904 53 1.6 627.3 1.0X +SQL Json 9122 9125 5 1.7 579.9 1.1X +SQL Json with UnsafeRow 10109 10124 20 1.6 642.7 1.0X +SQL Parquet Vectorized: DataPageV1 96 104 8 163.5 6.1 102.6X +SQL Parquet Vectorized: DataPageV2 98 111 8 160.7 6.2 100.8X +SQL Parquet MR: DataPageV1 1870 1883 19 8.4 118.9 5.3X +SQL Parquet MR: DataPageV2 1857 1895 54 8.5 118.1 5.3X +SQL ORC Vectorized 139 149 15 113.1 8.8 70.9X +SQL ORC MR 1588 1591 4 9.9 101.0 6.2X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 80 83 2 196.0 5.1 1.0X -ParquetReader Vectorized: DataPageV2 81 83 1 194.9 5.1 1.0X -ParquetReader Vectorized -> Row: DataPageV1 44 46 2 353.7 2.8 1.8X -ParquetReader Vectorized -> Row: DataPageV2 45 46 1 352.4 2.8 1.8X +ParquetReader Vectorized: DataPageV1 82 84 2 191.5 5.2 1.0X +ParquetReader Vectorized: DataPageV2 85 98 7 184.5 5.4 1.0X +ParquetReader Vectorized -> Row: DataPageV1 46 51 6 341.6 2.9 1.8X +ParquetReader Vectorized -> Row: DataPageV2 46 50 5 339.9 2.9 1.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) 
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10409 10436 39 1.5 661.8 1.0X -SQL Json 8942 8944 4 1.8 568.5 1.2X -SQL Json with UnsafeRow 9693 9697 5 1.6 616.3 1.1X -SQL Parquet Vectorized: DataPageV1 118 134 17 133.8 7.5 88.5X -SQL Parquet Vectorized: DataPageV2 139 152 16 113.5 8.8 75.1X -SQL Parquet MR: DataPageV1 2019 2054 50 7.8 128.4 5.2X -SQL Parquet MR: DataPageV2 2011 2011 0 7.8 127.9 5.2X -SQL ORC Vectorized 140 148 8 112.1 8.9 74.2X -SQL ORC MR 1818 1825 10 8.7 115.6 5.7X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 10575 10586 14 1.5 672.4 1.0X +SQL Json 9463 9503 57 1.7 601.6 1.1X +SQL Json with UnsafeRow 10388 10399 15 1.5 660.5 1.0X +SQL Parquet Vectorized: DataPageV1 118 131 14 133.4 7.5 89.7X +SQL Parquet Vectorized: DataPageV2 140 183 19 112.4 8.9 75.6X +SQL Parquet MR: DataPageV1 2010 2013 4 7.8 127.8 5.3X +SQL Parquet MR: DataPageV2 2018 2038 28 7.8 128.3 5.2X +SQL ORC Vectorized 139 172 28 113.3 8.8 76.2X +SQL ORC MR 1687 1701 20 9.3 107.3 6.3X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 138 146 9 113.8 8.8 1.0X -ParquetReader Vectorized: DataPageV2 169 176 10 93.2 10.7 0.8X -ParquetReader Vectorized -> Row: DataPageV1 134 139 5 117.0 8.5 1.0X -ParquetReader Vectorized -> Row: DataPageV2 183 186 5 86.1 11.6 0.8X +ParquetReader Vectorized: DataPageV1 149 155 5 105.3 9.5 1.0X +ParquetReader Vectorized: DataPageV2 178 184 7 88.2 11.3 0.8X +ParquetReader Vectorized -> Row: DataPageV1 135 140 5 116.9 8.6 1.1X +ParquetReader Vectorized -> Row: DataPageV2 166 
176 10 95.0 10.5 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11428 11435 9 1.4 726.6 1.0X -SQL Json 9048 9074 37 1.7 575.2 1.3X -SQL Json with UnsafeRow 9790 9800 14 1.6 622.4 1.2X -SQL Parquet Vectorized: DataPageV1 97 110 13 162.2 6.2 117.8X -SQL Parquet Vectorized: DataPageV2 176 197 18 89.2 11.2 64.8X -SQL Parquet MR: DataPageV1 1974 1978 6 8.0 125.5 5.8X -SQL Parquet MR: DataPageV2 2028 2031 5 7.8 128.9 5.6X -SQL ORC Vectorized 177 201 27 89.0 11.2 64.6X -SQL ORC MR 2053 2059 9 7.7 130.5 5.6X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 11729 11735 10 1.3 745.7 1.0X +SQL Json 9804 9835 43 1.6 623.3 1.2X +SQL Json with UnsafeRow 10754 10760 9 1.5 683.7 1.1X +SQL Parquet Vectorized: DataPageV1 97 113 14 162.9 6.1 121.5X +SQL Parquet Vectorized: DataPageV2 176 191 12 89.3 11.2 66.6X +SQL Parquet MR: DataPageV1 1949 1973 34 8.1 123.9 6.0X +SQL Parquet MR: DataPageV2 2019 2034 21 7.8 128.4 5.8X +SQL ORC Vectorized 180 190 17 87.6 11.4 65.3X +SQL ORC MR 1692 1707 22 9.3 107.5 6.9X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 158 162 5 99.5 10.0 1.0X -ParquetReader Vectorized: DataPageV2 237 248 18 66.4 15.1 0.7X -ParquetReader Vectorized -> Row: DataPageV1 128 134 7 122.5 8.2 1.2X -ParquetReader Vectorized -> Row: DataPageV2 209 216 6 75.3 13.3 0.8X +ParquetReader 
Vectorized: DataPageV1 130 138 6 120.9 8.3 1.0X +ParquetReader Vectorized: DataPageV2 214 219 6 73.6 13.6 0.6X +ParquetReader Vectorized -> Row: DataPageV1 129 133 5 122.0 8.2 1.0X +ParquetReader Vectorized -> Row: DataPageV2 225 246 24 69.8 14.3 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11758 11763 8 1.3 747.6 1.0X -SQL Json 9255 9264 12 1.7 588.4 1.3X -SQL Json with UnsafeRow 9871 9876 6 1.6 627.6 1.2X -SQL Parquet Vectorized: DataPageV1 286 308 13 54.9 18.2 41.1X -SQL Parquet Vectorized: DataPageV2 238 269 14 66.0 15.2 49.3X -SQL Parquet MR: DataPageV1 2493 2494 1 6.3 158.5 4.7X -SQL Parquet MR: DataPageV2 2053 2054 2 7.7 130.5 5.7X -SQL ORC Vectorized 165 174 10 95.5 10.5 71.4X -SQL ORC MR 1821 1822 1 8.6 115.8 6.5X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 11573 11671 139 1.4 735.8 1.0X +SQL Json 9549 9558 12 1.6 607.1 1.2X +SQL Json with UnsafeRow 10532 10532 1 1.5 669.6 1.1X +SQL Parquet Vectorized: DataPageV1 279 300 17 56.3 17.8 41.4X +SQL Parquet Vectorized: DataPageV2 248 272 11 63.5 15.7 46.7X +SQL Parquet MR: DataPageV1 2453 2454 2 6.4 156.0 4.7X +SQL Parquet MR: DataPageV2 1991 1997 8 7.9 126.6 5.8X +SQL ORC Vectorized 166 179 12 94.5 10.6 69.5X +SQL ORC MR 1773 1776 4 8.9 112.7 6.5X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 305 313 10 51.6 19.4 
1.0X -ParquetReader Vectorized: DataPageV2 258 270 15 60.8 16.4 1.2X -ParquetReader Vectorized -> Row: DataPageV1 317 319 3 49.6 20.2 1.0X -ParquetReader Vectorized -> Row: DataPageV2 254 268 9 61.9 16.2 1.2X +ParquetReader Vectorized: DataPageV1 306 309 3 51.5 19.4 1.0X +ParquetReader Vectorized: DataPageV2 278 284 6 56.5 17.7 1.1X +ParquetReader Vectorized -> Row: DataPageV1 317 323 6 49.6 20.2 1.0X +ParquetReader Vectorized -> Row: DataPageV2 262 272 9 60.1 16.6 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11470 11490 28 1.4 729.3 1.0X -SQL Json 10456 10495 56 1.5 664.8 1.1X -SQL Json with UnsafeRow 11508 11514 10 1.4 731.6 1.0X -SQL Parquet Vectorized: DataPageV1 85 101 17 185.0 5.4 134.9X -SQL Parquet Vectorized: DataPageV2 84 96 12 187.7 5.3 136.9X -SQL Parquet MR: DataPageV1 2003 2039 51 7.9 127.3 5.7X -SQL Parquet MR: DataPageV2 1969 1969 1 8.0 125.2 5.8X -SQL ORC Vectorized 239 248 14 65.9 15.2 48.0X -SQL ORC MR 1782 1791 13 8.8 113.3 6.4X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 11778 11795 24 1.3 748.8 1.0X +SQL Json 11267 11356 127 1.4 716.3 1.0X +SQL Json with UnsafeRow 12181 12204 32 1.3 774.5 1.0X +SQL Parquet Vectorized: DataPageV1 84 99 14 187.8 5.3 140.6X +SQL Parquet Vectorized: DataPageV2 83 96 16 189.9 5.3 142.2X +SQL Parquet MR: DataPageV1 2002 2005 4 7.9 127.3 5.9X +SQL Parquet MR: DataPageV2 1943 1971 40 8.1 123.5 6.1X +SQL ORC Vectorized 220 243 21 71.6 14.0 53.6X +SQL ORC MR 1680 1688 11 9.4 106.8 7.0X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg 
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 145 153 12 108.7 9.2 1.0X -ParquetReader Vectorized: DataPageV2 143 149 7 110.0 9.1 1.0X -ParquetReader Vectorized -> Row: DataPageV1 136 143 8 115.2 8.7 1.1X -ParquetReader Vectorized -> Row: DataPageV2 135 141 6 116.3 8.6 1.1X +ParquetReader Vectorized: DataPageV1 135 152 37 116.7 8.6 1.0X +ParquetReader Vectorized: DataPageV2 132 138 6 119.0 8.4 1.0X +ParquetReader Vectorized -> Row: DataPageV1 129 135 5 121.8 8.2 1.0X +ParquetReader Vectorized -> Row: DataPageV2 145 147 2 108.3 9.2 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11799 11829 43 1.3 750.1 1.0X -SQL Json 11125 11128 3 1.4 707.3 1.1X -SQL Json with UnsafeRow 11800 11815 22 1.3 750.2 1.0X -SQL Parquet Vectorized: DataPageV1 266 288 20 59.1 16.9 44.4X -SQL Parquet Vectorized: DataPageV2 263 286 14 59.7 16.8 44.8X -SQL Parquet MR: DataPageV1 2457 2472 22 6.4 156.2 4.8X -SQL Parquet MR: DataPageV2 2414 2423 13 6.5 153.5 4.9X -SQL ORC Vectorized 576 581 9 27.3 36.6 20.5X -SQL ORC MR 2192 2197 7 7.2 139.4 5.4X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 12383 12385 2 1.3 787.3 1.0X +SQL Json 11720 11726 8 1.3 745.1 1.1X +SQL Json with UnsafeRow 12528 12562 47 1.3 796.5 1.0X +SQL Parquet Vectorized: DataPageV1 279 301 19 56.3 17.8 44.3X +SQL Parquet Vectorized: DataPageV2 267 288 14 58.9 17.0 46.3X +SQL Parquet MR: DataPageV1 2421 2431 14 6.5 154.0 5.1X +SQL Parquet MR: DataPageV2 2354 2382 39 6.7 149.7 5.3X +SQL ORC 
Vectorized 585 598 16 26.9 37.2 21.2X +SQL ORC MR 2199 2199 0 7.2 139.8 5.6X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 341 346 6 46.2 21.6 1.0X -ParquetReader Vectorized: DataPageV2 351 358 5 44.8 22.3 1.0X -ParquetReader Vectorized -> Row: DataPageV1 324 331 6 48.5 20.6 1.0X -ParquetReader Vectorized -> Row: DataPageV2 323 326 4 48.7 20.5 1.1X +ParquetReader Vectorized: DataPageV1 334 342 8 47.1 21.2 1.0X +ParquetReader Vectorized: DataPageV2 334 338 5 47.1 21.2 1.0X +ParquetReader Vectorized -> Row: DataPageV1 333 336 5 47.2 21.2 1.0X +ParquetReader Vectorized -> Row: DataPageV2 335 338 2 46.9 21.3 1.0X ================================================================================================ SQL Single Numeric Column Scan in Struct ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2295 2333 53 6.9 145.9 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2261 2268 10 7.0 143.8 1.0X -SQL ORC Vectorized (Nested Column Enabled) 128 136 11 122.7 8.2 17.9X -SQL Parquet MR: DataPageV1 2378 2387 13 6.6 151.2 1.0X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2801 2804 5 5.6 178.1 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 103 119 19 152.5 6.6 
22.3X -SQL Parquet MR: DataPageV2 2295 2312 25 6.9 145.9 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2669 2679 14 5.9 169.7 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 104 114 13 150.9 6.6 22.0X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2168 2196 39 7.3 137.9 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2168 2173 7 7.3 137.8 1.0X +SQL ORC Vectorized (Nested Column Enabled) 146 152 12 107.9 9.3 14.9X +SQL Parquet MR: DataPageV1 2344 2367 33 6.7 149.0 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2798 2805 9 5.6 177.9 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 107 126 20 147.6 6.8 20.4X +SQL Parquet MR: DataPageV2 2289 2318 41 6.9 145.5 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2687 2690 5 5.9 170.8 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 105 117 18 149.6 6.7 20.6X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2265 2302 52 6.9 144.0 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2221 2276 78 7.1 141.2 1.0X -SQL ORC Vectorized (Nested Column Enabled) 261 274 21 60.3 16.6 8.7X -SQL Parquet MR: DataPageV1 2435 2440 6 6.5 154.8 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2972 2982 15 5.3 188.9 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 110 121 14 143.6 7.0 20.7X -SQL Parquet MR: DataPageV2 2429 2437 12 6.5 154.4 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2882 2884 4 5.5 183.2 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 142 160 21 110.5 9.0 15.9X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +SQL ORC MR 2155 2166 15 7.3 137.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2139 2150 17 7.4 136.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 276 283 12 57.0 17.6 7.8X +SQL Parquet MR: DataPageV1 2477 2489 17 6.4 157.5 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2995 3013 26 5.3 190.4 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 113 148 19 139.1 7.2 19.1X +SQL Parquet MR: DataPageV2 2394 2401 10 6.6 152.2 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2943 2994 73 5.3 187.1 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 139 159 21 113.1 8.8 15.5X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2182 2205 32 7.2 138.7 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2192 2223 45 7.2 139.3 1.0X -SQL ORC Vectorized (Nested Column Enabled) 284 293 14 55.4 18.1 7.7X -SQL Parquet MR: DataPageV1 2445 2464 26 6.4 155.4 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3037 3038 2 5.2 193.1 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 113 116 2 138.7 7.2 19.2X -SQL Parquet MR: DataPageV2 2437 2448 17 6.5 154.9 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3036 3037 1 5.2 193.0 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 265 271 5 59.4 16.8 8.2X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2146 2196 72 7.3 136.4 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2099 2111 17 7.5 133.5 1.0X +SQL ORC Vectorized (Nested Column Enabled) 302 322 17 52.1 19.2 7.1X +SQL Parquet MR: DataPageV1 2420 2446 36 6.5 153.9 0.9X +SQL Parquet Vectorized: DataPageV1 
(Nested Column Disabled) 2844 2849 6 5.5 180.8 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 106 118 13 148.3 6.7 20.2X +SQL Parquet MR: DataPageV2 2372 2383 14 6.6 150.8 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2871 2880 12 5.5 182.5 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 267 279 15 58.8 17.0 8.0X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2185 2193 12 7.2 138.9 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2202 2216 19 7.1 140.0 1.0X -SQL ORC Vectorized (Nested Column Enabled) 283 298 14 55.5 18.0 7.7X -SQL Parquet MR: DataPageV1 2872 2882 14 5.5 182.6 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3376 3392 23 4.7 214.7 0.6X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 320 329 8 49.2 20.3 6.8X -SQL Parquet MR: DataPageV2 2512 2518 9 6.3 159.7 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3009 3010 2 5.2 191.3 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 291 298 11 54.1 18.5 7.5X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2138 2162 35 7.4 135.9 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2118 2125 10 7.4 134.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 305 310 4 51.5 19.4 7.0X +SQL Parquet MR: DataPageV1 2786 2802 23 5.6 177.1 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3459 3460 1 4.5 219.9 0.6X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 323 328 5 48.7 20.5 6.6X +SQL Parquet MR: DataPageV2 2403 2419 22 6.5 152.8 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2896 2921 35 5.4 184.1 0.7X +SQL Parquet 
Vectorized: DataPageV2 (Nested Column Enabled) 269 296 17 58.4 17.1 7.9X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2205 2207 4 7.1 140.2 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2238 2243 7 7.0 142.3 1.0X -SQL ORC Vectorized (Nested Column Enabled) 346 374 27 45.5 22.0 6.4X -SQL Parquet MR: DataPageV1 2463 2465 2 6.4 156.6 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3059 3060 2 5.1 194.5 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 91 109 17 172.3 5.8 24.2X -SQL Parquet MR: DataPageV2 2419 2446 37 6.5 153.8 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3078 3084 9 5.1 195.7 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 91 108 16 172.9 5.8 24.2X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2198 2213 21 7.2 139.7 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2184 2219 49 7.2 138.9 1.0X +SQL ORC Vectorized (Nested Column Enabled) 360 374 25 43.7 22.9 6.1X +SQL Parquet MR: DataPageV1 2434 2445 16 6.5 154.7 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3003 3008 7 5.2 191.0 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 106 115 6 148.1 6.8 20.7X +SQL Parquet MR: DataPageV2 2354 2357 4 6.7 149.7 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2847 2860 17 5.5 181.0 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 91 103 6 171.9 5.8 24.0X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2639 2643 6 6.0 167.8 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2676 2677 1 5.9 170.1 1.0X -SQL ORC Vectorized (Nested Column Enabled) 700 703 4 22.5 44.5 3.8X -SQL Parquet MR: DataPageV1 2949 2962 17 5.3 187.5 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3307 3315 12 4.8 210.2 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 310 324 17 50.7 19.7 8.5X -SQL Parquet MR: DataPageV2 2785 2810 36 5.6 177.0 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3269 3269 1 4.8 207.8 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 310 324 11 50.7 19.7 8.5X +SQL ORC MR 2598 2614 23 6.1 165.2 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2566 2583 24 6.1 163.1 1.0X +SQL ORC Vectorized (Nested Column Enabled) 713 720 11 22.0 45.4 3.6X +SQL Parquet MR: DataPageV1 2767 2850 119 5.7 175.9 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3267 3280 18 4.8 207.7 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 284 294 16 55.4 18.1 9.1X +SQL Parquet MR: DataPageV2 2713 2727 20 5.8 172.5 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3235 3237 2 4.9 205.7 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 281 314 23 55.9 17.9 9.2X ================================================================================================ SQL Nested Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 12995 13153 131 0.1 12393.4 1.0X -SQL ORC Vectorized (Nested Column Disabled) 13011 13181 142 0.1 12408.4 1.0X -SQL ORC Vectorized (Nested Column Enabled) 7084 7096 11 0.1 6755.6 1.8X -SQL Parquet MR: DataPageV1 9427 9453 27 0.1 8990.6 1.4X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9722 9802 39 0.1 9271.2 1.3X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5931 6030 41 0.2 5656.2 2.2X -SQL Parquet MR: DataPageV2 9704 9744 59 0.1 9254.3 1.3X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 10391 10496 55 0.1 9909.7 1.3X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5687 5729 23 0.2 5423.2 2.3X +SQL ORC MR 13204 13257 72 0.1 12592.7 1.0X +SQL ORC Vectorized (Nested Column Disabled) 13023 13064 43 0.1 12419.4 1.0X +SQL ORC Vectorized (Nested Column Enabled) 7170 7182 15 0.1 6837.7 1.8X +SQL Parquet MR: DataPageV1 9320 9408 68 0.1 8887.8 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9632 9684 27 0.1 9186.0 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5954 6007 35 0.2 5678.3 2.2X +SQL Parquet MR: DataPageV2 9823 9976 213 0.1 9368.0 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 10198 10460 203 0.1 9725.6 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5708 5778 39 0.2 5443.9 2.3X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10241 10290 70 1.0 976.6 1.0X -SQL Json 9827 9840 19 1.1 937.1 1.0X -SQL Parquet Vectorized: DataPageV1 1711 1736 35 6.1 163.2 6.0X -SQL Parquet Vectorized: DataPageV2 1912 1916 6 5.5 182.3 5.4X -SQL Parquet MR: DataPageV1 4027 4028 1 2.6 384.1 2.5X -SQL Parquet MR: DataPageV2 3967 3967 1 2.6 378.3 2.6X -SQL ORC Vectorized 1819 1845 37 5.8 173.5 5.6X -SQL ORC MR 3460 3468 11 3.0 330.0 3.0X +SQL CSV 10885 10952 95 1.0 1038.0 1.0X +SQL Json 10052 10073 30 1.0 958.6 1.1X +SQL Parquet Vectorized: DataPageV1 1759 1768 13 6.0 167.7 6.2X +SQL Parquet Vectorized: DataPageV2 1974 1974 1 5.3 188.2 5.5X +SQL Parquet MR: DataPageV1 3896 3902 9 2.7 371.6 2.8X +SQL Parquet MR: DataPageV2 3869 3895 36 2.7 369.0 2.8X +SQL ORC Vectorized 1823 1848 35 5.8 173.8 6.0X +SQL ORC MR 3507 3524 24 3.0 334.4 3.1X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5689 5724 49 1.8 542.6 1.0X -SQL Json 6157 6173 22 1.7 587.1 0.9X -SQL Parquet Vectorized: DataPageV1 465 470 4 22.5 44.4 12.2X -SQL Parquet Vectorized: DataPageV2 459 460 1 22.8 43.8 12.4X -SQL Parquet MR: DataPageV1 1551 1558 10 6.8 147.9 3.7X -SQL Parquet MR: DataPageV2 1501 1506 7 7.0 143.2 3.8X -SQL ORC Vectorized 366 369 3 28.7 34.9 15.5X -SQL ORC MR 1703 1740 51 6.2 162.4 3.3X +SQL CSV 5972 5973 2 1.8 569.6 1.0X +SQL Json 6515 6538 32 1.6 621.4 0.9X +SQL Parquet 
Vectorized: DataPageV1 481 499 18 21.8 45.9 12.4X +SQL Parquet Vectorized: DataPageV2 482 495 16 21.8 46.0 12.4X +SQL Parquet MR: DataPageV1 1603 1624 29 6.5 152.9 3.7X +SQL Parquet MR: DataPageV2 1543 1554 16 6.8 147.2 3.9X +SQL ORC Vectorized 378 383 5 27.7 36.1 15.8X +SQL ORC MR 1747 1750 3 6.0 166.6 3.4X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 10920 10985 92 1.4 694.2 1.0X -Data column - Json 9064 9065 2 1.7 576.3 1.2X -Data column - Parquet Vectorized: DataPageV1 117 124 6 134.6 7.4 93.5X -Data column - Parquet Vectorized: DataPageV2 223 239 14 70.5 14.2 49.0X -Data column - Parquet MR: DataPageV1 2287 2295 12 6.9 145.4 4.8X -Data column - Parquet MR: DataPageV2 2302 2305 4 6.8 146.4 4.7X -Data column - ORC Vectorized 179 191 20 87.9 11.4 61.0X -Data column - ORC MR 2135 2161 36 7.4 135.8 5.1X -Partition column - CSV 3806 3806 0 4.1 242.0 2.9X -Partition column - Json 8340 8352 16 1.9 530.3 1.3X -Partition column - Parquet Vectorized: DataPageV1 30 34 6 529.7 1.9 367.7X -Partition column - Parquet Vectorized: DataPageV2 29 34 6 549.7 1.8 381.6X -Partition column - Parquet MR: DataPageV1 1425 1435 14 11.0 90.6 7.7X -Partition column - Parquet MR: DataPageV2 1414 1428 20 11.1 89.9 7.7X -Partition column - ORC Vectorized 30 33 5 525.5 1.9 364.8X -Partition column - ORC MR 1284 1293 13 12.3 81.6 8.5X -Both columns - CSV 11211 11232 30 1.4 712.8 1.0X -Both columns - Json 9167 9184 24 1.7 582.8 1.2X 
-Both columns - Parquet Vectorized: DataPageV1 153 167 13 102.5 9.8 71.2X -Both columns - Parquet Vectorized: DataPageV2 267 298 31 58.8 17.0 40.8X -Both columns - Parquet MR: DataPageV1 2567 2611 62 6.1 163.2 4.3X -Both columns - Parquet MR: DataPageV2 2647 2659 17 5.9 168.3 4.1X -Both columns - ORC Vectorized 178 200 26 88.3 11.3 61.3X -Both columns - ORC MR 2119 2131 17 7.4 134.7 5.2X +Data column - CSV 11700 11730 43 1.3 743.9 1.0X +Data column - Json 9276 9304 40 1.7 589.8 1.3X +Data column - Parquet Vectorized: DataPageV1 102 131 25 154.9 6.5 115.2X +Data column - Parquet Vectorized: DataPageV2 220 252 37 71.6 14.0 53.3X +Data column - Parquet MR: DataPageV1 2276 2345 97 6.9 144.7 5.1X +Data column - Parquet MR: DataPageV2 2205 2216 15 7.1 140.2 5.3X +Data column - ORC Vectorized 178 189 13 88.4 11.3 65.8X +Data column - ORC MR 1942 1952 14 8.1 123.5 6.0X +Partition column - CSV 3761 3778 24 4.2 239.1 3.1X +Partition column - Json 8482 8581 141 1.9 539.3 1.4X +Partition column - Parquet Vectorized: DataPageV1 30 37 8 528.0 1.9 392.7X +Partition column - Parquet Vectorized: DataPageV2 28 35 7 561.2 1.8 417.5X +Partition column - Parquet MR: DataPageV1 1184 1185 2 13.3 75.3 9.9X +Partition column - Parquet MR: DataPageV2 1179 1228 69 13.3 74.9 9.9X +Partition column - ORC Vectorized 30 33 6 531.9 1.9 395.6X +Partition column - ORC MR 1209 1211 3 13.0 76.9 9.7X +Both columns - CSV 11640 11652 17 1.4 740.0 1.0X +Both columns - Json 9733 9757 34 1.6 618.8 1.2X +Both columns - Parquet Vectorized: DataPageV1 141 162 15 111.2 9.0 82.7X +Both columns - Parquet Vectorized: DataPageV2 269 288 24 58.4 17.1 43.4X +Both columns - Parquet MR: DataPageV1 2487 2500 18 6.3 158.1 4.7X +Both columns - Parquet MR: DataPageV2 2441 2489 68 6.4 155.2 4.8X +Both columns - ORC Vectorized 203 214 16 77.6 12.9 57.8X +Both columns - ORC MR 2001 2006 7 7.9 127.2 5.8X ================================================================================================ String with Nulls Scan 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7385 7393 11 1.4 704.3 1.0X -SQL Json 8624 8638 20 1.2 822.4 0.9X -SQL Parquet Vectorized: DataPageV1 1123 1130 10 9.3 107.1 6.6X -SQL Parquet Vectorized: DataPageV2 1398 1403 6 7.5 133.3 5.3X -SQL Parquet MR: DataPageV1 3770 3795 35 2.8 359.6 2.0X -SQL Parquet MR: DataPageV2 3738 3769 43 2.8 356.5 2.0X -ParquetReader Vectorized: DataPageV1 753 760 7 13.9 71.8 9.8X -ParquetReader Vectorized: DataPageV2 1084 1095 16 9.7 103.3 6.8X -SQL ORC Vectorized 818 836 23 12.8 78.1 9.0X -SQL ORC MR 2885 2904 27 3.6 275.1 2.6X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 7656 7673 24 1.4 730.1 1.0X +SQL Json 8974 8995 30 1.2 855.8 0.9X +SQL Parquet Vectorized: DataPageV1 1114 1143 41 9.4 106.2 6.9X +SQL Parquet Vectorized: DataPageV2 1477 1501 34 7.1 140.8 5.2X +SQL Parquet MR: DataPageV1 3613 3614 1 2.9 344.6 2.1X +SQL Parquet MR: DataPageV2 3877 3877 0 2.7 369.7 2.0X +ParquetReader Vectorized: DataPageV1 765 773 12 13.7 72.9 10.0X +ParquetReader Vectorized: DataPageV2 1109 1130 30 9.5 105.8 6.9X +SQL ORC Vectorized 841 851 18 12.5 80.2 9.1X +SQL ORC MR 2849 2862 19 3.7 271.7 2.7X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5899 5900 2 1.8 562.6 1.0X -SQL Json 7189 7199 14 1.5 685.6 
0.8X -SQL Parquet Vectorized: DataPageV1 737 756 22 14.2 70.3 8.0X -SQL Parquet Vectorized: DataPageV2 1004 1035 45 10.4 95.7 5.9X -SQL Parquet MR: DataPageV1 2744 2752 12 3.8 261.6 2.2X -SQL Parquet MR: DataPageV2 2917 2923 8 3.6 278.2 2.0X -ParquetReader Vectorized: DataPageV1 719 734 19 14.6 68.6 8.2X -ParquetReader Vectorized: DataPageV2 950 957 12 11.0 90.6 6.2X -SQL ORC Vectorized 986 1002 22 10.6 94.1 6.0X -SQL ORC MR 2840 2866 36 3.7 270.9 2.1X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 5670 5681 16 1.8 540.7 1.0X +SQL Json 7363 7363 1 1.4 702.2 0.8X +SQL Parquet Vectorized: DataPageV1 747 770 35 14.0 71.2 7.6X +SQL Parquet Vectorized: DataPageV2 981 1019 53 10.7 93.6 5.8X +SQL Parquet MR: DataPageV1 2684 2693 13 3.9 256.0 2.1X +SQL Parquet MR: DataPageV2 2820 2830 14 3.7 269.0 2.0X +ParquetReader Vectorized: DataPageV1 697 706 11 15.1 66.4 8.1X +ParquetReader Vectorized: DataPageV2 920 935 20 11.4 87.8 6.2X +SQL ORC Vectorized 976 1000 35 10.7 93.1 5.8X +SQL ORC MR 2670 2690 28 3.9 254.6 2.1X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 3951 3956 7 2.7 376.8 1.0X -SQL Json 4888 4888 1 2.1 466.1 0.8X -SQL Parquet Vectorized: DataPageV1 173 193 11 60.5 16.5 22.8X -SQL Parquet Vectorized: DataPageV2 194 199 3 54.0 18.5 20.3X -SQL Parquet MR: DataPageV1 1666 1672 8 6.3 158.9 2.4X -SQL Parquet MR: DataPageV2 1626 1633 10 6.5 155.0 2.4X -ParquetReader Vectorized: DataPageV1 174 178 5 60.2 16.6 22.7X -ParquetReader Vectorized: DataPageV2 201 203 2 52.1 19.2 19.6X -SQL ORC Vectorized 328 331 4 32.0 31.2 12.1X -SQL ORC MR 1633 1636 3 6.4 155.8 2.4X +SQL CSV 4208 4236 40 2.5 401.3 1.0X +SQL Json 5288 5295 11 2.0 504.3 0.8X +SQL Parquet Vectorized: 
DataPageV1 165 174 6 63.7 15.7 25.6X +SQL Parquet Vectorized: DataPageV2 194 198 5 54.1 18.5 21.7X +SQL Parquet MR: DataPageV1 1693 1697 5 6.2 161.5 2.5X +SQL Parquet MR: DataPageV2 1668 1686 25 6.3 159.0 2.5X +ParquetReader Vectorized: DataPageV1 155 157 2 67.6 14.8 27.1X +ParquetReader Vectorized: DataPageV2 184 186 2 56.9 17.6 22.8X +SQL ORC Vectorized 327 340 17 32.1 31.2 12.9X +SQL ORC MR 1521 1538 23 6.9 145.1 2.8X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 1259 1261 2 0.8 1201.0 1.0X -SQL Json 1688 1695 9 0.6 1610.1 0.7X -SQL Parquet Vectorized: DataPageV1 24 29 6 43.9 22.8 52.7X -SQL Parquet Vectorized: DataPageV2 32 36 6 32.8 30.5 39.4X -SQL Parquet MR: DataPageV1 169 176 6 6.2 161.2 7.5X -SQL Parquet MR: DataPageV2 157 165 7 6.7 149.6 8.0X -SQL ORC Vectorized 29 35 6 36.2 27.6 43.5X -SQL ORC MR 132 140 6 7.9 126.2 9.5X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 1322 1325 4 0.8 1261.2 1.0X +SQL Json 1758 1767 13 0.6 1676.5 0.8X +SQL Parquet Vectorized: DataPageV1 24 28 6 44.0 22.7 55.5X +SQL Parquet Vectorized: DataPageV2 33 36 6 32.2 31.1 40.6X +SQL Parquet MR: DataPageV1 154 160 5 6.8 147.1 8.6X +SQL Parquet MR: DataPageV2 163 166 2 6.4 155.6 8.1X +SQL ORC Vectorized 28 33 6 37.8 26.5 47.6X +SQL ORC MR 134 141 5 7.8 127.7 9.9X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 
50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2656 2659 4 0.4 2533.4 1.0X -SQL Json 6186 6199 19 0.2 5899.5 0.4X -SQL Parquet Vectorized: DataPageV1 27 33 7 39.1 25.6 99.1X -SQL Parquet Vectorized: DataPageV2 35 40 6 30.3 33.0 76.9X -SQL Parquet MR: DataPageV1 170 176 6 6.2 162.4 15.6X -SQL Parquet MR: DataPageV2 163 173 10 6.5 155.0 16.3X -SQL ORC Vectorized 33 38 6 32.3 31.0 81.7X -SQL ORC MR 137 145 8 7.7 130.4 19.4X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 2634 2639 7 0.4 2511.9 1.0X +SQL Json 5624 5655 44 0.2 5363.7 0.5X +SQL Parquet Vectorized: DataPageV1 27 33 7 39.4 25.4 99.0X +SQL Parquet Vectorized: DataPageV2 34 41 7 30.5 32.8 76.6X +SQL Parquet MR: DataPageV1 158 167 6 6.6 150.9 16.6X +SQL Parquet MR: DataPageV2 153 159 6 6.9 145.9 17.2X +SQL ORC Vectorized 31 35 6 34.0 29.4 85.3X +SQL ORC MR 131 137 5 8.0 124.6 20.2X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4422 4439 25 0.2 4217.1 1.0X -SQL Json 11222 11248 37 0.1 10702.2 0.4X -SQL Parquet Vectorized: DataPageV1 35 41 6 30.2 33.1 127.5X -SQL Parquet Vectorized: DataPageV2 42 46 6 25.0 40.0 105.5X -SQL Parquet MR: DataPageV1 182 191 8 5.8 173.8 24.3X -SQL Parquet MR: DataPageV2 182 185 2 5.8 173.6 24.3X -SQL ORC Vectorized 39 44 5 27.0 37.0 114.0X -SQL ORC MR 148 159 6 7.1 141.2 29.9X +SQL CSV 4252 4309 81 0.2 4054.8 1.0X +SQL Json 10496 10648 215 0.1 10009.3 0.4X +SQL Parquet Vectorized: DataPageV1 35 48 10 30.2 33.1 122.4X +SQL Parquet Vectorized: DataPageV2 42 46 6 25.2 39.6 102.4X +SQL Parquet MR: DataPageV1 167 177 
7 6.3 159.5 25.4X +SQL Parquet MR: DataPageV2 177 182 6 5.9 168.4 24.1X +SQL ORC Vectorized 38 44 7 27.9 35.9 113.1X +SQL ORC MR 138 146 8 7.6 131.3 30.9X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt index d2180ecb771d5..948694d17066c 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt @@ -2,437 +2,437 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10580 10590 15 1.5 672.6 1.0X -SQL Json 8244 8399 219 1.9 524.2 1.3X -SQL Json with UnsafeRow 9338 9354 22 1.7 593.7 1.1X -SQL Parquet Vectorized: DataPageV1 103 117 7 152.2 6.6 102.4X -SQL Parquet Vectorized: DataPageV2 105 116 8 149.7 6.7 100.7X -SQL Parquet MR: DataPageV1 1871 1932 87 8.4 118.9 5.7X -SQL Parquet MR: DataPageV2 1762 1767 8 8.9 112.0 6.0X -SQL ORC Vectorized 142 151 6 110.8 9.0 74.5X -SQL ORC MR 1697 1702 7 9.3 107.9 6.2X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 11082 11121 55 1.4 704.6 1.0X +SQL Json 8235 8413 252 1.9 523.6 1.3X +SQL Json with UnsafeRow 9534 9547 17 1.6 606.2 1.2X +SQL Parquet Vectorized: DataPageV1 99 114 9 158.3 6.3 111.6X +SQL Parquet Vectorized: DataPageV2 99 106 5 158.1 6.3 111.4X +SQL Parquet MR: DataPageV1 1781 1787 9 8.8 113.2 6.2X +SQL Parquet MR: DataPageV2 1685 1760 106 9.3 107.1 6.6X +SQL ORC Vectorized 139 145 4 112.9 8.9 79.5X +SQL ORC MR 1447 1449 3 10.9 92.0 7.7X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS 
on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 94 96 3 167.8 6.0 1.0X -ParquetReader Vectorized: DataPageV2 101 103 1 155.0 6.4 0.9X -ParquetReader Vectorized -> Row: DataPageV1 74 76 2 211.8 4.7 1.3X -ParquetReader Vectorized -> Row: DataPageV2 83 84 2 190.4 5.3 1.1X +ParquetReader Vectorized: DataPageV1 88 90 1 178.8 5.6 1.0X +ParquetReader Vectorized: DataPageV2 95 96 1 165.3 6.0 0.9X +ParquetReader Vectorized -> Row: DataPageV1 73 74 1 214.9 4.7 1.2X +ParquetReader Vectorized -> Row: DataPageV2 81 82 1 193.4 5.2 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11731 11783 74 1.3 745.8 1.0X -SQL Json 9315 9364 69 1.7 592.2 1.3X -SQL Json with UnsafeRow 10241 10246 7 1.5 651.1 1.1X -SQL Parquet Vectorized: DataPageV1 113 120 7 138.9 7.2 103.6X -SQL Parquet Vectorized: DataPageV2 111 118 6 142.1 7.0 106.0X -SQL Parquet MR: DataPageV1 1992 2010 26 7.9 126.6 5.9X -SQL Parquet MR: DataPageV2 1918 1939 29 8.2 122.0 6.1X -SQL ORC Vectorized 112 120 6 139.9 7.1 104.4X -SQL ORC MR 1643 1647 5 9.6 104.5 7.1X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 10768 10815 67 1.5 684.6 1.0X +SQL Json 9495 9518 33 1.7 603.7 1.1X +SQL Json with UnsafeRow 10257 10262 7 1.5 652.1 1.0X +SQL Parquet Vectorized: DataPageV1 91 100 10 173.0 5.8 118.4X +SQL Parquet Vectorized: DataPageV2 90 99 10 175.0 5.7 119.8X +SQL Parquet MR: 
DataPageV1 1839 1839 0 8.6 116.9 5.9X +SQL Parquet MR: DataPageV2 1807 1816 13 8.7 114.9 6.0X +SQL ORC Vectorized 114 118 3 138.1 7.2 94.5X +SQL ORC MR 1485 1485 0 10.6 94.4 7.3X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 83 85 2 190.1 5.3 1.0X -ParquetReader Vectorized: DataPageV2 83 84 2 189.8 5.3 1.0X -ParquetReader Vectorized -> Row: DataPageV1 62 63 1 254.3 3.9 1.3X -ParquetReader Vectorized -> Row: DataPageV2 62 64 2 253.5 3.9 1.3X +ParquetReader Vectorized: DataPageV1 68 69 1 232.8 4.3 1.0X +ParquetReader Vectorized: DataPageV2 68 70 2 232.1 4.3 1.0X +ParquetReader Vectorized -> Row: DataPageV1 46 48 2 338.4 3.0 1.5X +ParquetReader Vectorized -> Row: DataPageV2 46 48 2 340.2 2.9 1.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12442 12467 35 1.3 791.1 1.0X -SQL Json 9536 9578 58 1.6 606.3 1.3X -SQL Json with UnsafeRow 10484 10484 1 1.5 666.5 1.2X -SQL Parquet Vectorized: DataPageV1 110 115 3 142.4 7.0 112.6X -SQL Parquet Vectorized: DataPageV2 139 144 5 112.9 8.9 89.3X -SQL Parquet MR: DataPageV1 2082 2122 57 7.6 132.4 6.0X -SQL Parquet MR: DataPageV2 2050 2071 30 7.7 130.3 6.1X -SQL ORC Vectorized 143 148 4 110.2 9.1 87.2X -SQL ORC MR 1722 1723 1 9.1 109.5 7.2X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 11107 11122 21 1.4 706.2 1.0X +SQL Json 9895 9916 
30 1.6 629.1 1.1X +SQL Json with UnsafeRow 10606 10615 13 1.5 674.3 1.0X +SQL Parquet Vectorized: DataPageV1 100 107 6 157.6 6.3 111.3X +SQL Parquet Vectorized: DataPageV2 129 135 6 122.2 8.2 86.3X +SQL Parquet MR: DataPageV1 1978 1980 3 8.0 125.8 5.6X +SQL Parquet MR: DataPageV2 1877 1894 24 8.4 119.3 5.9X +SQL ORC Vectorized 138 143 4 113.9 8.8 80.5X +SQL ORC MR 1570 1572 2 10.0 99.8 7.1X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 140 144 3 112.3 8.9 1.0X -ParquetReader Vectorized: DataPageV2 168 170 2 93.8 10.7 0.8X -ParquetReader Vectorized -> Row: DataPageV1 138 140 3 114.1 8.8 1.0X -ParquetReader Vectorized -> Row: DataPageV2 166 167 2 95.0 10.5 0.8X +ParquetReader Vectorized: DataPageV1 144 145 2 109.2 9.2 1.0X +ParquetReader Vectorized: DataPageV2 172 174 2 91.4 10.9 0.8X +ParquetReader Vectorized -> Row: DataPageV1 136 138 2 115.4 8.7 1.1X +ParquetReader Vectorized -> Row: DataPageV2 168 170 3 93.7 10.7 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13427 13451 33 1.2 853.7 1.0X -SQL Json 10000 10014 20 1.6 635.8 1.3X -SQL Json with UnsafeRow 10816 10829 18 1.5 687.7 1.2X -SQL Parquet Vectorized: DataPageV1 121 126 3 130.5 7.7 111.4X -SQL Parquet Vectorized: DataPageV2 197 203 12 79.7 12.5 68.0X -SQL Parquet MR: DataPageV1 2149 2246 137 7.3 136.7 6.2X -SQL Parquet MR: DataPageV2 2058 2072 
19 7.6 130.9 6.5X -SQL ORC Vectorized 159 165 6 98.8 10.1 84.3X -SQL ORC MR 1868 1869 1 8.4 118.8 7.2X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 12198 12209 16 1.3 775.5 1.0X +SQL Json 10381 10390 13 1.5 660.0 1.2X +SQL Json with UnsafeRow 11101 11149 69 1.4 705.8 1.1X +SQL Parquet Vectorized: DataPageV1 106 109 3 147.9 6.8 114.7X +SQL Parquet Vectorized: DataPageV2 181 186 8 87.1 11.5 67.5X +SQL Parquet MR: DataPageV1 2004 2004 0 7.9 127.4 6.1X +SQL Parquet MR: DataPageV2 1962 1976 20 8.0 124.7 6.2X +SQL ORC Vectorized 146 149 3 107.9 9.3 83.7X +SQL ORC MR 1583 1585 3 9.9 100.7 7.7X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 142 144 1 110.5 9.0 1.0X -ParquetReader Vectorized: DataPageV2 215 219 7 73.0 13.7 0.7X -ParquetReader Vectorized -> Row: DataPageV1 141 142 1 111.9 8.9 1.0X -ParquetReader Vectorized -> Row: DataPageV2 212 213 1 74.2 13.5 0.7X +ParquetReader Vectorized: DataPageV1 146 147 1 107.8 9.3 1.0X +ParquetReader Vectorized: DataPageV2 216 217 1 73.0 13.7 0.7X +ParquetReader Vectorized -> Row: DataPageV1 139 142 6 113.1 8.8 1.0X +ParquetReader Vectorized -> Row: DataPageV2 211 214 4 74.4 13.4 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13182 13188 8 1.2 838.1 1.0X -SQL Json 10134 10141 10 1.6 644.3 1.3X -SQL Json with UnsafeRow 10915 10920 7 1.4 
693.9 1.2X -SQL Parquet Vectorized: DataPageV1 281 285 4 55.9 17.9 46.8X -SQL Parquet Vectorized: DataPageV2 176 181 4 89.2 11.2 74.8X -SQL Parquet MR: DataPageV1 2659 2694 49 5.9 169.1 5.0X -SQL Parquet MR: DataPageV2 2191 2194 5 7.2 139.3 6.0X -SQL ORC Vectorized 144 151 4 109.2 9.2 91.5X -SQL ORC MR 1814 1887 103 8.7 115.4 7.3X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 12198 12205 10 1.3 775.5 1.0X +SQL Json 10391 10400 13 1.5 660.6 1.2X +SQL Json with UnsafeRow 11102 11110 12 1.4 705.8 1.1X +SQL Parquet Vectorized: DataPageV1 280 284 3 56.3 17.8 43.6X +SQL Parquet Vectorized: DataPageV2 175 179 4 90.0 11.1 69.8X +SQL Parquet MR: DataPageV1 2379 2432 75 6.6 151.2 5.1X +SQL Parquet MR: DataPageV2 1910 1917 11 8.2 121.4 6.4X +SQL ORC Vectorized 127 132 5 124.2 8.1 96.3X +SQL ORC MR 1701 1717 23 9.2 108.1 7.2X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 316 317 1 49.8 20.1 1.0X -ParquetReader Vectorized: DataPageV2 214 218 7 73.5 13.6 1.5X -ParquetReader Vectorized -> Row: DataPageV1 338 343 8 46.6 21.5 0.9X -ParquetReader Vectorized -> Row: DataPageV2 234 236 2 67.1 14.9 1.3X +ParquetReader Vectorized: DataPageV1 335 337 2 47.0 21.3 1.0X +ParquetReader Vectorized: DataPageV2 217 231 9 72.4 13.8 1.5X +ParquetReader Vectorized -> Row: DataPageV1 347 353 6 45.4 22.0 1.0X +ParquetReader Vectorized -> Row: DataPageV2 243 248 4 64.6 15.5 1.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13724 13734 14 1.1 872.5 1.0X -SQL Json 11883 11914 45 1.3 755.5 1.2X -SQL Json with UnsafeRow 12737 12740 4 1.2 809.8 1.1X -SQL Parquet Vectorized: DataPageV1 86 97 10 183.4 5.5 160.0X -SQL Parquet Vectorized: DataPageV2 94 107 8 168.1 5.9 146.7X -SQL Parquet MR: DataPageV1 2291 2295 6 6.9 145.7 6.0X -SQL Parquet MR: DataPageV2 2156 2157 2 7.3 137.1 6.4X -SQL ORC Vectorized 258 270 11 60.9 16.4 53.1X -SQL ORC MR 1903 1908 7 8.3 121.0 7.2X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 12624 12635 16 1.2 802.6 1.0X +SQL Json 11911 11924 19 1.3 757.3 1.1X +SQL Json with UnsafeRow 12643 12657 21 1.2 803.8 1.0X +SQL Parquet Vectorized: DataPageV1 90 98 8 175.2 5.7 140.6X +SQL Parquet Vectorized: DataPageV2 90 103 10 174.5 5.7 140.0X +SQL Parquet MR: DataPageV1 2018 2022 5 7.8 128.3 6.3X +SQL Parquet MR: DataPageV2 1947 1965 25 8.1 123.8 6.5X +SQL ORC Vectorized 251 268 16 62.6 16.0 50.2X +SQL ORC MR 1729 1732 4 9.1 109.9 7.3X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 141 141 0 111.9 8.9 1.0X -ParquetReader Vectorized: DataPageV2 152 155 4 103.2 9.7 0.9X -ParquetReader Vectorized -> Row: DataPageV1 152 157 4 103.2 9.7 0.9X -ParquetReader Vectorized -> Row: DataPageV2 152 156 6 103.6 9.7 0.9X +ParquetReader Vectorized: DataPageV1 139 142 2 113.0 8.8 1.0X +ParquetReader Vectorized: DataPageV2 156 158 4 101.0 9.9 0.9X +ParquetReader Vectorized -> Row: DataPageV1 151 153 3 104.3 9.6 0.9X +ParquetReader Vectorized -> Row: DataPageV2 150 153 4 104.5 9.6 0.9X -OpenJDK 64-Bit Server VM 
17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13926 13932 8 1.1 885.4 1.0X -SQL Json 12135 12148 19 1.3 771.5 1.1X -SQL Json with UnsafeRow 12983 13003 29 1.2 825.4 1.1X -SQL Parquet Vectorized: DataPageV1 292 298 7 53.9 18.5 47.7X -SQL Parquet Vectorized: DataPageV2 292 297 4 53.9 18.5 47.7X -SQL Parquet MR: DataPageV1 2769 2775 9 5.7 176.1 5.0X -SQL Parquet MR: DataPageV2 2619 2623 6 6.0 166.5 5.3X -SQL ORC Vectorized 632 649 18 24.9 40.2 22.0X -SQL ORC MR 2386 2405 27 6.6 151.7 5.8X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 12881 13006 176 1.2 819.0 1.0X +SQL Json 12083 12109 37 1.3 768.2 1.1X +SQL Json with UnsafeRow 12697 12731 49 1.2 807.2 1.0X +SQL Parquet Vectorized: DataPageV1 281 286 9 56.0 17.9 45.8X +SQL Parquet Vectorized: DataPageV2 280 287 5 56.2 17.8 46.0X +SQL Parquet MR: DataPageV1 2442 2490 67 6.4 155.3 5.3X +SQL Parquet MR: DataPageV2 2356 2370 19 6.7 149.8 5.5X +SQL ORC Vectorized 639 643 3 24.6 40.7 20.1X +SQL ORC MR 2155 2161 7 7.3 137.0 6.0X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 357 360 2 44.0 22.7 1.0X -ParquetReader Vectorized: DataPageV2 356 359 2 44.1 22.7 1.0X -ParquetReader Vectorized -> Row: DataPageV1 365 371 7 43.1 23.2 1.0X -ParquetReader Vectorized -> Row: DataPageV2 367 370 4 42.9 23.3 1.0X +ParquetReader Vectorized: DataPageV1 334 335 2 47.1 21.2 
1.0X +ParquetReader Vectorized: DataPageV2 335 339 3 46.9 21.3 1.0X +ParquetReader Vectorized -> Row: DataPageV1 346 353 7 45.4 22.0 1.0X +ParquetReader Vectorized -> Row: DataPageV2 346 351 4 45.4 22.0 1.0X ================================================================================================ SQL Single Numeric Column Scan in Struct ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2238 2269 44 7.0 142.3 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2290 2319 42 6.9 145.6 1.0X -SQL ORC Vectorized (Nested Column Enabled) 129 144 34 121.9 8.2 17.3X -SQL Parquet MR: DataPageV1 2487 2501 20 6.3 158.1 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3250 3274 35 4.8 206.6 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 112 118 7 140.2 7.1 19.9X -SQL Parquet MR: DataPageV2 2368 2393 35 6.6 150.5 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3091 3118 37 5.1 196.5 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 113 118 7 139.2 7.2 19.8X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2062 2069 10 7.6 131.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2068 2085 24 7.6 131.5 1.0X +SQL ORC Vectorized (Nested Column Enabled) 119 132 28 132.3 7.6 17.3X +SQL Parquet MR: DataPageV1 2402 2421 27 6.5 152.7 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2910 2913 5 5.4 185.0 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 102 108 5 153.9 6.5 20.2X +SQL Parquet 
MR: DataPageV2 2340 2361 29 6.7 148.8 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2765 2774 12 5.7 175.8 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 100 104 4 157.4 6.4 20.6X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2156 2195 55 7.3 137.0 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2174 2191 24 7.2 138.2 1.0X -SQL ORC Vectorized (Nested Column Enabled) 259 264 4 60.6 16.5 8.3X -SQL Parquet MR: DataPageV1 2617 2631 20 6.0 166.4 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3209 3215 8 4.9 204.0 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 171 182 14 92.0 10.9 12.6X -SQL Parquet MR: DataPageV2 2463 2498 50 6.4 156.6 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3018 3023 6 5.2 191.9 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 287 303 14 54.8 18.2 7.5X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2092 2099 10 7.5 133.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2093 2110 24 7.5 133.1 1.0X +SQL ORC Vectorized (Nested Column Enabled) 280 286 7 56.2 17.8 7.5X +SQL Parquet MR: DataPageV1 2341 2354 18 6.7 148.9 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2922 2926 6 5.4 185.8 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 144 150 4 109.3 9.1 14.5X +SQL Parquet MR: DataPageV2 2276 2287 16 6.9 144.7 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2828 2831 4 5.6 179.8 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 266 283 18 59.2 16.9 7.9X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD 
EPYC 7763 64-Core Processor SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2301 2367 94 6.8 146.3 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2417 2421 6 6.5 153.7 1.0X -SQL ORC Vectorized (Nested Column Enabled) 282 288 4 55.7 17.9 8.2X -SQL Parquet MR: DataPageV1 2681 2694 18 5.9 170.5 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3187 3213 36 4.9 202.6 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 152 159 6 103.8 9.6 15.2X -SQL Parquet MR: DataPageV2 2636 2650 20 6.0 167.6 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3077 3089 17 5.1 195.6 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 287 304 31 54.7 18.3 8.0X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2149 2172 33 7.3 136.6 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2074 2107 47 7.6 131.8 1.0X +SQL ORC Vectorized (Nested Column Enabled) 274 282 8 57.4 17.4 7.8X +SQL Parquet MR: DataPageV1 2363 2370 10 6.7 150.2 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2863 2898 49 5.5 182.0 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 120 134 7 130.8 7.6 17.9X +SQL Parquet MR: DataPageV2 2301 2318 23 6.8 146.3 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2729 2763 48 5.8 173.5 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 267 274 6 58.9 17.0 8.1X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2419 2419 
1 6.5 153.8 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2342 2392 71 6.7 148.9 1.0X -SQL ORC Vectorized (Nested Column Enabled) 285 291 4 55.2 18.1 8.5X -SQL Parquet MR: DataPageV1 2915 2931 23 5.4 185.4 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3405 3418 19 4.6 216.5 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 321 346 15 49.0 20.4 7.5X -SQL Parquet MR: DataPageV2 2554 2570 24 6.2 162.4 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2943 2954 15 5.3 187.1 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 210 215 3 74.9 13.4 11.5X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2115 2121 8 7.4 134.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2103 2130 37 7.5 133.7 1.0X +SQL ORC Vectorized (Nested Column Enabled) 270 305 76 58.2 17.2 7.8X +SQL Parquet MR: DataPageV1 2791 2796 8 5.6 177.4 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3190 3211 29 4.9 202.8 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 307 315 9 51.2 19.5 6.9X +SQL Parquet MR: DataPageV2 2447 2458 15 6.4 155.6 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2786 2804 26 5.6 177.1 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 201 208 10 78.4 12.8 10.5X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2457 2629 243 6.4 156.2 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2469 2481 17 6.4 157.0 1.0X -SQL ORC Vectorized (Nested Column Enabled) 354 368 10 44.4 22.5 6.9X -SQL Parquet MR: DataPageV1 2592 2592 1 6.1 164.8 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3017 3022 7 5.2 
191.8 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 104 123 14 151.0 6.6 23.6X -SQL Parquet MR: DataPageV2 2511 2554 61 6.3 159.6 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2932 2964 44 5.4 186.4 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 102 106 7 154.9 6.5 24.2X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL ORC MR 2200 2240 56 7.1 139.9 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2211 2231 28 7.1 140.5 1.0X +SQL ORC Vectorized (Nested Column Enabled) 356 376 18 44.2 22.6 6.2X +SQL Parquet MR: DataPageV1 2249 2280 43 7.0 143.0 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2676 2677 2 5.9 170.1 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 93 121 14 169.6 5.9 23.7X +SQL Parquet MR: DataPageV2 2244 2258 19 7.0 142.7 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2605 2631 37 6.0 165.6 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 104 121 15 151.7 6.6 21.2X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2921 2933 17 5.4 185.7 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2929 2950 30 5.4 186.2 1.0X -SQL ORC Vectorized (Nested Column Enabled) 790 793 4 19.9 50.2 3.7X -SQL Parquet MR: DataPageV1 2944 2952 12 5.3 187.2 1.0X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3441 3485 62 4.6 218.8 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 300 313 12 52.5 19.0 9.8X -SQL Parquet MR: DataPageV2 2922 2972 71 5.4 185.8 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3389 3393 7 4.6 215.4 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column 
Enabled) 299 306 7 52.6 19.0 9.8X +SQL ORC MR 2579 2580 1 6.1 164.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2595 2624 40 6.1 165.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 748 767 23 21.0 47.5 3.4X +SQL Parquet MR: DataPageV1 2668 2686 26 5.9 169.6 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3358 3363 7 4.7 213.5 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 291 302 12 54.0 18.5 8.9X +SQL Parquet MR: DataPageV2 2652 2655 4 5.9 168.6 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3264 3284 28 4.8 207.5 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 292 299 7 53.9 18.5 8.8X ================================================================================================ SQL Nested Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 14325 14522 154 0.1 13661.0 1.0X -SQL ORC Vectorized (Nested Column Disabled) 14107 14392 251 0.1 13453.2 1.0X -SQL ORC Vectorized (Nested Column Enabled) 7445 7470 16 0.1 7099.8 1.9X -SQL Parquet MR: DataPageV1 8992 9032 32 0.1 8575.8 1.6X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9615 9741 77 0.1 9169.2 1.5X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 6242 6391 95 0.2 5952.4 2.3X -SQL Parquet MR: DataPageV2 10019 10415 264 0.1 9555.2 1.4X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 10273 10371 146 0.1 9796.8 1.4X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5730 5779 33 0.2 5464.9 2.5X +SQL ORC MR 12979 13152 162 0.1 
12377.6 1.0X +SQL ORC Vectorized (Nested Column Disabled) 12920 12989 53 0.1 12321.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 7225 7249 17 0.1 6890.2 1.8X +SQL Parquet MR: DataPageV1 8620 8655 24 0.1 8221.1 1.5X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 8972 8983 7 0.1 8556.5 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5756 5799 31 0.2 5489.8 2.3X +SQL Parquet MR: DataPageV2 9485 9514 18 0.1 9045.5 1.4X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 9765 9805 19 0.1 9312.8 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5567 5600 19 0.2 5309.3 2.3X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12003 12156 217 0.9 1144.7 1.0X -SQL Json 10706 10711 7 1.0 1021.0 1.1X -SQL Parquet Vectorized: DataPageV1 1800 1807 10 5.8 171.6 6.7X -SQL Parquet Vectorized: DataPageV2 1923 1930 10 5.5 183.4 6.2X -SQL Parquet MR: DataPageV1 4008 4018 14 2.6 382.2 3.0X -SQL Parquet MR: DataPageV2 4075 4082 10 2.6 388.7 2.9X -SQL ORC Vectorized 1903 1925 30 5.5 181.5 6.3X -SQL ORC MR 3934 3949 21 2.7 375.2 3.1X +SQL CSV 11208 11255 67 0.9 1068.9 1.0X +SQL Json 10457 10487 41 1.0 997.3 1.1X +SQL Parquet Vectorized: DataPageV1 1820 1834 20 5.8 173.5 6.2X +SQL Parquet Vectorized: DataPageV2 1917 1918 1 5.5 182.8 5.8X +SQL Parquet MR: DataPageV1 3975 3976 1 2.6 379.1 2.8X +SQL Parquet MR: DataPageV2 3974 3994 28 2.6 379.0 2.8X +SQL ORC Vectorized 1939 1944 7 5.4 184.9 5.8X 
+SQL ORC MR 3490 3502 17 3.0 332.8 3.2X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7254 7268 20 1.4 691.8 1.0X -SQL Json 6959 6959 1 1.5 663.6 1.0X -SQL Parquet Vectorized: DataPageV1 477 482 6 22.0 45.5 15.2X -SQL Parquet Vectorized: DataPageV2 475 488 21 22.1 45.3 15.3X -SQL Parquet MR: DataPageV1 1778 1780 3 5.9 169.6 4.1X -SQL Parquet MR: DataPageV2 1723 1726 5 6.1 164.3 4.2X -SQL ORC Vectorized 396 409 22 26.5 37.7 18.3X -SQL ORC MR 1884 1905 30 5.6 179.6 3.9X +SQL CSV 6355 6393 55 1.7 606.0 1.0X +SQL Json 6798 6811 17 1.5 648.4 0.9X +SQL Parquet Vectorized: DataPageV1 517 522 4 20.3 49.3 12.3X +SQL Parquet Vectorized: DataPageV2 511 521 13 20.5 48.7 12.4X +SQL Parquet MR: DataPageV1 1725 1746 30 6.1 164.5 3.7X +SQL Parquet MR: DataPageV2 1631 1650 27 6.4 155.5 3.9X +SQL ORC Vectorized 371 377 4 28.3 35.3 17.1X +SQL ORC MR 1701 1713 17 6.2 162.2 3.7X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 13466 13514 67 1.2 856.2 1.0X -Data column - Json 10162 10191 42 1.5 646.1 1.3X -Data column - Parquet Vectorized: DataPageV1 119 134 10 132.4 7.6 113.3X -Data column - Parquet Vectorized: DataPageV2 294 302 13 53.6 18.7 45.9X -Data column - Parquet MR: DataPageV1 2489 2627 195 6.3 158.2 5.4X -Data column - Parquet MR: DataPageV2 2443 2466 33 6.4 155.3 5.5X -Data column - ORC Vectorized 187 200 11 84.0 11.9 71.9X -Data column - ORC MR 2306 2313 9 6.8 146.6 5.8X -Partition column - CSV 3790 3809 27 4.2 241.0 3.6X -Partition column - Json 8570 8579 12 1.8 544.9 1.6X -Partition column - Parquet Vectorized: DataPageV1 35 38 3 444.9 2.2 380.9X -Partition column - Parquet Vectorized: DataPageV2 35 38 3 452.0 2.2 387.0X -Partition column - Parquet MR: DataPageV1 1411 1422 15 11.1 89.7 9.5X -Partition column - Parquet MR: DataPageV2 1396 1435 54 11.3 88.8 9.6X -Partition column - ORC Vectorized 36 39 3 432.0 2.3 369.9X -Partition column - ORC MR 1503 1514 16 10.5 95.6 9.0X -Both columns - CSV 13408 13425 24 1.2 852.5 1.0X -Both columns - Json 10284 10301 24 1.5 653.9 1.3X -Both columns - Parquet Vectorized: DataPageV1 154 182 24 101.8 9.8 87.2X -Both columns - Parquet Vectorized: DataPageV2 341 350 17 46.1 21.7 39.5X -Both columns - Parquet MR: DataPageV1 2465 2490 35 6.4 156.7 5.5X -Both columns - Parquet MR: DataPageV2 2450 2489 55 6.4 155.8 5.5X -Both columns - ORC Vectorized 220 245 19 71.4 14.0 61.1X -Both columns - ORC MR 2333 2334 1 6.7 148.4 5.8X +Data column - CSV 12083 12159 107 1.3 768.2 1.0X +Data column - Json 10115 10122 10 1.6 643.1 1.2X +Data column - Parquet Vectorized: DataPageV1 102 107 4 154.0 6.5 118.3X +Data column - Parquet Vectorized: DataPageV2 237 242 4 66.3 15.1 50.9X +Data column - Parquet MR: DataPageV1 2228 2369 199 7.1 141.7 5.4X +Data column - Parquet MR: DataPageV2 2196 2201 7 7.2 139.6 5.5X 
+Data column - ORC Vectorized 138 142 4 113.7 8.8 87.4X +Data column - ORC MR 1925 1944 27 8.2 122.4 6.3X +Partition column - CSV 3593 3619 37 4.4 228.4 3.4X +Partition column - Json 8708 8717 13 1.8 553.6 1.4X +Partition column - Parquet Vectorized: DataPageV1 29 32 4 549.4 1.8 422.0X +Partition column - Parquet Vectorized: DataPageV2 28 32 4 554.1 1.8 425.7X +Partition column - Parquet MR: DataPageV1 1173 1183 14 13.4 74.6 10.3X +Partition column - Parquet MR: DataPageV2 1168 1176 11 13.5 74.3 10.3X +Partition column - ORC Vectorized 30 36 6 525.7 1.9 403.8X +Partition column - ORC MR 1210 1211 1 13.0 76.9 10.0X +Both columns - CSV 12007 12141 189 1.3 763.4 1.0X +Both columns - Json 10312 10333 29 1.5 655.6 1.2X +Both columns - Parquet Vectorized: DataPageV1 136 157 21 115.5 8.7 88.7X +Both columns - Parquet Vectorized: DataPageV2 279 310 24 56.4 17.7 43.3X +Both columns - Parquet MR: DataPageV1 2345 2361 23 6.7 149.1 5.2X +Both columns - Parquet MR: DataPageV2 2257 2309 74 7.0 143.5 5.4X +Both columns - ORC Vectorized 183 211 19 85.8 11.6 65.9X +Both columns - ORC MR 2075 2086 15 7.6 131.9 5.8X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8487 8504 24 1.2 809.4 1.0X -SQL Json 9230 9236 9 1.1 880.3 0.9X -SQL Parquet Vectorized: DataPageV1 1279 1294 20 8.2 122.0 6.6X -SQL Parquet Vectorized: DataPageV2 1327 1382 78 7.9 126.5 6.4X -SQL Parquet MR: DataPageV1 3655 3662 10 2.9 348.6 2.3X -SQL Parquet MR: 
DataPageV2 3708 3742 47 2.8 353.6 2.3X -ParquetReader Vectorized: DataPageV1 837 838 1 12.5 79.8 10.1X -ParquetReader Vectorized: DataPageV2 898 900 3 11.7 85.7 9.4X -SQL ORC Vectorized 970 1025 77 10.8 92.5 8.7X -SQL ORC MR 3092 3123 44 3.4 294.9 2.7X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 7818 7868 70 1.3 745.6 1.0X +SQL Json 9376 9390 20 1.1 894.1 0.8X +SQL Parquet Vectorized: DataPageV1 1240 1242 3 8.5 118.2 6.3X +SQL Parquet Vectorized: DataPageV2 1301 1302 2 8.1 124.1 6.0X +SQL Parquet MR: DataPageV1 3359 3365 9 3.1 320.3 2.3X +SQL Parquet MR: DataPageV2 3670 3674 6 2.9 350.0 2.1X +ParquetReader Vectorized: DataPageV1 830 834 3 12.6 79.2 9.4X +ParquetReader Vectorized: DataPageV2 896 898 2 11.7 85.4 8.7X +SQL ORC Vectorized 867 887 32 12.1 82.7 9.0X +SQL ORC MR 2905 2907 3 3.6 277.1 2.7X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 6254 6267 18 1.7 596.4 1.0X -SQL Json 7852 7857 8 1.3 748.8 0.8X -SQL Parquet Vectorized: DataPageV1 889 907 17 11.8 84.8 7.0X -SQL Parquet Vectorized: DataPageV2 978 983 8 10.7 93.3 6.4X -SQL Parquet MR: DataPageV1 2939 2948 14 3.6 280.2 2.1X -SQL Parquet MR: DataPageV2 3175 3189 20 3.3 302.8 2.0X -ParquetReader Vectorized: DataPageV1 756 761 7 13.9 72.1 8.3X -ParquetReader Vectorized: DataPageV2 853 858 5 12.3 81.3 7.3X -SQL ORC Vectorized 1024 1027 4 10.2 97.6 6.1X -SQL ORC MR 2930 2933 4 3.6 279.4 2.1X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 6074 6083 13 1.7 579.2 1.0X +SQL Json 7930 7931 2 1.3 756.2 0.8X +SQL Parquet Vectorized: DataPageV1 862 888 22 12.2 82.2 7.0X +SQL Parquet Vectorized: DataPageV2 951 959 9 11.0 90.7 6.4X +SQL Parquet MR: DataPageV1 2636 2703 95 4.0 
251.4 2.3X +SQL Parquet MR: DataPageV2 2697 2706 13 3.9 257.2 2.3X +ParquetReader Vectorized: DataPageV1 758 765 10 13.8 72.3 8.0X +ParquetReader Vectorized: DataPageV2 824 826 4 12.7 78.6 7.4X +SQL ORC Vectorized 982 993 10 10.7 93.6 6.2X +SQL ORC MR 2763 2774 15 3.8 263.5 2.2X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4333 4340 10 2.4 413.3 1.0X -SQL Json 5440 5448 11 1.9 518.8 0.8X -SQL Parquet Vectorized: DataPageV1 166 173 10 63.1 15.8 26.1X -SQL Parquet Vectorized: DataPageV2 184 187 3 56.9 17.6 23.5X -SQL Parquet MR: DataPageV1 1846 1854 11 5.7 176.0 2.3X -SQL Parquet MR: DataPageV2 1813 1815 2 5.8 172.9 2.4X -ParquetReader Vectorized: DataPageV1 171 174 4 61.2 16.3 25.3X -ParquetReader Vectorized: DataPageV2 190 191 1 55.2 18.1 22.8X -SQL ORC Vectorized 308 310 1 34.0 29.4 14.1X -SQL ORC MR 1700 1707 10 6.2 162.2 2.5X +SQL CSV 4577 4595 25 2.3 436.5 1.0X +SQL Json 5583 5595 16 1.9 532.5 0.8X +SQL Parquet Vectorized: DataPageV1 165 171 7 63.5 15.7 27.7X +SQL Parquet Vectorized: DataPageV2 179 182 2 58.7 17.0 25.6X +SQL Parquet MR: DataPageV1 1643 1652 13 6.4 156.7 2.8X +SQL Parquet MR: DataPageV2 1603 1604 1 6.5 152.9 2.9X +ParquetReader Vectorized: DataPageV1 170 172 4 61.9 16.2 27.0X +ParquetReader Vectorized: DataPageV2 184 185 1 56.9 17.6 24.8X +SQL ORC Vectorized 317 322 4 33.1 30.2 14.4X +SQL ORC MR 1585 1598 18 6.6 151.1 2.9X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 1183 1208 34 0.9 1128.7 1.0X -SQL Json 1836 1837 2 0.6 1750.6 0.6X -SQL Parquet Vectorized: DataPageV1 28 31 3 37.5 26.6 42.4X -SQL Parquet Vectorized: DataPageV2 38 42 5 27.8 35.9 31.4X -SQL Parquet MR: DataPageV1 185 189 3 5.7 176.1 6.4X -SQL Parquet MR: DataPageV2 180 188 10 5.8 171.9 6.6X -SQL ORC Vectorized 33 36 3 31.4 31.8 35.5X -SQL ORC MR 167 175 5 6.3 159.1 7.1X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 1232 1233 0 0.9 1175.3 1.0X +SQL Json 1765 1775 14 0.6 1683.6 0.7X +SQL Parquet Vectorized: DataPageV1 24 27 4 43.1 23.2 50.7X +SQL Parquet Vectorized: DataPageV2 33 36 4 31.8 31.4 37.4X +SQL Parquet MR: DataPageV1 156 160 3 6.7 148.6 7.9X +SQL Parquet MR: DataPageV2 151 156 3 6.9 144.1 8.2X +SQL ORC Vectorized 29 32 4 35.7 28.0 42.0X +SQL ORC MR 124 129 4 8.5 118.1 10.0X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2586 2589 4 0.4 2466.4 1.0X -SQL Json 6706 6852 207 0.2 6395.3 0.4X -SQL Parquet Vectorized: DataPageV1 32 35 3 33.0 30.3 81.5X -SQL Parquet Vectorized: DataPageV2 42 47 6 25.0 40.0 61.7X -SQL Parquet MR: DataPageV1 187 193 4 5.6 178.3 13.8X -SQL Parquet MR: DataPageV2 181 186 5 5.8 172.2 14.3X -SQL ORC Vectorized 38 41 3 27.3 36.7 67.2X -SQL ORC MR 171 178 11 6.1 163.5 15.1X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +SQL CSV 2667 2671 6 0.4 2543.6 1.0X +SQL Json 6256 6274 25 0.2 5966.2 0.4X +SQL Parquet Vectorized: 
DataPageV1 27 29 4 38.9 25.7 99.0X +SQL Parquet Vectorized: DataPageV2 36 39 4 29.3 34.1 74.5X +SQL Parquet MR: DataPageV1 160 166 5 6.5 152.9 16.6X +SQL Parquet MR: DataPageV2 155 160 4 6.8 147.9 17.2X +SQL ORC Vectorized 33 38 6 32.0 31.3 81.3X +SQL ORC MR 127 131 4 8.2 121.3 21.0X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4290 4320 42 0.2 4091.7 1.0X -SQL Json 12544 12642 139 0.1 11963.0 0.3X -SQL Parquet Vectorized: DataPageV1 41 45 5 25.7 38.9 105.2X -SQL Parquet Vectorized: DataPageV2 50 57 9 20.8 48.0 85.3X -SQL Parquet MR: DataPageV1 199 205 4 5.3 189.9 21.5X -SQL Parquet MR: DataPageV2 196 200 2 5.3 187.0 21.9X -SQL ORC Vectorized 46 49 4 22.6 44.2 92.5X -SQL ORC MR 181 185 3 5.8 172.7 23.7X +SQL CSV 4437 4470 46 0.2 4231.7 1.0X +SQL Json 11849 12082 329 0.1 11300.4 0.4X +SQL Parquet Vectorized: DataPageV1 34 38 5 30.8 32.4 130.5X +SQL Parquet Vectorized: DataPageV2 43 47 6 24.5 40.9 103.5X +SQL Parquet MR: DataPageV1 169 174 3 6.2 161.3 26.2X +SQL Parquet MR: DataPageV2 167 172 5 6.3 159.0 26.6X +SQL ORC Vectorized 38 41 4 27.3 36.7 115.4X +SQL ORC MR 134 138 3 7.8 127.5 33.2X diff --git a/sql/core/benchmarks/DatasetBenchmark-jdk21-results.txt b/sql/core/benchmarks/DatasetBenchmark-jdk21-results.txt index a98af93289208..80f7753f53541 100644 --- a/sql/core/benchmarks/DatasetBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/DatasetBenchmark-jdk21-results.txt @@ -2,45 +2,45 @@ Dataset Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor back-to-back map 
long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 6410 6514 147 15.6 64.1 1.0X -DataFrame 1121 1133 17 89.2 11.2 5.7X -Dataset 1691 1698 10 59.1 16.9 3.8X +RDD 6419 6615 277 15.6 64.2 1.0X +DataFrame 1215 1219 6 82.3 12.2 5.3X +Dataset 1694 1698 7 59.0 16.9 3.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor back-to-back map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 7313 7329 23 13.7 73.1 1.0X -DataFrame 2721 2764 60 36.7 27.2 2.7X -Dataset 6563 6672 155 15.2 65.6 1.1X +RDD 7768 7769 1 12.9 77.7 1.0X +DataFrame 2860 2869 13 35.0 28.6 2.7X +Dataset 7013 7025 17 14.3 70.1 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor back-to-back filter Long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 3870 3894 35 25.8 38.7 1.0X -DataFrame 723 733 11 138.3 7.2 5.4X -Dataset 1534 1566 45 65.2 15.3 2.5X +RDD 4387 4430 61 22.8 43.9 1.0X +DataFrame 755 782 28 132.4 7.6 5.8X +Dataset 1571 1581 14 63.7 15.7 2.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor back-to-back filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 1967 1996 41 50.8 19.7 1.0X 
-DataFrame 116 126 10 864.5 1.2 17.0X -Dataset 2234 2273 55 44.8 22.3 0.9X +RDD 2090 2092 3 47.9 20.9 1.0X +DataFrame 106 121 8 941.0 1.1 19.7X +Dataset 2335 2438 146 42.8 23.4 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor aggregate: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD sum 1470 1504 48 68.0 14.7 1.0X -DataFrame sum 66 84 13 1506.4 0.7 22.1X -Dataset sum using Aggregator 1929 1944 21 51.8 19.3 0.8X -Dataset complex Aggregator 4979 5163 260 20.1 49.8 0.3X +RDD sum 1419 1424 7 70.5 14.2 1.0X +DataFrame sum 57 70 11 1765.6 0.6 25.1X +Dataset sum using Aggregator 1942 1973 44 51.5 19.4 0.7X +Dataset complex Aggregator 5348 5593 347 18.7 53.5 0.3X diff --git a/sql/core/benchmarks/DatasetBenchmark-results.txt b/sql/core/benchmarks/DatasetBenchmark-results.txt index 78e90cf783593..6cd94dd233f80 100644 --- a/sql/core/benchmarks/DatasetBenchmark-results.txt +++ b/sql/core/benchmarks/DatasetBenchmark-results.txt @@ -2,45 +2,45 @@ Dataset Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor back-to-back map long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 6908 6969 86 14.5 69.1 1.0X -DataFrame 1286 1300 21 77.8 12.9 5.4X -Dataset 1763 1778 21 56.7 17.6 3.9X +RDD 5780 5868 124 17.3 57.8 1.0X +DataFrame 1150 1157 9 86.9 11.5 5.0X +Dataset 1584 1664 114 63.1 15.8 3.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 
64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor back-to-back map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 8250 8274 34 12.1 82.5 1.0X -DataFrame 2867 2868 2 34.9 28.7 2.9X -Dataset 6939 6971 45 14.4 69.4 1.2X +RDD 6968 7114 205 14.4 69.7 1.0X +DataFrame 2743 2753 15 36.5 27.4 2.5X +Dataset 7436 7456 29 13.4 74.4 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor back-to-back filter Long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 4265 4343 110 23.4 42.7 1.0X -DataFrame 712 763 45 140.5 7.1 6.0X -Dataset 1722 1732 14 58.1 17.2 2.5X +RDD 4086 4184 139 24.5 40.9 1.0X +DataFrame 663 692 48 150.7 6.6 6.2X +Dataset 1515 1528 18 66.0 15.1 2.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor back-to-back filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 2250 2275 36 44.4 22.5 1.0X -DataFrame 115 126 10 873.3 1.1 19.7X -Dataset 2441 2459 25 41.0 24.4 0.9X +RDD 2091 2111 29 47.8 20.9 1.0X +DataFrame 112 127 13 892.0 1.1 18.7X +Dataset 2456 2476 29 40.7 24.6 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor aggregate: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -RDD sum 1434 1444 14 69.7 14.3 1.0X -DataFrame sum 67 80 10 1483.2 0.7 21.3X -Dataset sum using Aggregator 2083 2146 90 48.0 20.8 0.7X -Dataset complex Aggregator 5100 5116 23 19.6 51.0 0.3X +RDD sum 1426 1474 68 70.1 14.3 1.0X +DataFrame sum 68 79 11 1478.3 0.7 21.1X +Dataset sum using Aggregator 2117 2130 19 47.2 21.2 0.7X +Dataset complex Aggregator 5121 5237 165 19.5 51.2 0.3X diff --git a/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt b/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt index c230aea8da606..e4b39687c4080 100644 --- a/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt @@ -2,460 +2,460 @@ datetime +/- interval ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor datetime +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date + interval(m) 1185 1217 45 8.4 118.5 1.0X -date + interval(m, d) 1166 1188 31 8.6 116.6 1.0X -date + interval(m, d, ms) 3784 3794 13 2.6 378.4 0.3X -date - interval(m) 1098 1101 3 9.1 109.8 1.1X -date - interval(m, d) 1119 1128 13 8.9 111.9 1.1X -date - interval(m, d, ms) 3792 3799 9 2.6 379.2 0.3X -timestamp + interval(m) 1516 1522 8 6.6 151.6 0.8X -timestamp + interval(m, d) 1571 1573 3 6.4 157.1 0.8X -timestamp + interval(m, d, ms) 1716 1717 2 5.8 171.6 0.7X -timestamp - interval(m) 1503 1505 2 6.7 150.3 0.8X -timestamp - interval(m, d) 1557 1558 1 6.4 155.7 0.8X -timestamp - interval(m, d, ms) 1714 1716 3 5.8 171.4 0.7X +date + interval(m) 1127 1142 21 8.9 112.7 1.0X 
+date + interval(m, d) 1098 1100 3 9.1 109.8 1.0X +date + interval(m, d, ms) 3853 3857 5 2.6 385.3 0.3X +date - interval(m) 1084 1089 7 9.2 108.4 1.0X +date - interval(m, d) 1104 1106 4 9.1 110.4 1.0X +date - interval(m, d, ms) 3908 3946 54 2.6 390.8 0.3X +timestamp + interval(m) 1985 1988 4 5.0 198.5 0.6X +timestamp + interval(m, d) 2020 2026 9 5.0 202.0 0.6X +timestamp + interval(m, d, ms) 2106 2122 22 4.7 210.6 0.5X +timestamp - interval(m) 1935 1938 4 5.2 193.5 0.6X +timestamp - interval(m, d) 1973 1981 11 5.1 197.3 0.6X +timestamp - interval(m, d, ms) 2100 2110 14 4.8 210.0 0.5X ================================================================================================ Extract components ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp wholestage off 200 202 3 49.9 20.0 1.0X -cast to timestamp wholestage on 220 231 9 45.5 22.0 0.9X +cast to timestamp wholestage off 199 202 4 50.3 19.9 1.0X +cast to timestamp wholestage on 219 223 3 45.7 21.9 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor year of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -year of timestamp wholestage off 707 709 3 14.2 70.7 1.0X -year of timestamp wholestage on 718 721 4 13.9 71.8 1.0X +year of timestamp wholestage off 695 697 3 14.4 69.5 1.0X +year of timestamp wholestage on 705 713 10 
14.2 70.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor quarter of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -quarter of timestamp wholestage off 743 744 2 13.5 74.3 1.0X -quarter of timestamp wholestage on 747 754 9 13.4 74.7 1.0X +quarter of timestamp wholestage off 752 758 10 13.3 75.2 1.0X +quarter of timestamp wholestage on 790 795 4 12.7 79.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor month of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -month of timestamp wholestage off 720 724 5 13.9 72.0 1.0X -month of timestamp wholestage on 729 731 2 13.7 72.9 1.0X +month of timestamp wholestage off 709 716 9 14.1 70.9 1.0X +month of timestamp wholestage on 722 727 5 13.8 72.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor weekofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekofyear of timestamp wholestage off 1098 1098 0 9.1 109.8 1.0X -weekofyear of timestamp wholestage on 1141 1151 17 8.8 114.1 1.0X +weekofyear of timestamp wholestage off 1113 1113 1 9.0 111.3 1.0X +weekofyear of timestamp wholestage on 1137 1151 22 8.8 113.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on 
Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor day of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -day of timestamp wholestage off 732 737 7 13.7 73.2 1.0X -day of timestamp wholestage on 756 760 3 13.2 75.6 1.0X +day of timestamp wholestage off 724 726 2 13.8 72.4 1.0X +day of timestamp wholestage on 766 779 9 13.1 76.6 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dayofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofyear of timestamp wholestage off 767 770 4 13.0 76.7 1.0X -dayofyear of timestamp wholestage on 780 785 3 12.8 78.0 1.0X +dayofyear of timestamp wholestage off 751 753 3 13.3 75.1 1.0X +dayofyear of timestamp wholestage on 763 767 4 13.1 76.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dayofmonth of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofmonth of timestamp wholestage off 755 763 11 13.2 75.5 1.0X -dayofmonth of timestamp wholestage on 758 764 7 13.2 75.8 1.0X +dayofmonth of timestamp wholestage off 742 744 3 13.5 74.2 1.0X +dayofmonth of timestamp wholestage on 777 782 5 12.9 77.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dayofweek of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per 
Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofweek of timestamp wholestage off 886 889 5 11.3 88.6 1.0X -dayofweek of timestamp wholestage on 933 943 9 10.7 93.3 0.9X +dayofweek of timestamp wholestage off 885 887 2 11.3 88.5 1.0X +dayofweek of timestamp wholestage on 974 976 1 10.3 97.4 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor weekday of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekday of timestamp wholestage off 822 822 0 12.2 82.2 1.0X -weekday of timestamp wholestage on 839 845 9 11.9 83.9 1.0X +weekday of timestamp wholestage off 818 821 5 12.2 81.8 1.0X +weekday of timestamp wholestage on 901 911 9 11.1 90.1 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor hour of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hour of timestamp wholestage off 549 558 13 18.2 54.9 1.0X -hour of timestamp wholestage on 564 567 2 17.7 56.4 1.0X +hour of timestamp wholestage off 545 547 2 18.4 54.5 1.0X +hour of timestamp wholestage on 617 626 9 16.2 61.7 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor minute of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -minute 
of timestamp wholestage off 547 549 2 18.3 54.7 1.0X -minute of timestamp wholestage on 561 567 4 17.8 56.1 1.0X +minute of timestamp wholestage off 542 550 11 18.4 54.2 1.0X +minute of timestamp wholestage on 556 565 13 18.0 55.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor second of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -second of timestamp wholestage off 552 555 5 18.1 55.2 1.0X -second of timestamp wholestage on 561 564 3 17.8 56.1 1.0X +second of timestamp wholestage off 541 546 8 18.5 54.1 1.0X +second of timestamp wholestage on 560 566 4 17.8 56.0 1.0X ================================================================================================ Current date and time ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor current_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_date wholestage off 179 179 0 55.9 17.9 1.0X -current_date wholestage on 224 228 6 44.7 22.4 0.8X +current_date wholestage off 176 178 3 56.7 17.6 1.0X +current_date wholestage on 216 223 8 46.3 21.6 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor current_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -current_timestamp wholestage off 176 181 6 56.7 17.6 1.0X -current_timestamp wholestage on 236 241 5 42.3 23.6 0.7X +current_timestamp wholestage off 182 190 12 55.1 18.2 1.0X +current_timestamp wholestage on 232 240 8 43.1 23.2 0.8X ================================================================================================ Date arithmetic ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cast to date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date wholestage off 639 641 2 15.6 63.9 1.0X -cast to date wholestage on 717 721 6 14.0 71.7 0.9X +cast to date wholestage off 638 643 7 15.7 63.8 1.0X +cast to date wholestage on 714 720 7 14.0 71.4 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor last_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -last_day wholestage off 794 797 3 12.6 79.4 1.0X -last_day wholestage on 817 821 4 12.2 81.7 1.0X +last_day wholestage off 773 777 6 12.9 77.3 1.0X +last_day wholestage on 809 811 1 12.4 80.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor next_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -next_day wholestage off 724 728 6 13.8 72.4 1.0X -next_day wholestage on 744 747 3 13.4 74.4 1.0X +next_day wholestage off 715 751 51 14.0 71.5 1.0X +next_day wholestage on 735 739 7 13.6 73.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_add: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_add wholestage off 676 679 4 14.8 67.6 1.0X -date_add wholestage on 700 704 3 14.3 70.0 1.0X +date_add wholestage off 669 681 16 14.9 66.9 1.0X +date_add wholestage on 684 689 4 14.6 68.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_sub: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_sub wholestage off 684 684 1 14.6 68.4 1.0X -date_sub wholestage on 698 701 3 14.3 69.8 1.0X +date_sub wholestage off 670 670 1 14.9 67.0 1.0X +date_sub wholestage on 684 694 14 14.6 68.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor add_months: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -add_months wholestage off 983 984 2 10.2 98.3 1.0X -add_months wholestage on 1069 1074 7 9.4 106.9 0.9X +add_months wholestage off 960 962 3 10.4 96.0 1.0X +add_months wholestage on 1058 1065 
5 9.5 105.8 0.9X ================================================================================================ Formatting dates ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor format date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -format date wholestage off 3043 3062 27 3.3 304.3 1.0X -format date wholestage on 3118 3133 14 3.2 311.8 1.0X +format date wholestage off 3175 3213 53 3.1 317.5 1.0X +format date wholestage on 3134 3152 22 3.2 313.4 1.0X ================================================================================================ Formatting timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor from_unixtime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_unixtime wholestage off 2560 2560 0 3.9 256.0 1.0X -from_unixtime wholestage on 2594 2653 37 3.9 259.4 1.0X +from_unixtime wholestage off 2599 2601 3 3.8 259.9 1.0X +from_unixtime wholestage on 2682 2692 10 3.7 268.2 1.0X ================================================================================================ Convert timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core 
Processor from_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_utc_timestamp wholestage off 641 642 2 15.6 64.1 1.0X -from_utc_timestamp wholestage on 767 770 3 13.0 76.7 0.8X +from_utc_timestamp wholestage off 623 625 2 16.0 62.3 1.0X +from_utc_timestamp wholestage on 751 753 2 13.3 75.1 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_utc_timestamp wholestage off 809 812 5 12.4 80.9 1.0X -to_utc_timestamp wholestage on 882 889 7 11.3 88.2 0.9X +to_utc_timestamp wholestage off 769 772 4 13.0 76.9 1.0X +to_utc_timestamp wholestage on 826 828 1 12.1 82.6 0.9X ================================================================================================ Intervals ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cast interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast interval wholestage off 225 242 24 44.5 22.5 1.0X -cast interval wholestage on 225 226 2 44.5 22.5 1.0X +cast interval wholestage off 227 233 9 44.0 22.7 1.0X +cast interval wholestage on 217 223 8 46.1 21.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core 
Processor datediff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -datediff wholestage off 1226 1229 3 8.2 122.6 1.0X -datediff wholestage on 1220 1224 3 8.2 122.0 1.0X +datediff wholestage off 1231 1234 4 8.1 123.1 1.0X +datediff wholestage on 1225 1230 8 8.2 122.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor months_between: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -months_between wholestage off 3576 3582 8 2.8 357.6 1.0X -months_between wholestage on 3568 3581 23 2.8 356.8 1.0X +months_between wholestage off 3561 3561 1 2.8 356.1 1.0X +months_between wholestage on 3597 3607 7 2.8 359.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor window: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -window wholestage off 383 395 18 2.6 382.5 1.0X -window wholestage on 634 657 25 1.6 633.8 0.6X +window wholestage off 389 396 11 2.6 388.6 1.0X +window wholestage on 669 685 15 1.5 668.8 0.6X ================================================================================================ Truncation ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc YEAR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) 
Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YEAR wholestage off 1862 1863 0 5.4 186.2 1.0X -date_trunc YEAR wholestage on 1867 1875 6 5.4 186.7 1.0X +date_trunc YEAR wholestage off 1902 1902 0 5.3 190.2 1.0X +date_trunc YEAR wholestage on 1909 1915 6 5.2 190.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc YYYY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YYYY wholestage off 1864 1867 4 5.4 186.4 1.0X -date_trunc YYYY wholestage on 1865 1871 4 5.4 186.5 1.0X +date_trunc YYYY wholestage off 1908 1908 1 5.2 190.8 1.0X +date_trunc YYYY wholestage on 1909 1911 1 5.2 190.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc YY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YY wholestage off 1867 1869 3 5.4 186.7 1.0X -date_trunc YY wholestage on 1867 1874 5 5.4 186.7 1.0X +date_trunc YY wholestage off 1899 1901 2 5.3 189.9 1.0X +date_trunc YY wholestage on 1907 1918 6 5.2 190.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc MON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MON wholestage off 1897 1904 10 5.3 189.7 1.0X 
-date_trunc MON wholestage on 1857 1862 5 5.4 185.7 1.0X +date_trunc MON wholestage off 1925 1926 2 5.2 192.5 1.0X +date_trunc MON wholestage on 1887 1899 14 5.3 188.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc MONTH: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MONTH wholestage off 1901 1901 1 5.3 190.1 1.0X -date_trunc MONTH wholestage on 1858 1863 4 5.4 185.8 1.0X +date_trunc MONTH wholestage off 1930 1932 2 5.2 193.0 1.0X +date_trunc MONTH wholestage on 1890 1895 6 5.3 189.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc MM: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MM wholestage off 1890 1895 7 5.3 189.0 1.0X -date_trunc MM wholestage on 1858 1861 2 5.4 185.8 1.0X +date_trunc MM wholestage off 1928 1930 2 5.2 192.8 1.0X +date_trunc MM wholestage on 1889 1895 5 5.3 188.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc DAY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DAY wholestage off 1232 1234 2 8.1 123.2 1.0X -date_trunc DAY wholestage on 1330 1336 4 7.5 133.0 0.9X +date_trunc DAY wholestage off 1216 1219 5 8.2 121.6 1.0X +date_trunc DAY wholestage on 1183 1187 3 8.5 118.3 1.0X -OpenJDK 64-Bit Server VM 
21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc DD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DD wholestage off 1231 1233 4 8.1 123.1 1.0X -date_trunc DD wholestage on 1334 1337 5 7.5 133.4 0.9X +date_trunc DD wholestage off 1216 1217 2 8.2 121.6 1.0X +date_trunc DD wholestage on 1185 1194 17 8.4 118.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc HOUR wholestage off 1200 1201 3 8.3 120.0 1.0X -date_trunc HOUR wholestage on 1162 1168 5 8.6 116.2 1.0X +date_trunc HOUR wholestage off 1212 1215 4 8.2 121.2 1.0X +date_trunc HOUR wholestage on 1169 1174 5 8.6 116.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MINUTE wholestage off 1206 1209 5 8.3 120.6 1.0X -date_trunc MINUTE wholestage on 1170 1174 3 8.5 117.0 1.0X +date_trunc MINUTE wholestage off 1233 1233 0 8.1 123.3 1.0X +date_trunc MINUTE wholestage on 1199 1204 3 8.3 119.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc SECOND: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc SECOND wholestage off 289 289 0 34.6 28.9 1.0X -date_trunc SECOND wholestage on 264 271 4 37.8 26.4 1.1X +date_trunc SECOND wholestage off 307 309 3 32.6 30.7 1.0X +date_trunc SECOND wholestage on 281 283 1 35.6 28.1 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc WEEK: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc WEEK wholestage off 1788 1794 8 5.6 178.8 1.0X -date_trunc WEEK wholestage on 1753 1756 3 5.7 175.3 1.0X +date_trunc WEEK wholestage off 1810 1810 0 5.5 181.0 1.0X +date_trunc WEEK wholestage on 1775 1789 16 5.6 177.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc QUARTER: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc QUARTER wholestage off 2320 2323 4 4.3 232.0 1.0X -date_trunc QUARTER wholestage on 2324 2349 54 4.3 232.4 1.0X +date_trunc QUARTER wholestage off 2367 2370 4 4.2 236.7 1.0X +date_trunc QUARTER wholestage on 2414 2419 6 4.1 241.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc year: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc year 
wholestage off 849 851 2 11.8 84.9 1.0X -trunc year wholestage on 829 832 3 12.1 82.9 1.0X +trunc year wholestage off 873 876 4 11.5 87.3 1.0X +trunc year wholestage on 847 852 8 11.8 84.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc yyyy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yyyy wholestage off 850 853 4 11.8 85.0 1.0X -trunc yyyy wholestage on 829 843 20 12.1 82.9 1.0X +trunc yyyy wholestage off 870 882 16 11.5 87.0 1.0X +trunc yyyy wholestage on 844 846 3 11.9 84.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc yy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yy wholestage off 851 851 0 11.8 85.1 1.0X -trunc yy wholestage on 827 831 4 12.1 82.7 1.0X +trunc yy wholestage off 869 870 1 11.5 86.9 1.0X +trunc yy wholestage on 844 850 7 11.9 84.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc mon: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mon wholestage off 815 817 3 12.3 81.5 1.0X -trunc mon wholestage on 809 812 4 12.4 80.9 1.0X +trunc mon wholestage off 836 840 5 12.0 83.6 1.0X +trunc mon wholestage on 810 815 7 12.3 81.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on 
Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc month: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc month wholestage off 815 817 4 12.3 81.5 1.0X -trunc month wholestage on 806 809 2 12.4 80.6 1.0X +trunc month wholestage off 833 835 3 12.0 83.3 1.0X +trunc month wholestage on 812 815 2 12.3 81.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc mm: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mm wholestage off 812 813 1 12.3 81.2 1.0X -trunc mm wholestage on 805 810 4 12.4 80.5 1.0X +trunc mm wholestage off 838 840 3 11.9 83.8 1.0X +trunc mm wholestage on 815 818 2 12.3 81.5 1.0X ================================================================================================ Parsing ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to timestamp str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to timestamp str wholestage off 97 99 2 10.3 96.9 1.0X -to timestamp str wholestage on 103 106 3 9.7 102.9 0.9X +to timestamp str wholestage off 106 107 1 9.4 106.2 1.0X +to timestamp str wholestage on 101 102 1 9.9 100.7 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 
to_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_timestamp wholestage off 678 679 2 1.5 677.5 1.0X -to_timestamp wholestage on 676 680 3 1.5 676.1 1.0X +to_timestamp wholestage off 681 683 3 1.5 680.9 1.0X +to_timestamp wholestage on 680 684 3 1.5 680.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to_unix_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_unix_timestamp wholestage off 677 682 6 1.5 677.3 1.0X -to_unix_timestamp wholestage on 669 672 2 1.5 669.1 1.0X +to_unix_timestamp wholestage off 662 662 0 1.5 662.1 1.0X +to_unix_timestamp wholestage on 658 661 2 1.5 657.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to date str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to date str wholestage off 133 135 2 7.5 133.1 1.0X -to date str wholestage on 126 131 3 7.9 126.4 1.1X +to date str wholestage off 133 138 7 7.5 133.4 1.0X +to date str wholestage on 129 132 3 7.7 129.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_date wholestage off 716 723 10 1.4 
716.2 1.0X -to_date wholestage on 690 693 3 1.4 690.4 1.0X +to_date wholestage off 676 679 4 1.5 676.2 1.0X +to_date wholestage on 672 674 2 1.5 671.6 1.0X ================================================================================================ Conversion from/to external types ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor To/from Java's date-time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -From java.sql.Date 282 284 3 17.8 56.3 1.0X -From java.time.LocalDate 265 276 12 18.8 53.1 1.1X -Collect java.sql.Date 1145 1206 97 4.4 229.0 0.2X -Collect java.time.LocalDate 959 1050 100 5.2 191.7 0.3X -From java.sql.Timestamp 229 245 22 21.9 45.7 1.2X -From java.time.Instant 173 176 6 28.9 34.5 1.6X -Collect longs 910 960 73 5.5 182.0 0.3X -Collect java.sql.Timestamp 920 1118 173 5.4 183.9 0.3X -Collect java.time.Instant 877 967 79 5.7 175.3 0.3X -java.sql.Date to Hive string 3960 4078 184 1.3 792.0 0.1X -java.time.LocalDate to Hive string 3039 3117 128 1.6 607.8 0.1X -java.sql.Timestamp to Hive string 6521 6619 162 0.8 1304.1 0.0X -java.time.Instant to Hive string 4252 4346 91 1.2 850.4 0.1X +From java.sql.Date 283 285 2 17.6 56.7 1.0X +From java.time.LocalDate 265 267 1 18.8 53.1 1.1X +Collect java.sql.Date 1215 1255 34 4.1 243.1 0.2X +Collect java.time.LocalDate 1061 1141 113 4.7 212.1 0.3X +From java.sql.Timestamp 232 248 14 21.5 46.4 1.2X +From java.time.Instant 196 203 11 25.5 39.2 1.4X +Collect longs 811 1029 201 6.2 162.2 0.3X +Collect java.sql.Timestamp 904 1113 183 5.5 180.8 0.3X +Collect java.time.Instant 943 1080 143 5.3 188.7 0.3X +java.sql.Date to Hive string 4049 4296 215 1.2 809.8 0.1X 
+java.time.LocalDate to Hive string 3393 3476 72 1.5 678.6 0.1X +java.sql.Timestamp to Hive string 6599 6712 98 0.8 1319.7 0.0X +java.time.Instant to Hive string 4387 4475 83 1.1 877.3 0.1X diff --git a/sql/core/benchmarks/DateTimeBenchmark-results.txt b/sql/core/benchmarks/DateTimeBenchmark-results.txt index 176cc1dc361b8..91e954cec7253 100644 --- a/sql/core/benchmarks/DateTimeBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeBenchmark-results.txt @@ -2,460 +2,460 @@ datetime +/- interval ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor datetime +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date + interval(m) 1253 1259 9 8.0 125.3 1.0X -date + interval(m, d) 1258 1260 2 7.9 125.8 1.0X -date + interval(m, d, ms) 3904 3923 26 2.6 390.4 0.3X -date - interval(m) 1201 1214 18 8.3 120.1 1.0X -date - interval(m, d) 1236 1238 4 8.1 123.6 1.0X -date - interval(m, d, ms) 3983 3987 5 2.5 398.3 0.3X -timestamp + interval(m) 1846 1852 9 5.4 184.6 0.7X -timestamp + interval(m, d) 1919 1932 18 5.2 191.9 0.7X -timestamp + interval(m, d, ms) 2264 2273 12 4.4 226.4 0.6X -timestamp - interval(m) 2025 2027 3 4.9 202.5 0.6X -timestamp - interval(m, d) 2097 2104 10 4.8 209.7 0.6X -timestamp - interval(m, d, ms) 2265 2270 8 4.4 226.5 0.6X +date + interval(m) 1218 1236 26 8.2 121.8 1.0X +date + interval(m, d) 1194 1214 28 8.4 119.4 1.0X +date + interval(m, d, ms) 3975 3982 11 2.5 397.5 0.3X +date - interval(m) 1140 1149 13 8.8 114.0 1.1X +date - interval(m, d) 1180 1180 1 8.5 118.0 1.0X +date - interval(m, d, ms) 4014 4017 4 2.5 401.4 0.3X +timestamp + interval(m) 1801 1819 25 5.6 180.1 0.7X +timestamp + interval(m, 
d) 1857 1861 5 5.4 185.7 0.7X +timestamp + interval(m, d, ms) 2184 2185 1 4.6 218.4 0.6X +timestamp - interval(m) 1950 1952 3 5.1 195.0 0.6X +timestamp - interval(m, d) 2016 2027 15 5.0 201.6 0.6X +timestamp - interval(m, d, ms) 2173 2174 1 4.6 217.3 0.6X ================================================================================================ Extract components ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp wholestage off 193 195 3 51.9 19.3 1.0X -cast to timestamp wholestage on 213 220 7 47.0 21.3 0.9X +cast to timestamp wholestage off 200 201 1 49.9 20.0 1.0X +cast to timestamp wholestage on 210 220 8 47.5 21.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor year of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -year of timestamp wholestage off 828 832 5 12.1 82.8 1.0X -year of timestamp wholestage on 855 865 11 11.7 85.5 1.0X +year of timestamp wholestage off 821 827 8 12.2 82.1 1.0X +year of timestamp wholestage on 825 828 4 12.1 82.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor quarter of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -quarter of timestamp wholestage off 854 854 0 11.7 85.4 1.0X -quarter of timestamp wholestage on 884 893 9 11.3 88.4 1.0X +quarter of timestamp wholestage off 872 876 5 11.5 87.2 1.0X +quarter of timestamp wholestage on 843 846 4 11.9 84.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor month of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -month of timestamp wholestage off 833 834 1 12.0 83.3 1.0X -month of timestamp wholestage on 845 849 4 11.8 84.5 1.0X +month of timestamp wholestage off 820 821 1 12.2 82.0 1.0X +month of timestamp wholestage on 827 829 3 12.1 82.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor weekofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekofyear of timestamp wholestage off 1210 1214 5 8.3 121.0 1.0X -weekofyear of timestamp wholestage on 1255 1266 12 8.0 125.5 1.0X +weekofyear of timestamp wholestage off 1207 1208 2 8.3 120.7 1.0X +weekofyear of timestamp wholestage on 1221 1224 3 8.2 122.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor day of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-day of timestamp wholestage off 828 830 3 12.1 82.8 1.0X -day of timestamp wholestage on 847 854 12 11.8 84.7 1.0X +day of timestamp wholestage off 821 826 7 12.2 82.1 1.0X +day of timestamp wholestage on 823 831 6 12.2 82.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dayofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofyear of timestamp wholestage off 854 855 2 11.7 85.4 1.0X -dayofyear of timestamp wholestage on 913 921 5 10.9 91.3 0.9X +dayofyear of timestamp wholestage off 871 872 1 11.5 87.1 1.0X +dayofyear of timestamp wholestage on 858 861 4 11.7 85.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dayofmonth of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofmonth of timestamp wholestage off 849 854 8 11.8 84.9 1.0X -dayofmonth of timestamp wholestage on 848 859 9 11.8 84.8 1.0X +dayofmonth of timestamp wholestage off 832 834 3 12.0 83.2 1.0X +dayofmonth of timestamp wholestage on 823 826 3 12.1 82.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dayofweek of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofweek of timestamp wholestage off 984 989 7 10.2 98.4 1.0X -dayofweek of timestamp wholestage on 1026 1038 7 9.7 
102.6 1.0X +dayofweek of timestamp wholestage off 969 972 5 10.3 96.9 1.0X +dayofweek of timestamp wholestage on 976 978 2 10.3 97.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor weekday of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekday of timestamp wholestage off 944 945 2 10.6 94.4 1.0X -weekday of timestamp wholestage on 978 985 7 10.2 97.8 1.0X +weekday of timestamp wholestage off 941 943 3 10.6 94.1 1.0X +weekday of timestamp wholestage on 926 930 2 10.8 92.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor hour of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hour of timestamp wholestage off 601 604 3 16.6 60.1 1.0X -hour of timestamp wholestage on 609 613 4 16.4 60.9 1.0X +hour of timestamp wholestage off 605 610 7 16.5 60.5 1.0X +hour of timestamp wholestage on 610 613 4 16.4 61.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor minute of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -minute of timestamp wholestage off 603 606 5 16.6 60.3 1.0X -minute of timestamp wholestage on 609 622 21 16.4 60.9 1.0X +minute of timestamp wholestage off 600 603 5 16.7 60.0 1.0X +minute of timestamp wholestage on 609 610 2 16.4 60.9 1.0X -OpenJDK 
64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor second of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -second of timestamp wholestage off 603 604 1 16.6 60.3 1.0X -second of timestamp wholestage on 612 617 5 16.3 61.2 1.0X +second of timestamp wholestage off 604 611 10 16.6 60.4 1.0X +second of timestamp wholestage on 608 610 3 16.5 60.8 1.0X ================================================================================================ Current date and time ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor current_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_date wholestage off 188 190 2 53.1 18.8 1.0X -current_date wholestage on 213 217 3 47.0 21.3 0.9X +current_date wholestage off 183 184 1 54.6 18.3 1.0X +current_date wholestage on 216 218 3 46.3 21.6 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor current_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_timestamp wholestage off 185 189 6 54.0 18.5 1.0X -current_timestamp wholestage on 225 228 2 44.4 22.5 0.8X +current_timestamp wholestage off 192 205 19 52.2 19.2 1.0X +current_timestamp wholestage on 220 231 9 
45.4 22.0 0.9X ================================================================================================ Date arithmetic ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cast to date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date wholestage off 754 757 5 13.3 75.4 1.0X -cast to date wholestage on 771 777 6 13.0 77.1 1.0X +cast to date wholestage off 862 863 1 11.6 86.2 1.0X +cast to date wholestage on 876 893 22 11.4 87.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor last_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -last_day wholestage off 854 855 1 11.7 85.4 1.0X -last_day wholestage on 868 871 3 11.5 86.8 1.0X +last_day wholestage off 967 970 4 10.3 96.7 1.0X +last_day wholestage on 982 985 3 10.2 98.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor next_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -next_day wholestage off 782 783 2 12.8 78.2 1.0X -next_day wholestage on 811 818 9 12.3 81.1 1.0X +next_day wholestage off 888 892 5 11.3 88.8 1.0X +next_day wholestage on 899 901 2 11.1 89.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_add: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_add wholestage off 735 736 1 13.6 73.5 1.0X -date_add wholestage on 754 759 8 13.3 75.4 1.0X +date_add wholestage off 843 843 1 11.9 84.3 1.0X +date_add wholestage on 875 882 12 11.4 87.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_sub: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_sub wholestage off 740 741 0 13.5 74.0 1.0X -date_sub wholestage on 753 757 6 13.3 75.3 1.0X +date_sub wholestage off 842 845 5 11.9 84.2 1.0X +date_sub wholestage on 876 883 6 11.4 87.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor add_months: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -add_months wholestage off 1142 1143 1 8.8 114.2 1.0X -add_months wholestage on 1138 1149 14 8.8 113.8 1.0X +add_months wholestage off 1182 1185 4 8.5 118.2 1.0X +add_months wholestage on 1205 1210 5 8.3 120.5 1.0X ================================================================================================ Formatting dates ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor format date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -format date wholestage off 3591 3598 9 2.8 359.1 1.0X -format date wholestage on 3704 3724 25 2.7 370.4 1.0X +format date wholestage off 4003 4012 14 2.5 400.3 1.0X +format date wholestage on 4044 4048 5 2.5 404.4 1.0X ================================================================================================ Formatting timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor from_unixtime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_unixtime wholestage off 3881 3886 7 2.6 388.1 1.0X -from_unixtime wholestage on 3844 4051 117 2.6 384.4 1.0X +from_unixtime wholestage off 4055 4059 5 2.5 405.5 1.0X +from_unixtime wholestage on 4081 4091 8 2.5 408.1 1.0X ================================================================================================ Convert timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor from_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_utc_timestamp wholestage off 673 677 6 14.9 67.3 1.0X -from_utc_timestamp wholestage on 782 788 6 12.8 78.2 0.9X +from_utc_timestamp wholestage off 675 
685 14 14.8 67.5 1.0X +from_utc_timestamp wholestage on 802 810 7 12.5 80.2 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_utc_timestamp wholestage off 1038 1038 0 9.6 103.8 1.0X -to_utc_timestamp wholestage on 1025 1031 5 9.8 102.5 1.0X +to_utc_timestamp wholestage off 1054 1055 1 9.5 105.4 1.0X +to_utc_timestamp wholestage on 1073 1076 3 9.3 107.3 1.0X ================================================================================================ Intervals ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cast interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast interval wholestage off 250 257 10 40.1 25.0 1.0X -cast interval wholestage on 215 220 6 46.6 21.5 1.2X +cast interval wholestage off 218 221 4 45.8 21.8 1.0X +cast interval wholestage on 217 221 4 46.2 21.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor datediff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -datediff wholestage off 1340 1342 4 7.5 134.0 1.0X -datediff wholestage on 1389 1395 5 7.2 138.9 1.0X +datediff wholestage off 1492 1500 12 6.7 149.2 
1.0X +datediff wholestage on 1514 1517 2 6.6 151.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor months_between: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -months_between wholestage off 3444 3450 8 2.9 344.4 1.0X -months_between wholestage on 3439 3453 14 2.9 343.9 1.0X +months_between wholestage off 3634 3641 10 2.8 363.4 1.0X +months_between wholestage on 3577 3583 4 2.8 357.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor window: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -window wholestage off 427 449 31 2.3 427.2 1.0X -window wholestage on 656 690 23 1.5 655.8 0.7X +window wholestage off 445 445 0 2.2 445.3 1.0X +window wholestage on 645 660 12 1.6 645.0 0.7X ================================================================================================ Truncation ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc YEAR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YEAR wholestage off 1828 1834 8 5.5 182.8 1.0X -date_trunc YEAR wholestage on 1776 1780 6 5.6 177.6 1.0X +date_trunc YEAR wholestage off 1870 1870 0 5.3 187.0 1.0X +date_trunc YEAR wholestage on 
1837 1843 8 5.4 183.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc YYYY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YYYY wholestage off 1847 1849 2 5.4 184.7 1.0X -date_trunc YYYY wholestage on 1774 1781 6 5.6 177.4 1.0X +date_trunc YYYY wholestage off 1867 1870 5 5.4 186.7 1.0X +date_trunc YYYY wholestage on 1841 1844 4 5.4 184.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc YY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YY wholestage off 1843 1844 1 5.4 184.3 1.0X -date_trunc YY wholestage on 1778 1781 2 5.6 177.8 1.0X +date_trunc YY wholestage off 1868 1871 4 5.4 186.8 1.0X +date_trunc YY wholestage on 1838 1842 4 5.4 183.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc MON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MON wholestage off 1857 1861 6 5.4 185.7 1.0X -date_trunc MON wholestage on 1786 1791 6 5.6 178.6 1.0X +date_trunc MON wholestage off 1961 1964 4 5.1 196.1 1.0X +date_trunc MON wholestage on 1880 1884 3 5.3 188.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 
date_trunc MONTH: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MONTH wholestage off 1860 1871 15 5.4 186.0 1.0X -date_trunc MONTH wholestage on 1782 1789 4 5.6 178.2 1.0X +date_trunc MONTH wholestage off 1966 1966 1 5.1 196.6 1.0X +date_trunc MONTH wholestage on 1881 1884 3 5.3 188.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc MM: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MM wholestage off 1860 1868 11 5.4 186.0 1.0X -date_trunc MM wholestage on 1789 1792 4 5.6 178.9 1.0X +date_trunc MM wholestage off 1966 1971 7 5.1 196.6 1.0X +date_trunc MM wholestage on 1881 1885 4 5.3 188.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc DAY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DAY wholestage off 1328 1338 14 7.5 132.8 1.0X -date_trunc DAY wholestage on 1281 1286 5 7.8 128.1 1.0X +date_trunc DAY wholestage off 1318 1319 1 7.6 131.8 1.0X +date_trunc DAY wholestage on 1278 1284 5 7.8 127.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc DD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -date_trunc DD wholestage off 1330 1335 6 7.5 133.0 1.0X -date_trunc DD wholestage on 1277 1280 2 7.8 127.7 1.0X +date_trunc DD wholestage off 1310 1312 2 7.6 131.0 1.0X +date_trunc DD wholestage on 1280 1285 6 7.8 128.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc HOUR wholestage off 1342 1347 7 7.5 134.2 1.0X -date_trunc HOUR wholestage on 1281 1285 3 7.8 128.1 1.0X +date_trunc HOUR wholestage off 1325 1328 5 7.5 132.5 1.0X +date_trunc HOUR wholestage on 1288 1294 4 7.8 128.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MINUTE wholestage off 1344 1346 2 7.4 134.4 1.0X -date_trunc MINUTE wholestage on 1306 1310 3 7.7 130.6 1.0X +date_trunc MINUTE wholestage off 1335 1339 5 7.5 133.5 1.0X +date_trunc MINUTE wholestage on 1316 1321 4 7.6 131.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc SECOND wholestage off 331 334 4 30.2 33.1 1.0X 
-date_trunc SECOND wholestage on 278 282 6 35.9 27.8 1.2X +date_trunc SECOND wholestage off 317 321 6 31.6 31.7 1.0X +date_trunc SECOND wholestage on 276 279 5 36.3 27.6 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc WEEK: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc WEEK wholestage off 1742 1746 5 5.7 174.2 1.0X -date_trunc WEEK wholestage on 1688 1692 7 5.9 168.8 1.0X +date_trunc WEEK wholestage off 1812 1816 6 5.5 181.2 1.0X +date_trunc WEEK wholestage on 1764 1768 3 5.7 176.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor date_trunc QUARTER: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc QUARTER wholestage off 2385 2385 1 4.2 238.5 1.0X -date_trunc QUARTER wholestage on 2479 2495 32 4.0 247.9 1.0X +date_trunc QUARTER wholestage off 2664 2666 2 3.8 266.4 1.0X +date_trunc QUARTER wholestage on 2670 2684 16 3.7 267.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc year: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc year wholestage off 1025 1025 1 9.8 102.5 1.0X -trunc year wholestage on 995 1003 8 10.0 99.5 1.0X +trunc year wholestage off 1123 1123 0 8.9 112.3 1.0X +trunc year wholestage on 1082 1085 3 9.2 108.2 1.0X -OpenJDK 64-Bit 
Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc yyyy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yyyy wholestage off 1024 1027 4 9.8 102.4 1.0X -trunc yyyy wholestage on 995 999 4 10.1 99.5 1.0X +trunc yyyy wholestage off 1119 1119 1 8.9 111.9 1.0X +trunc yyyy wholestage on 1081 1092 16 9.3 108.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc yy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yy wholestage off 1026 1026 0 9.8 102.6 1.0X -trunc yy wholestage on 999 1001 2 10.0 99.9 1.0X +trunc yy wholestage off 1121 1122 2 8.9 112.1 1.0X +trunc yy wholestage on 1083 1088 10 9.2 108.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc mon: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mon wholestage off 998 1000 3 10.0 99.8 1.0X -trunc mon wholestage on 952 953 1 10.5 95.2 1.0X +trunc mon wholestage off 1110 1123 18 9.0 111.0 1.0X +trunc mon wholestage on 1052 1061 12 9.5 105.2 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc month: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -trunc month wholestage off 999 1000 1 10.0 99.9 1.0X -trunc month wholestage on 951 961 18 10.5 95.1 1.1X +trunc month wholestage off 1102 1105 4 9.1 110.2 1.0X +trunc month wholestage on 1054 1057 2 9.5 105.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trunc mm: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mm wholestage off 1001 1003 3 10.0 100.1 1.0X -trunc mm wholestage on 951 953 2 10.5 95.1 1.1X +trunc mm wholestage off 1103 1103 0 9.1 110.3 1.0X +trunc mm wholestage on 1056 1067 12 9.5 105.6 1.0X ================================================================================================ Parsing ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to timestamp str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to timestamp str wholestage off 104 113 12 9.6 104.0 1.0X -to timestamp str wholestage on 100 103 3 10.0 99.5 1.0X +to timestamp str wholestage off 97 98 2 10.3 96.7 1.0X +to timestamp str wholestage on 99 102 4 10.1 99.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -to_timestamp wholestage off 760 763 3 1.3 760.4 1.0X -to_timestamp wholestage on 757 766 12 1.3 757.5 1.0X +to_timestamp wholestage off 721 721 1 1.4 720.6 1.0X +to_timestamp wholestage on 724 728 4 1.4 723.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to_unix_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_unix_timestamp wholestage off 754 754 0 1.3 753.6 1.0X -to_unix_timestamp wholestage on 742 743 2 1.3 742.0 1.0X +to_unix_timestamp wholestage off 731 731 0 1.4 730.6 1.0X +to_unix_timestamp wholestage on 731 734 4 1.4 731.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to date str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to date str wholestage off 137 141 6 7.3 137.0 1.0X -to date str wholestage on 130 136 3 7.7 130.0 1.1X +to date str wholestage off 133 134 2 7.5 132.8 1.0X +to date str wholestage on 131 134 3 7.6 131.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor to_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_date wholestage off 655 656 1 1.5 655.0 1.0X -to_date wholestage on 637 642 5 1.6 636.8 1.0X +to_date wholestage 
off 648 648 0 1.5 647.9 1.0X +to_date wholestage on 640 643 2 1.6 640.0 1.0X ================================================================================================ Conversion from/to external types ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor To/from Java's date-time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -From java.sql.Date 266 269 3 18.8 53.2 1.0X -From java.time.LocalDate 286 294 12 17.5 57.2 0.9X -Collect java.sql.Date 1152 1337 171 4.3 230.5 0.2X -Collect java.time.LocalDate 962 1123 144 5.2 192.4 0.3X -From java.sql.Timestamp 197 202 5 25.3 39.5 1.3X -From java.time.Instant 176 196 33 28.3 35.3 1.5X -Collect longs 847 1023 198 5.9 169.4 0.3X -Collect java.sql.Timestamp 1160 1208 80 4.3 232.0 0.2X -Collect java.time.Instant 1083 1158 78 4.6 216.6 0.2X -java.sql.Date to Hive string 4114 4175 91 1.2 822.8 0.1X -java.time.LocalDate to Hive string 3656 3737 98 1.4 731.2 0.1X -java.sql.Timestamp to Hive string 6474 6727 243 0.8 1294.8 0.0X -java.time.Instant to Hive string 5303 5420 117 0.9 1060.6 0.1X +From java.sql.Date 281 282 2 17.8 56.1 1.0X +From java.time.LocalDate 280 283 4 17.8 56.0 1.0X +Collect java.sql.Date 1328 1427 118 3.8 265.6 0.2X +Collect java.time.LocalDate 984 1125 124 5.1 196.7 0.3X +From java.sql.Timestamp 199 204 6 25.1 39.8 1.4X +From java.time.Instant 181 183 2 27.7 36.1 1.6X +Collect longs 945 998 46 5.3 189.0 0.3X +Collect java.sql.Timestamp 1008 1209 196 5.0 201.6 0.3X +Collect java.time.Instant 822 1017 206 6.1 164.4 0.3X +java.sql.Date to Hive string 3880 4013 125 1.3 775.9 0.1X +java.time.LocalDate to Hive string 3584 3632 45 1.4 716.8 0.1X +java.sql.Timestamp to 
Hive string 6366 6433 58 0.8 1273.2 0.0X +java.time.Instant to Hive string 5133 5224 113 1.0 1026.7 0.1X diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk21-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk21-results.txt index c15fb78f2f165..fbe7b36bffa60 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk21-results.txt @@ -2,153 +2,153 @@ Rebasing dates/timestamps in Parquet datasource ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save DATE to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 11193 11193 0 8.9 111.9 1.0X -before 1582, noop 7070 7070 0 14.1 70.7 1.6X -after 1582, rebase EXCEPTION 19836 19836 0 5.0 198.4 0.6X -after 1582, rebase LEGACY 19368 19368 0 5.2 193.7 0.6X -after 1582, rebase CORRECTED 19627 19627 0 5.1 196.3 0.6X -before 1582, rebase LEGACY 16301 16301 0 6.1 163.0 0.7X -before 1582, rebase CORRECTED 15612 15612 0 6.4 156.1 0.7X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +after 1582, noop 11144 11144 0 9.0 111.4 1.0X +before 1582, noop 7066 7066 0 14.2 70.7 1.6X +after 1582, rebase EXCEPTION 19440 19440 0 5.1 194.4 0.6X +after 1582, rebase LEGACY 19280 19280 0 5.2 192.8 0.6X +after 1582, rebase CORRECTED 19431 19431 0 5.1 194.3 0.6X +before 1582, rebase LEGACY 15530 15530 0 6.4 155.3 0.7X +before 1582, rebase CORRECTED 15717 15717 0 6.4 157.2 0.7X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load DATE from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase EXCEPTION 11601 11657 95 8.6 116.0 1.0X -after 1582, vec off, rebase LEGACY 11671 11751 72 8.6 116.7 1.0X -after 1582, vec off, rebase CORRECTED 11593 11632 67 8.6 115.9 1.0X -after 1582, vec on, rebase EXCEPTION 2394 2427 33 41.8 23.9 4.8X -after 1582, vec on, rebase LEGACY 2466 2489 38 40.6 24.7 4.7X -after 1582, vec on, rebase CORRECTED 2487 2500 18 40.2 24.9 4.7X -before 1582, vec off, rebase LEGACY 11937 11951 14 8.4 119.4 1.0X -before 1582, vec off, rebase CORRECTED 11542 11600 60 8.7 115.4 1.0X -before 1582, vec on, rebase LEGACY 2708 2729 30 36.9 27.1 4.3X -before 1582, vec on, rebase CORRECTED 2436 2445 8 41.1 24.4 4.8X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +after 1582, vec off, rebase EXCEPTION 11704 11746 52 8.5 117.0 1.0X +after 1582, vec off, rebase LEGACY 11525 11573 66 8.7 115.2 1.0X +after 1582, vec off, rebase CORRECTED 11505 11532 25 8.7 115.0 1.0X +after 1582, vec on, rebase EXCEPTION 2347 2370 21 42.6 23.5 5.0X +after 1582, vec on, rebase LEGACY 2450 2453 2 40.8 24.5 4.8X +after 1582, vec on, rebase CORRECTED 2431 2446 17 41.1 24.3 4.8X +before 1582, vec off, rebase LEGACY 11748 11779 39 8.5 117.5 1.0X +before 1582, vec off, rebase CORRECTED 11591 11630 33 8.6 115.9 1.0X +before 1582, vec on, rebase LEGACY 2781 2786 6 36.0 27.8 4.2X +before 1582, vec on, rebase CORRECTED 2420 2425 7 41.3 24.2 4.8X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_INT96 to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2403 2403 0 41.6 24.0 1.0X -before 1900, noop 2443 2443 0 40.9 24.4 1.0X -after 1900, rebase EXCEPTION 12805 12805 0 7.8 128.1 0.2X -after 
1900, rebase LEGACY 12529 12529 0 8.0 125.3 0.2X -after 1900, rebase CORRECTED 12474 12474 0 8.0 124.7 0.2X -before 1900, rebase LEGACY 14628 14628 0 6.8 146.3 0.2X -before 1900, rebase CORRECTED 12601 12601 0 7.9 126.0 0.2X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +after 1900, noop 2518 2518 0 39.7 25.2 1.0X +before 1900, noop 2375 2375 0 42.1 23.8 1.1X +after 1900, rebase EXCEPTION 13654 13654 0 7.3 136.5 0.2X +after 1900, rebase LEGACY 13187 13187 0 7.6 131.9 0.2X +after 1900, rebase CORRECTED 13174 13174 0 7.6 131.7 0.2X +before 1900, rebase LEGACY 15129 15129 0 6.6 151.3 0.2X +before 1900, rebase CORRECTED 13438 13438 0 7.4 134.4 0.2X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_INT96 from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 15545 15658 99 6.4 155.4 1.0X -after 1900, vec off, rebase LEGACY 15927 15945 25 6.3 159.3 1.0X -after 1900, vec off, rebase CORRECTED 15558 15620 54 6.4 155.6 1.0X -after 1900, vec on, rebase EXCEPTION 4050 4074 34 24.7 40.5 3.8X -after 1900, vec on, rebase LEGACY 4024 4059 32 24.9 40.2 3.9X -after 1900, vec on, rebase CORRECTED 4062 4074 17 24.6 40.6 3.8X -before 1900, vec off, rebase LEGACY 18219 18234 22 5.5 182.2 0.9X -before 1900, vec off, rebase CORRECTED 15584 15633 45 6.4 155.8 1.0X -before 1900, vec on, rebase LEGACY 6080 6106 23 16.4 60.8 2.6X -before 1900, vec on, rebase CORRECTED 4045 4057 14 24.7 40.4 3.8X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +after 1900, vec off, rebase EXCEPTION 16824 16877 53 5.9 168.2 1.0X +after 1900, vec off, rebase LEGACY 16304 16337 31 6.1 163.0 1.0X +after 1900, vec off, rebase CORRECTED 16164 16239 76 6.2 161.6 1.0X +after 1900, vec on, rebase EXCEPTION 4041 4045 7 24.7 40.4 
4.2X +after 1900, vec on, rebase LEGACY 4015 4039 36 24.9 40.2 4.2X +after 1900, vec on, rebase CORRECTED 4012 4040 27 24.9 40.1 4.2X +before 1900, vec off, rebase LEGACY 18457 18537 71 5.4 184.6 0.9X +before 1900, vec off, rebase CORRECTED 16232 16269 55 6.2 162.3 1.0X +before 1900, vec on, rebase LEGACY 6106 6120 17 16.4 61.1 2.8X +before 1900, vec on, rebase CORRECTED 4018 4053 48 24.9 40.2 4.2X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_MICROS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2378 2378 0 42.0 23.8 1.0X -before 1900, noop 2426 2426 0 41.2 24.3 1.0X -after 1900, rebase EXCEPTION 14475 14475 0 6.9 144.8 0.2X -after 1900, rebase LEGACY 13685 13685 0 7.3 136.8 0.2X -after 1900, rebase CORRECTED 13448 13448 0 7.4 134.5 0.2X -before 1900, rebase LEGACY 15085 15085 0 6.6 150.8 0.2X -before 1900, rebase CORRECTED 13668 13668 0 7.3 136.7 0.2X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +after 1900, noop 2412 2412 0 41.5 24.1 1.0X +before 1900, noop 2413 2413 0 41.4 24.1 1.0X +after 1900, rebase EXCEPTION 11406 11406 0 8.8 114.1 0.2X +after 1900, rebase LEGACY 11249 11249 0 8.9 112.5 0.2X +after 1900, rebase CORRECTED 11318 11318 0 8.8 113.2 0.2X +before 1900, rebase LEGACY 13104 13104 0 7.6 131.0 0.2X +before 1900, rebase CORRECTED 11269 11269 0 8.9 112.7 0.2X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_MICROS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 14595 14621 23 6.9 146.0 1.0X -after 1900, vec off, rebase LEGACY 14689 14699 
12 6.8 146.9 1.0X -after 1900, vec off, rebase CORRECTED 14626 14648 25 6.8 146.3 1.0X -after 1900, vec on, rebase EXCEPTION 3732 3745 14 26.8 37.3 3.9X -after 1900, vec on, rebase LEGACY 3753 3771 29 26.6 37.5 3.9X -after 1900, vec on, rebase CORRECTED 3714 3734 23 26.9 37.1 3.9X -before 1900, vec off, rebase LEGACY 17073 17151 107 5.9 170.7 0.9X -before 1900, vec off, rebase CORRECTED 14575 14613 33 6.9 145.8 1.0X -before 1900, vec on, rebase LEGACY 5581 5602 34 17.9 55.8 2.6X -before 1900, vec on, rebase CORRECTED 3680 3698 30 27.2 36.8 4.0X - -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +after 1900, vec off, rebase EXCEPTION 15047 15100 59 6.6 150.5 1.0X +after 1900, vec off, rebase LEGACY 14965 15033 59 6.7 149.7 1.0X +after 1900, vec off, rebase CORRECTED 15041 15064 35 6.6 150.4 1.0X +after 1900, vec on, rebase EXCEPTION 3714 3737 21 26.9 37.1 4.1X +after 1900, vec on, rebase LEGACY 3748 3766 26 26.7 37.5 4.0X +after 1900, vec on, rebase CORRECTED 3733 3743 14 26.8 37.3 4.0X +before 1900, vec off, rebase LEGACY 17297 17350 52 5.8 173.0 0.9X +before 1900, vec off, rebase CORRECTED 14977 14993 24 6.7 149.8 1.0X +before 1900, vec on, rebase LEGACY 5709 5720 10 17.5 57.1 2.6X +before 1900, vec on, rebase CORRECTED 3696 3717 19 27.1 37.0 4.1X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_MILLIS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2418 2418 0 41.4 24.2 1.0X -before 1900, noop 2413 2413 0 41.5 24.1 1.0X -after 1900, rebase EXCEPTION 11749 11749 0 8.5 117.5 0.2X -after 1900, rebase LEGACY 11757 11757 0 8.5 117.6 0.2X -after 1900, rebase CORRECTED 12081 12081 0 8.3 120.8 0.2X -before 1900, rebase LEGACY 13503 13503 0 7.4 135.0 0.2X -before 1900, rebase CORRECTED 11649 11649 0 8.6 116.5 0.2X - 
-OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +after 1900, noop 2382 2382 0 42.0 23.8 1.0X +before 1900, noop 2414 2414 0 41.4 24.1 1.0X +after 1900, rebase EXCEPTION 11542 11542 0 8.7 115.4 0.2X +after 1900, rebase LEGACY 11074 11074 0 9.0 110.7 0.2X +after 1900, rebase CORRECTED 11275 11275 0 8.9 112.7 0.2X +before 1900, rebase LEGACY 13166 13166 0 7.6 131.7 0.2X +before 1900, rebase CORRECTED 11341 11341 0 8.8 113.4 0.2X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_MILLIS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 14322 14376 53 7.0 143.2 1.0X -after 1900, vec off, rebase LEGACY 14453 14478 22 6.9 144.5 1.0X -after 1900, vec off, rebase CORRECTED 14429 14523 94 6.9 144.3 1.0X -after 1900, vec on, rebase EXCEPTION 4876 4914 42 20.5 48.8 2.9X -after 1900, vec on, rebase LEGACY 4418 4465 41 22.6 44.2 3.2X -after 1900, vec on, rebase CORRECTED 4876 4909 50 20.5 48.8 2.9X -before 1900, vec off, rebase LEGACY 17196 17238 45 5.8 172.0 0.8X -before 1900, vec off, rebase CORRECTED 14462 14509 57 6.9 144.6 1.0X -before 1900, vec on, rebase LEGACY 6120 6135 16 16.3 61.2 2.3X -before 1900, vec on, rebase CORRECTED 4887 4929 42 20.5 48.9 2.9X +after 1900, vec off, rebase EXCEPTION 14250 14288 40 7.0 142.5 1.0X +after 1900, vec off, rebase LEGACY 14235 14315 82 7.0 142.4 1.0X +after 1900, vec off, rebase CORRECTED 14284 14304 25 7.0 142.8 1.0X +after 1900, vec on, rebase EXCEPTION 4925 4941 27 20.3 49.2 2.9X +after 1900, vec on, rebase LEGACY 4489 4499 10 22.3 44.9 3.2X +after 1900, vec on, rebase CORRECTED 4916 4943 33 20.3 49.2 2.9X +before 1900, vec off, rebase LEGACY 16801 16813 11 6.0 168.0 0.8X +before 1900, vec off, rebase CORRECTED 14259 14307 50 7.0 142.6 1.0X +before 1900, vec on, 
rebase LEGACY 5958 5966 7 16.8 59.6 2.4X +before 1900, vec on, rebase CORRECTED 4900 4920 19 20.4 49.0 2.9X ================================================================================================ Rebasing dates/timestamps in ORC datasource ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save DATE to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 12614 12614 0 7.9 126.1 1.0X -before 1582, noop 6620 6620 0 15.1 66.2 1.9X -after 1582 17066 17066 0 5.9 170.7 0.7X -before 1582 10573 10573 0 9.5 105.7 1.2X +after 1582, noop 10754 10754 0 9.3 107.5 1.0X +before 1582, noop 6783 6783 0 14.7 67.8 1.6X +after 1582 15425 15425 0 6.5 154.2 0.7X +before 1582 10856 10856 0 9.2 108.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load DATE from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off 8422 8523 131 11.9 84.2 1.0X -after 1582, vec on 2386 2401 20 41.9 23.9 3.5X -before 1582, vec off 8447 8474 42 11.8 84.5 1.0X -before 1582, vec on 2526 2542 24 39.6 25.3 3.3X +after 1582, vec off 8437 8530 90 11.9 84.4 1.0X +after 1582, vec on 2419 2430 10 41.3 24.2 3.5X +before 1582, vec off 8505 8526 19 11.8 85.1 1.0X +before 1582, vec on 2557 2566 11 39.1 25.6 3.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 
Save TIMESTAMP to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2336 2336 0 42.8 23.4 1.0X -before 1900, noop 2309 2309 0 43.3 23.1 1.0X -after 1900 9646 9646 0 10.4 96.5 0.2X -before 1900 12150 12150 0 8.2 121.5 0.2X +after 1900, noop 2308 2308 0 43.3 23.1 1.0X +before 1900, noop 2302 2302 0 43.4 23.0 1.0X +after 1900 9526 9526 0 10.5 95.3 0.2X +before 1900 11558 11558 0 8.7 115.6 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off 9781 9845 86 10.2 97.8 1.0X -after 1900, vec on 3778 3792 13 26.5 37.8 2.6X -before 1900, vec off 11757 11781 21 8.5 117.6 0.8X -before 1900, vec on 5490 5511 21 18.2 54.9 1.8X +after 1900, vec off 10757 10772 13 9.3 107.6 1.0X +after 1900, vec on 3892 3899 11 25.7 38.9 2.8X +before 1900, vec off 13141 13195 52 7.6 131.4 0.8X +before 1900, vec on 6226 6301 129 16.1 62.3 1.7X diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt index 249b478e772a8..eed620cdeced6 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt @@ -2,153 +2,153 @@ Rebasing dates/timestamps in Parquet datasource ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save DATE to parquet: Best Time(ms) Avg 
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 11170 11170 0 9.0 111.7 1.0X -before 1582, noop 6505 6505 0 15.4 65.0 1.7X -after 1582, rebase EXCEPTION 19873 19873 0 5.0 198.7 0.6X -after 1582, rebase LEGACY 19726 19726 0 5.1 197.3 0.6X -after 1582, rebase CORRECTED 19931 19931 0 5.0 199.3 0.6X -before 1582, rebase LEGACY 15590 15590 0 6.4 155.9 0.7X -before 1582, rebase CORRECTED 15523 15523 0 6.4 155.2 0.7X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +after 1582, noop 13169 13169 0 7.6 131.7 1.0X +before 1582, noop 7787 7787 0 12.8 77.9 1.7X +after 1582, rebase EXCEPTION 21399 21399 0 4.7 214.0 0.6X +after 1582, rebase LEGACY 21530 21530 0 4.6 215.3 0.6X +after 1582, rebase CORRECTED 21579 21579 0 4.6 215.8 0.6X +before 1582, rebase LEGACY 16095 16095 0 6.2 160.9 0.8X +before 1582, rebase CORRECTED 16011 16011 0 6.2 160.1 0.8X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load DATE from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase EXCEPTION 11571 11596 22 8.6 115.7 1.0X -after 1582, vec off, rebase LEGACY 11892 11909 27 8.4 118.9 1.0X -after 1582, vec off, rebase CORRECTED 11681 11724 47 8.6 116.8 1.0X -after 1582, vec on, rebase EXCEPTION 2516 2530 13 39.7 25.2 4.6X -after 1582, vec on, rebase LEGACY 2555 2563 8 39.1 25.5 4.5X -after 1582, vec on, rebase CORRECTED 2487 2503 22 40.2 24.9 4.7X -before 1582, vec off, rebase LEGACY 11947 11996 69 8.4 119.5 1.0X -before 1582, vec off, rebase CORRECTED 11792 11821 41 8.5 117.9 1.0X -before 1582, vec on, rebase LEGACY 2826 2856 25 35.4 28.3 4.1X -before 1582, vec on, rebase CORRECTED 2465 2489 21 40.6 24.6 4.7X - -OpenJDK 
64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +after 1582, vec off, rebase EXCEPTION 11598 11638 62 8.6 116.0 1.0X +after 1582, vec off, rebase LEGACY 11716 11730 17 8.5 117.2 1.0X +after 1582, vec off, rebase CORRECTED 11556 11616 52 8.7 115.6 1.0X +after 1582, vec on, rebase EXCEPTION 2528 2537 10 39.6 25.3 4.6X +after 1582, vec on, rebase LEGACY 2564 2569 6 39.0 25.6 4.5X +after 1582, vec on, rebase CORRECTED 2487 2534 44 40.2 24.9 4.7X +before 1582, vec off, rebase LEGACY 11740 11799 56 8.5 117.4 1.0X +before 1582, vec off, rebase CORRECTED 11606 11656 50 8.6 116.1 1.0X +before 1582, vec on, rebase LEGACY 2840 2871 27 35.2 28.4 4.1X +before 1582, vec on, rebase CORRECTED 2401 2429 31 41.6 24.0 4.8X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_INT96 to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2340 2340 0 42.7 23.4 1.0X -before 1900, noop 2284 2284 0 43.8 22.8 1.0X -after 1900, rebase EXCEPTION 13230 13230 0 7.6 132.3 0.2X -after 1900, rebase LEGACY 13238 13238 0 7.6 132.4 0.2X -after 1900, rebase CORRECTED 13264 13264 0 7.5 132.6 0.2X -before 1900, rebase LEGACY 15216 15216 0 6.6 152.2 0.2X -before 1900, rebase CORRECTED 13382 13382 0 7.5 133.8 0.2X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +after 1900, noop 2304 2304 0 43.4 23.0 1.0X +before 1900, noop 2242 2242 0 44.6 22.4 1.0X +after 1900, rebase EXCEPTION 13198 13198 0 7.6 132.0 0.2X +after 1900, rebase LEGACY 12894 12894 0 7.8 128.9 0.2X +after 1900, rebase CORRECTED 12991 12991 0 7.7 129.9 0.2X +before 1900, rebase LEGACY 14288 14288 0 7.0 142.9 0.2X +before 1900, rebase CORRECTED 12614 12614 0 7.9 126.1 0.2X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_INT96 
from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 16170 16195 22 6.2 161.7 1.0X -after 1900, vec off, rebase LEGACY 16720 16755 31 6.0 167.2 1.0X -after 1900, vec off, rebase CORRECTED 16152 16213 54 6.2 161.5 1.0X -after 1900, vec on, rebase EXCEPTION 4090 4101 18 24.4 40.9 4.0X -after 1900, vec on, rebase LEGACY 4114 4144 33 24.3 41.1 3.9X -after 1900, vec on, rebase CORRECTED 4158 4191 28 24.0 41.6 3.9X -before 1900, vec off, rebase LEGACY 18554 18584 31 5.4 185.5 0.9X -before 1900, vec off, rebase CORRECTED 16192 16267 84 6.2 161.9 1.0X -before 1900, vec on, rebase LEGACY 6256 6271 22 16.0 62.6 2.6X -before 1900, vec on, rebase CORRECTED 4074 4104 27 24.5 40.7 4.0X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +after 1900, vec off, rebase EXCEPTION 15238 15263 23 6.6 152.4 1.0X +after 1900, vec off, rebase LEGACY 14777 14793 22 6.8 147.8 1.0X +after 1900, vec off, rebase CORRECTED 14578 14650 107 6.9 145.8 1.0X +after 1900, vec on, rebase EXCEPTION 4051 4103 67 24.7 40.5 3.8X +after 1900, vec on, rebase LEGACY 4097 4123 34 24.4 41.0 3.7X +after 1900, vec on, rebase CORRECTED 4080 4092 16 24.5 40.8 3.7X +before 1900, vec off, rebase LEGACY 17402 17431 26 5.7 174.0 0.9X +before 1900, vec off, rebase CORRECTED 15337 15394 51 6.5 153.4 1.0X +before 1900, vec on, rebase LEGACY 6180 6197 17 16.2 61.8 2.5X +before 1900, vec on, rebase CORRECTED 4082 4094 14 24.5 40.8 3.7X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_MICROS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2390 2390 0 41.8 23.9 1.0X -before 1900, noop 2291 
2291 0 43.6 22.9 1.0X -after 1900, rebase EXCEPTION 12537 12537 0 8.0 125.4 0.2X -after 1900, rebase LEGACY 12047 12047 0 8.3 120.5 0.2X -after 1900, rebase CORRECTED 12151 12151 0 8.2 121.5 0.2X -before 1900, rebase LEGACY 13960 13960 0 7.2 139.6 0.2X -before 1900, rebase CORRECTED 11985 11985 0 8.3 119.9 0.2X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +after 1900, noop 2285 2285 0 43.8 22.9 1.0X +before 1900, noop 2287 2287 0 43.7 22.9 1.0X +after 1900, rebase EXCEPTION 12295 12295 0 8.1 122.9 0.2X +after 1900, rebase LEGACY 11653 11653 0 8.6 116.5 0.2X +after 1900, rebase CORRECTED 11718 11718 0 8.5 117.2 0.2X +before 1900, rebase LEGACY 13462 13462 0 7.4 134.6 0.2X +before 1900, rebase CORRECTED 11886 11886 0 8.4 118.9 0.2X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_MICROS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 14977 15018 36 6.7 149.8 1.0X -after 1900, vec off, rebase LEGACY 14924 14960 33 6.7 149.2 1.0X -after 1900, vec off, rebase CORRECTED 14965 14994 26 6.7 149.7 1.0X -after 1900, vec on, rebase EXCEPTION 3810 3819 8 26.2 38.1 3.9X -after 1900, vec on, rebase LEGACY 3829 3835 8 26.1 38.3 3.9X -after 1900, vec on, rebase CORRECTED 3785 3837 47 26.4 37.9 4.0X -before 1900, vec off, rebase LEGACY 17323 17343 19 5.8 173.2 0.9X -before 1900, vec off, rebase CORRECTED 14933 14962 26 6.7 149.3 1.0X -before 1900, vec on, rebase LEGACY 5763 5783 17 17.4 57.6 2.6X -before 1900, vec on, rebase CORRECTED 3798 3817 32 26.3 38.0 3.9X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +after 1900, vec off, rebase EXCEPTION 14824 14877 57 6.7 148.2 1.0X +after 1900, vec off, rebase LEGACY 14876 14899 26 6.7 148.8 1.0X +after 1900, vec off, rebase CORRECTED 
14924 14947 24 6.7 149.2 1.0X +after 1900, vec on, rebase EXCEPTION 3813 3817 5 26.2 38.1 3.9X +after 1900, vec on, rebase LEGACY 3829 3855 28 26.1 38.3 3.9X +after 1900, vec on, rebase CORRECTED 3803 3811 11 26.3 38.0 3.9X +before 1900, vec off, rebase LEGACY 17141 17177 53 5.8 171.4 0.9X +before 1900, vec off, rebase CORRECTED 14916 14936 26 6.7 149.2 1.0X +before 1900, vec on, rebase LEGACY 5638 5656 15 17.7 56.4 2.6X +before 1900, vec on, rebase CORRECTED 3792 3820 43 26.4 37.9 3.9X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_MILLIS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2309 2309 0 43.3 23.1 1.0X -before 1900, noop 2358 2358 0 42.4 23.6 1.0X -after 1900, rebase EXCEPTION 11266 11266 0 8.9 112.7 0.2X -after 1900, rebase LEGACY 11582 11582 0 8.6 115.8 0.2X -after 1900, rebase CORRECTED 11555 11555 0 8.7 115.5 0.2X -before 1900, rebase LEGACY 13600 13600 0 7.4 136.0 0.2X -before 1900, rebase CORRECTED 12113 12113 0 8.3 121.1 0.2X - -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +after 1900, noop 2283 2283 0 43.8 22.8 1.0X +before 1900, noop 2286 2286 0 43.8 22.9 1.0X +after 1900, rebase EXCEPTION 11040 11040 0 9.1 110.4 0.2X +after 1900, rebase LEGACY 11421 11421 0 8.8 114.2 0.2X +after 1900, rebase CORRECTED 11132 11132 0 9.0 111.3 0.2X +before 1900, rebase LEGACY 13097 13097 0 7.6 131.0 0.2X +before 1900, rebase CORRECTED 11359 11359 0 8.8 113.6 0.2X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_MILLIS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, 
rebase EXCEPTION 15076 15125 46 6.6 150.8 1.0X -after 1900, vec off, rebase LEGACY 15480 15491 16 6.5 154.8 1.0X -after 1900, vec off, rebase CORRECTED 15171 15189 17 6.6 151.7 1.0X -after 1900, vec on, rebase EXCEPTION 3976 4001 28 25.2 39.8 3.8X -after 1900, vec on, rebase LEGACY 4582 4609 46 21.8 45.8 3.3X -after 1900, vec on, rebase CORRECTED 3934 3953 29 25.4 39.3 3.8X -before 1900, vec off, rebase LEGACY 17602 17644 37 5.7 176.0 0.9X -before 1900, vec off, rebase CORRECTED 15201 15238 34 6.6 152.0 1.0X -before 1900, vec on, rebase LEGACY 6306 6311 6 15.9 63.1 2.4X -before 1900, vec on, rebase CORRECTED 3926 3961 50 25.5 39.3 3.8X +after 1900, vec off, rebase EXCEPTION 15017 15053 38 6.7 150.2 1.0X +after 1900, vec off, rebase LEGACY 14941 15013 75 6.7 149.4 1.0X +after 1900, vec off, rebase CORRECTED 15057 15070 17 6.6 150.6 1.0X +after 1900, vec on, rebase EXCEPTION 3942 3949 7 25.4 39.4 3.8X +after 1900, vec on, rebase LEGACY 4605 4628 26 21.7 46.1 3.3X +after 1900, vec on, rebase CORRECTED 4002 4027 22 25.0 40.0 3.8X +before 1900, vec off, rebase LEGACY 17121 17169 47 5.8 171.2 0.9X +before 1900, vec off, rebase CORRECTED 15086 15132 42 6.6 150.9 1.0X +before 1900, vec on, rebase LEGACY 6262 6271 10 16.0 62.6 2.4X +before 1900, vec on, rebase CORRECTED 3942 3960 24 25.4 39.4 3.8X ================================================================================================ Rebasing dates/timestamps in ORC datasource ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save DATE to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 11208 11208 0 8.9 112.1 1.0X -before 1582, noop 6567 6567 0 
15.2 65.7 1.7X -after 1582 15130 15130 0 6.6 151.3 0.7X -before 1582 10992 10992 0 9.1 109.9 1.0X +after 1582, noop 13322 13322 0 7.5 133.2 1.0X +before 1582, noop 7967 7967 0 12.6 79.7 1.7X +after 1582 17193 17193 0 5.8 171.9 0.8X +before 1582 12729 12729 0 7.9 127.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load DATE from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off 8770 8777 8 11.4 87.7 1.0X -after 1582, vec on 2445 2478 31 40.9 24.5 3.6X -before 1582, vec off 8820 8896 106 11.3 88.2 1.0X -before 1582, vec on 2580 2615 37 38.8 25.8 3.4X +after 1582, vec off 8797 8843 71 11.4 88.0 1.0X +after 1582, vec on 2457 2469 14 40.7 24.6 3.6X +before 1582, vec off 8555 8572 16 11.7 85.5 1.0X +before 1582, vec on 2613 2621 11 38.3 26.1 3.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2221 2221 0 45.0 22.2 1.0X -before 1900, noop 2218 2218 0 45.1 22.2 1.0X -after 1900 9916 9916 0 10.1 99.2 0.2X -before 1900 12130 12130 0 8.2 121.3 0.2X +after 1900, noop 2182 2182 0 45.8 21.8 1.0X +before 1900, noop 2169 2169 0 46.1 21.7 1.0X +after 1900 10099 10099 0 9.9 101.0 0.2X +before 1900 12162 12162 0 8.2 121.6 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) 
Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off 10569 11038 802 9.5 105.7 1.0X -after 1900, vec on 4361 4415 55 22.9 43.6 2.4X -before 1900, vec off 12223 12227 7 8.2 122.2 0.9X -before 1900, vec on 6103 6136 30 16.4 61.0 1.7X +after 1900, vec off 9898 9923 26 10.1 99.0 1.0X +after 1900, vec on 4013 4048 55 24.9 40.1 2.5X +before 1900, vec off 11962 11980 18 8.4 119.6 0.8X +before 1900, vec on 5608 5635 43 17.8 56.1 1.8X diff --git a/sql/core/benchmarks/EncodeBenchmark-jdk21-results.txt b/sql/core/benchmarks/EncodeBenchmark-jdk21-results.txt index d74eb426cf341..a7aebf3e61025 100644 --- a/sql/core/benchmarks/EncodeBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/EncodeBenchmark-jdk21-results.txt @@ -1,8 +1,8 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UTF-32 47715 47833 167 0.2 4771.5 1.0X -UTF-16 57379 57408 42 0.2 5737.9 0.8X -UTF-8 2840 2872 45 3.5 284.0 16.8X +UTF-32 64447 64482 50 0.2 6444.7 1.0X +UTF-16 60035 60070 49 0.2 6003.5 1.1X +UTF-8 33512 33524 16 0.3 3351.2 1.9X diff --git a/sql/core/benchmarks/EncodeBenchmark-results.txt b/sql/core/benchmarks/EncodeBenchmark-results.txt index 5fdbbf72d7e77..bd888d90c17de 100644 --- a/sql/core/benchmarks/EncodeBenchmark-results.txt +++ b/sql/core/benchmarks/EncodeBenchmark-results.txt @@ -1,8 +1,8 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -UTF-32 29962 30019 81 0.3 2996.2 1.0X -UTF-16 47699 47702 3 0.2 4769.9 0.6X -UTF-8 3112 3154 59 3.2 311.2 9.6X +UTF-32 33442 33457 21 0.3 3344.2 1.0X +UTF-16 50707 50731 35 0.2 5070.7 0.7X +UTF-8 30829 30847 25 0.3 3082.9 1.1X diff --git a/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-jdk21-results.txt b/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-jdk21-results.txt index 08f3d54f5ae81..3bc77b17102fe 100644 --- a/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-jdk21-results.txt @@ -2,44 +2,44 @@ WITHOUT SPILL ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Array with 100000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 2456 2456 0 41.7 24.0 1.0X -ExternalAppendOnlyUnsafeRowArray 3572 3595 33 28.7 34.9 0.7X +ArrayBuffer 2569 2579 14 39.9 25.1 1.0X +ExternalAppendOnlyUnsafeRowArray 3494 3513 27 29.3 34.1 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Array with 1000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 5511 5519 11 47.6 21.0 1.0X -ExternalAppendOnlyUnsafeRowArray 12331 12382 73 21.3 47.0 0.4X +ArrayBuffer 5447 5500 75 48.1 20.8 1.0X 
+ExternalAppendOnlyUnsafeRowArray 11886 11907 29 22.1 45.3 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Array with 30000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 10731 10759 39 45.8 21.8 1.0X -ExternalAppendOnlyUnsafeRowArray 18516 18568 72 26.5 37.7 0.6X +ArrayBuffer 10664 10664 1 46.1 21.7 1.0X +ExternalAppendOnlyUnsafeRowArray 17290 17397 151 28.4 35.2 0.6X ================================================================================================ WITH SPILL ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Spilling with 1000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeExternalSorter 8284 8328 63 31.6 31.6 1.0X -ExternalAppendOnlyUnsafeRowArray 6615 6624 14 39.6 25.2 1.3X +UnsafeExternalSorter 8436 8440 6 31.1 32.2 1.0X +ExternalAppendOnlyUnsafeRowArray 6686 6713 39 39.2 25.5 1.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Spilling with 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeExternalSorter 5 5 0 32.8 30.5 1.0X -ExternalAppendOnlyUnsafeRowArray 4 4 0 38.5 26.0 1.2X +UnsafeExternalSorter 5 5 0 33.4 29.9 1.0X 
+ExternalAppendOnlyUnsafeRowArray 4 4 0 39.5 25.3 1.2X diff --git a/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt b/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt index ca447f9e97dbc..cd6241caf25b0 100644 --- a/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt +++ b/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt @@ -2,44 +2,44 @@ WITHOUT SPILL ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Array with 100000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 2496 2499 4 41.0 24.4 1.0X -ExternalAppendOnlyUnsafeRowArray 3495 3513 24 29.3 34.1 0.7X +ArrayBuffer 2453 2458 8 41.7 24.0 1.0X +ExternalAppendOnlyUnsafeRowArray 3401 3413 18 30.1 33.2 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Array with 1000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 5277 5284 10 49.7 20.1 1.0X -ExternalAppendOnlyUnsafeRowArray 12169 12171 3 21.5 46.4 0.4X +ArrayBuffer 5330 5332 3 49.2 20.3 1.0X +ExternalAppendOnlyUnsafeRowArray 12411 12462 72 21.1 47.3 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Array with 30000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 10107 10110 4 48.6 20.6 1.0X -ExternalAppendOnlyUnsafeRowArray 17021 17035 20 28.9 34.6 0.6X +ArrayBuffer 10236 10250 20 48.0 20.8 1.0X +ExternalAppendOnlyUnsafeRowArray 16811 16821 15 29.2 34.2 0.6X ================================================================================================ WITH SPILL ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Spilling with 1000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeExternalSorter 8435 8499 89 31.1 32.2 1.0X -ExternalAppendOnlyUnsafeRowArray 7126 7131 6 36.8 27.2 1.2X +UnsafeExternalSorter 8715 8747 45 30.1 33.2 1.0X +ExternalAppendOnlyUnsafeRowArray 6495 6507 16 40.4 24.8 1.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Spilling with 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeExternalSorter 5 5 0 34.5 29.0 1.0X -ExternalAppendOnlyUnsafeRowArray 4 4 0 36.6 27.3 1.1X +UnsafeExternalSorter 5 5 0 33.5 29.9 1.0X +ExternalAppendOnlyUnsafeRowArray 4 4 0 40.5 24.7 1.2X diff --git a/sql/core/benchmarks/ExtractBenchmark-jdk21-results.txt b/sql/core/benchmarks/ExtractBenchmark-jdk21-results.txt index 78df1f6557073..9420529bb5166 100644 --- a/sql/core/benchmarks/ExtractBenchmark-jdk21-results.txt +++ 
b/sql/core/benchmarks/ExtractBenchmark-jdk21-results.txt @@ -1,104 +1,104 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 278 295 28 35.9 27.8 1.0X -YEAR of timestamp 604 616 15 16.6 60.4 0.5X -YEAROFWEEK of timestamp 648 659 10 15.4 64.8 0.4X -QUARTER of timestamp 653 672 30 15.3 65.3 0.4X -MONTH of timestamp 572 581 11 17.5 57.2 0.5X -WEEK of timestamp 865 868 3 11.6 86.5 0.3X -DAY of timestamp 576 583 9 17.4 57.6 0.5X -DAYOFWEEK of timestamp 755 759 7 13.3 75.5 0.4X -DOW of timestamp 751 775 39 13.3 75.1 0.4X -DOW_ISO of timestamp 709 716 6 14.1 70.9 0.4X -DAYOFWEEK_ISO of timestamp 708 709 1 14.1 70.8 0.4X -DOY of timestamp 603 614 18 16.6 60.3 0.5X -HOUR of timestamp 475 479 3 21.1 47.5 0.6X -MINUTE of timestamp 479 479 1 20.9 47.9 0.6X -SECOND of timestamp 533 536 3 18.7 53.3 0.5X +cast to timestamp 260 281 28 38.5 26.0 1.0X +YEAR of timestamp 660 684 27 15.1 66.0 0.4X +YEAROFWEEK of timestamp 621 623 2 16.1 62.1 0.4X +QUARTER of timestamp 635 637 2 15.8 63.5 0.4X +MONTH of timestamp 553 555 2 18.1 55.3 0.5X +WEEK of timestamp 847 882 41 11.8 84.7 0.3X +DAY of timestamp 561 562 1 17.8 56.1 0.5X +DAYOFWEEK of timestamp 739 743 3 13.5 73.9 0.4X +DOW of timestamp 744 744 1 13.4 74.4 0.3X +DOW_ISO of timestamp 670 676 9 14.9 67.0 0.4X +DAYOFWEEK_ISO of timestamp 668 670 2 15.0 66.8 0.4X +DOY of timestamp 596 597 1 16.8 59.6 0.4X +HOUR of timestamp 465 468 3 21.5 46.5 0.6X +MINUTE of timestamp 464 467 2 21.5 46.4 0.6X +SECOND of timestamp 531 537 6 18.8 53.1 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core 
Processor Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 251 251 1 39.9 25.1 1.0X -YEAR of timestamp 537 539 2 18.6 53.7 0.5X -YEAROFWEEK of timestamp 624 633 8 16.0 62.4 0.4X -QUARTER of timestamp 634 635 1 15.8 63.4 0.4X -MONTH of timestamp 556 564 10 18.0 55.6 0.5X -WEEK of timestamp 854 859 4 11.7 85.4 0.3X -DAY of timestamp 572 579 11 17.5 57.2 0.4X -DAYOFWEEK of timestamp 741 747 5 13.5 74.1 0.3X -DOW of timestamp 741 743 2 13.5 74.1 0.3X -DOW_ISO of timestamp 703 704 1 14.2 70.3 0.4X -DAYOFWEEK_ISO of timestamp 701 701 1 14.3 70.1 0.4X -DOY of timestamp 592 595 3 16.9 59.2 0.4X -HOUR of timestamp 474 476 2 21.1 47.4 0.5X -MINUTE of timestamp 476 479 5 21.0 47.6 0.5X -SECOND of timestamp 528 530 2 18.9 52.8 0.5X +cast to timestamp 234 244 9 42.8 23.4 1.0X +YEAR of timestamp 532 538 6 18.8 53.2 0.4X +YEAROFWEEK of timestamp 602 606 3 16.6 60.2 0.4X +QUARTER of timestamp 618 625 7 16.2 61.8 0.4X +MONTH of timestamp 540 549 10 18.5 54.0 0.4X +WEEK of timestamp 835 837 2 12.0 83.5 0.3X +DAY of timestamp 553 558 6 18.1 55.3 0.4X +DAYOFWEEK of timestamp 732 735 3 13.7 73.2 0.3X +DOW of timestamp 733 736 3 13.6 73.3 0.3X +DOW_ISO of timestamp 664 670 8 15.1 66.4 0.4X +DAYOFWEEK_ISO of timestamp 664 668 6 15.1 66.4 0.4X +DOY of timestamp 591 593 1 16.9 59.1 0.4X +HOUR of timestamp 461 468 7 21.7 46.1 0.5X +MINUTE of timestamp 462 464 2 21.6 46.2 0.5X +SECOND of timestamp 530 530 1 18.9 53.0 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to 
date 523 526 4 19.1 52.3 1.0X -YEAR of date 533 535 3 18.8 53.3 1.0X -YEAROFWEEK of date 628 639 17 15.9 62.8 0.8X -QUARTER of date 629 631 3 15.9 62.9 0.8X -MONTH of date 566 577 18 17.7 56.6 0.9X -WEEK of date 859 872 21 11.6 85.9 0.6X -DAY of date 572 590 18 17.5 57.2 0.9X -DAYOFWEEK of date 741 746 9 13.5 74.1 0.7X -DOW of date 740 766 45 13.5 74.0 0.7X -DOW_ISO of date 700 707 10 14.3 70.0 0.7X -DAYOFWEEK_ISO of date 698 703 7 14.3 69.8 0.7X -DOY of date 592 596 5 16.9 59.2 0.9X -HOUR of date 993 1014 24 10.1 99.3 0.5X -MINUTE of date 995 1003 10 10.0 99.5 0.5X -SECOND of date 1058 1058 0 9.5 105.8 0.5X +cast to date 511 514 2 19.6 51.1 1.0X +YEAR of date 526 529 3 19.0 52.6 1.0X +YEAROFWEEK of date 601 607 8 16.6 60.1 0.9X +QUARTER of date 617 627 9 16.2 61.7 0.8X +MONTH of date 537 538 1 18.6 53.7 1.0X +WEEK of date 836 847 14 12.0 83.6 0.6X +DAY of date 551 557 9 18.2 55.1 0.9X +DAYOFWEEK of date 734 742 7 13.6 73.4 0.7X +DOW of date 731 734 4 13.7 73.1 0.7X +DOW_ISO of date 664 667 5 15.1 66.4 0.8X +DAYOFWEEK_ISO of date 661 666 4 15.1 66.1 0.8X +DOY of date 588 593 8 17.0 58.8 0.9X +HOUR of date 985 986 1 10.2 98.5 0.5X +MINUTE of date 980 991 14 10.2 98.0 0.5X +SECOND of date 1035 1043 13 9.7 103.5 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 525 574 83 19.1 52.5 1.0X -YEAR of date 539 540 1 18.6 53.9 1.0X -YEAROFWEEK of date 628 631 5 15.9 62.8 0.8X -QUARTER of date 629 640 15 15.9 62.9 0.8X -MONTH of date 553 555 2 18.1 55.3 0.9X -WEEK of date 850 852 1 11.8 85.0 0.6X -DAY of date 568 574 10 17.6 56.8 0.9X -DAYOFWEEK of date 740 741 1 13.5 74.0 0.7X -DOW of date 739 746 6 13.5 73.9 0.7X -DOW_ISO of date 
699 703 4 14.3 69.9 0.8X -DAYOFWEEK_ISO of date 699 700 1 14.3 69.9 0.8X -DOY of date 590 592 3 17.0 59.0 0.9X -HOUR of date 991 992 0 10.1 99.1 0.5X -MINUTE of date 989 990 1 10.1 98.9 0.5X -SECOND of date 1058 1062 5 9.4 105.8 0.5X +cast to date 512 515 3 19.5 51.2 1.0X +YEAR of date 526 534 8 19.0 52.6 1.0X +YEAROFWEEK of date 600 602 2 16.7 60.0 0.9X +QUARTER of date 616 623 11 16.2 61.6 0.8X +MONTH of date 538 543 9 18.6 53.8 1.0X +WEEK of date 837 838 1 12.0 83.7 0.6X +DAY of date 550 553 3 18.2 55.0 0.9X +DAYOFWEEK of date 734 739 5 13.6 73.4 0.7X +DOW of date 733 759 43 13.7 73.3 0.7X +DOW_ISO of date 664 668 3 15.1 66.4 0.8X +DAYOFWEEK_ISO of date 665 666 0 15.0 66.5 0.8X +DOY of date 593 594 1 16.9 59.3 0.9X +HOUR of date 983 986 3 10.2 98.3 0.5X +MINUTE of date 979 981 3 10.2 97.9 0.5X +SECOND of date 1038 1039 1 9.6 103.8 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke extract for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 738 741 2 13.5 73.8 1.0X -YEAR of interval 718 721 4 13.9 71.8 1.0X -MONTH of interval 721 725 3 13.9 72.1 1.0X -DAY of interval 718 722 4 13.9 71.8 1.0X -HOUR of interval 730 733 4 13.7 73.0 1.0X -MINUTE of interval 724 728 3 13.8 72.4 1.0X -SECOND of interval 775 785 13 12.9 77.5 1.0X +cast to interval 723 728 5 13.8 72.3 1.0X +YEAR of interval 717 718 2 13.9 71.7 1.0X +MONTH of interval 720 722 2 13.9 72.0 1.0X +DAY of interval 716 719 2 14.0 71.6 1.0X +HOUR of interval 729 731 2 13.7 72.9 1.0X +MINUTE of interval 725 726 1 13.8 72.5 1.0X +SECOND of interval 769 771 2 13.0 76.9 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core 
Processor Invoke date_part for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 742 745 4 13.5 74.2 1.0X -YEAR of interval 724 725 0 13.8 72.4 1.0X -MONTH of interval 722 724 2 13.9 72.2 1.0X -DAY of interval 728 730 1 13.7 72.8 1.0X -HOUR of interval 731 739 8 13.7 73.1 1.0X -MINUTE of interval 733 740 11 13.6 73.3 1.0X -SECOND of interval 785 800 16 12.7 78.5 0.9X +cast to interval 728 729 1 13.7 72.8 1.0X +YEAR of interval 722 722 1 13.9 72.2 1.0X +MONTH of interval 718 723 5 13.9 71.8 1.0X +DAY of interval 713 718 7 14.0 71.3 1.0X +HOUR of interval 726 727 2 13.8 72.6 1.0X +MINUTE of interval 734 736 3 13.6 73.4 1.0X +SECOND of interval 770 771 2 13.0 77.0 0.9X diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt index a60f24142bc60..b472b3fea998b 100644 --- a/sql/core/benchmarks/ExtractBenchmark-results.txt +++ b/sql/core/benchmarks/ExtractBenchmark-results.txt @@ -1,104 +1,104 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 286 311 23 34.9 28.6 1.0X -YEAR of timestamp 792 798 8 12.6 79.2 0.4X -YEAROFWEEK of timestamp 879 901 19 11.4 87.9 0.3X -QUARTER of timestamp 842 849 9 11.9 84.2 0.3X -MONTH of timestamp 799 804 5 12.5 79.9 0.4X -WEEK of timestamp 1104 1107 3 9.1 110.4 0.3X -DAY of timestamp 780 788 9 12.8 78.0 0.4X -DAYOFWEEK of timestamp 967 973 5 10.3 96.7 0.3X -DOW of timestamp 965 970 5 10.4 96.5 0.3X -DOW_ISO of timestamp 1022 1024 4 9.8 102.2 0.3X -DAYOFWEEK_ISO of 
timestamp 1022 1024 4 9.8 102.2 0.3X -DOY of timestamp 844 855 13 11.8 84.4 0.3X -HOUR of timestamp 558 563 5 17.9 55.8 0.5X -MINUTE of timestamp 564 564 0 17.7 56.4 0.5X -SECOND of timestamp 657 658 1 15.2 65.7 0.4X +cast to timestamp 243 273 33 41.2 24.3 1.0X +YEAR of timestamp 780 785 5 12.8 78.0 0.3X +YEAROFWEEK of timestamp 849 883 36 11.8 84.9 0.3X +QUARTER of timestamp 798 799 0 12.5 79.8 0.3X +MONTH of timestamp 758 762 4 13.2 75.8 0.3X +WEEK of timestamp 1113 1118 6 9.0 111.3 0.2X +DAY of timestamp 752 757 5 13.3 75.2 0.3X +DAYOFWEEK of timestamp 940 945 4 10.6 94.0 0.3X +DOW of timestamp 940 949 14 10.6 94.0 0.3X +DOW_ISO of timestamp 997 1004 11 10.0 99.7 0.2X +DAYOFWEEK_ISO of timestamp 991 995 4 10.1 99.1 0.2X +DOY of timestamp 811 816 6 12.3 81.1 0.3X +HOUR of timestamp 536 543 7 18.7 53.6 0.5X +MINUTE of timestamp 532 541 7 18.8 53.2 0.5X +SECOND of timestamp 636 648 21 15.7 63.6 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 245 248 4 40.8 24.5 1.0X -YEAR of timestamp 785 788 3 12.7 78.5 0.3X -YEAROFWEEK of timestamp 859 859 0 11.6 85.9 0.3X -QUARTER of timestamp 818 819 1 12.2 81.8 0.3X -MONTH of timestamp 776 781 4 12.9 77.6 0.3X -WEEK of timestamp 1099 1109 17 9.1 109.9 0.2X -DAY of timestamp 778 780 2 12.9 77.8 0.3X -DAYOFWEEK of timestamp 964 966 2 10.4 96.4 0.3X -DOW of timestamp 964 966 3 10.4 96.4 0.3X -DOW_ISO of timestamp 1015 1020 5 9.9 101.5 0.2X -DAYOFWEEK_ISO of timestamp 1012 1014 3 9.9 101.2 0.2X -DOY of timestamp 847 850 6 11.8 84.7 0.3X -HOUR of timestamp 560 562 4 17.9 56.0 0.4X -MINUTE of timestamp 560 569 11 17.8 56.0 0.4X -SECOND of timestamp 656 660 6 15.2 65.6 0.4X 
+cast to timestamp 216 223 10 46.3 21.6 1.0X +YEAR of timestamp 767 770 5 13.0 76.7 0.3X +YEAROFWEEK of timestamp 830 840 14 12.0 83.0 0.3X +QUARTER of timestamp 786 791 4 12.7 78.6 0.3X +MONTH of timestamp 758 761 3 13.2 75.8 0.3X +WEEK of timestamp 1110 1119 8 9.0 111.0 0.2X +DAY of timestamp 759 760 1 13.2 75.9 0.3X +DAYOFWEEK of timestamp 939 942 5 10.7 93.9 0.2X +DOW of timestamp 937 938 1 10.7 93.7 0.2X +DOW_ISO of timestamp 986 987 1 10.1 98.6 0.2X +DAYOFWEEK_ISO of timestamp 985 990 4 10.1 98.5 0.2X +DOY of timestamp 819 824 4 12.2 81.9 0.3X +HOUR of timestamp 531 541 12 18.8 53.1 0.4X +MINUTE of timestamp 528 532 6 19.0 52.8 0.4X +SECOND of timestamp 635 638 5 15.7 63.5 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 727 729 4 13.8 72.7 1.0X -YEAR of date 777 787 10 12.9 77.7 0.9X -YEAROFWEEK of date 852 858 8 11.7 85.2 0.9X -QUARTER of date 813 815 3 12.3 81.3 0.9X -MONTH of date 772 775 4 12.9 77.2 0.9X -WEEK of date 1091 1093 2 9.2 109.1 0.7X -DAY of date 777 778 1 12.9 77.7 0.9X -DAYOFWEEK of date 963 965 3 10.4 96.3 0.8X -DOW of date 960 963 3 10.4 96.0 0.8X -DOW_ISO of date 1017 1018 1 9.8 101.7 0.7X -DAYOFWEEK_ISO of date 1010 1013 2 9.9 101.0 0.7X -DOY of date 840 841 1 11.9 84.0 0.9X -HOUR of date 1288 1295 8 7.8 128.8 0.6X -MINUTE of date 1299 1313 20 7.7 129.9 0.6X -SECOND of date 1383 1393 10 7.2 138.3 0.5X +cast to date 701 710 12 14.3 70.1 1.0X +YEAR of date 766 770 4 13.1 76.6 0.9X +YEAROFWEEK of date 824 828 6 12.1 82.4 0.9X +QUARTER of date 787 790 3 12.7 78.7 0.9X +MONTH of date 756 756 1 13.2 75.6 0.9X +WEEK of date 1112 1113 1 9.0 111.2 0.6X +DAY of date 756 758 3 13.2 75.6 0.9X +DAYOFWEEK of 
date 940 941 1 10.6 94.0 0.7X +DOW of date 942 944 2 10.6 94.2 0.7X +DOW_ISO of date 986 1001 21 10.1 98.6 0.7X +DAYOFWEEK_ISO of date 984 991 7 10.2 98.4 0.7X +DOY of date 819 827 7 12.2 81.9 0.9X +HOUR of date 1278 1290 10 7.8 127.8 0.5X +MINUTE of date 1290 1293 2 7.8 129.0 0.5X +SECOND of date 1374 1376 3 7.3 137.4 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 731 735 3 13.7 73.1 1.0X -YEAR of date 779 784 7 12.8 77.9 0.9X -YEAROFWEEK of date 851 856 6 11.8 85.1 0.9X -QUARTER of date 816 817 1 12.3 81.6 0.9X -MONTH of date 771 774 4 13.0 77.1 0.9X -WEEK of date 1095 1097 4 9.1 109.5 0.7X -DAY of date 774 777 3 12.9 77.4 0.9X -DAYOFWEEK of date 960 961 2 10.4 96.0 0.8X -DOW of date 959 962 4 10.4 95.9 0.8X -DOW_ISO of date 1009 1011 2 9.9 100.9 0.7X -DAYOFWEEK_ISO of date 1009 1011 2 9.9 100.9 0.7X -DOY of date 843 844 1 11.9 84.3 0.9X -HOUR of date 1289 1290 1 7.8 128.9 0.6X -MINUTE of date 1285 1289 5 7.8 128.5 0.6X -SECOND of date 1390 1395 5 7.2 139.0 0.5X +cast to date 711 722 10 14.1 71.1 1.0X +YEAR of date 758 760 3 13.2 75.8 0.9X +YEAROFWEEK of date 826 830 6 12.1 82.6 0.9X +QUARTER of date 783 785 3 12.8 78.3 0.9X +MONTH of date 755 756 1 13.2 75.5 0.9X +WEEK of date 1102 1115 11 9.1 110.2 0.6X +DAY of date 749 753 3 13.3 74.9 0.9X +DAYOFWEEK of date 940 941 1 10.6 94.0 0.8X +DOW of date 934 936 3 10.7 93.4 0.8X +DOW_ISO of date 988 988 0 10.1 98.8 0.7X +DAYOFWEEK_ISO of date 988 998 18 10.1 98.8 0.7X +DOY of date 812 817 7 12.3 81.2 0.9X +HOUR of date 1274 1281 6 7.8 127.4 0.6X +MINUTE of date 1282 1287 6 7.8 128.2 0.6X +SECOND of date 1382 1384 2 7.2 138.2 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on 
Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke extract for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 1127 1130 2 8.9 112.7 1.0X -YEAR of interval 1119 1123 5 8.9 111.9 1.0X -MONTH of interval 1117 1118 2 9.0 111.7 1.0X -DAY of interval 1124 1126 2 8.9 112.4 1.0X -HOUR of interval 1119 1120 2 8.9 111.9 1.0X -MINUTE of interval 1119 1122 3 8.9 111.9 1.0X -SECOND of interval 1216 1224 10 8.2 121.6 0.9X +cast to interval 1093 1095 2 9.2 109.3 1.0X +YEAR of interval 1085 1086 2 9.2 108.5 1.0X +MONTH of interval 1075 1075 0 9.3 107.5 1.0X +DAY of interval 1071 1076 5 9.3 107.1 1.0X +HOUR of interval 1075 1082 7 9.3 107.5 1.0X +MINUTE of interval 1113 1122 12 9.0 111.3 1.0X +SECOND of interval 1179 1181 3 8.5 117.9 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Invoke date_part for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 1130 1131 2 8.8 113.0 1.0X -YEAR of interval 1113 1116 2 9.0 111.3 1.0X -MONTH of interval 1122 1122 1 8.9 112.2 1.0X -DAY of interval 1122 1124 4 8.9 112.2 1.0X -HOUR of interval 1119 1121 2 8.9 111.9 1.0X -MINUTE of interval 1118 1125 9 8.9 111.8 1.0X -SECOND of interval 1208 1211 3 8.3 120.8 0.9X +cast to interval 1080 1082 3 9.3 108.0 1.0X +YEAR of interval 1077 1080 3 9.3 107.7 1.0X +MONTH of interval 1080 1081 1 9.3 108.0 1.0X +DAY of interval 1069 1070 2 9.4 106.9 1.0X +HOUR of interval 1073 1074 2 9.3 107.3 1.0X +MINUTE of interval 1122 1125 5 8.9 112.2 1.0X +SECOND of interval 1180 1184 4 8.5 118.0 
0.9X diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-jdk21-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-jdk21-results.txt index 17ffe9f3fab41..417979cfb62a7 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-jdk21-results.txt @@ -2,733 +2,733 @@ Pushdown for many distinct value case ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6345 6437 61 2.5 403.4 1.0X -Parquet Vectorized (Pushdown) 341 363 12 46.2 21.7 18.6X -Native ORC Vectorized 5118 5274 131 3.1 325.4 1.2X -Native ORC Vectorized (Pushdown) 318 323 5 49.5 20.2 20.0X +Parquet Vectorized 6457 6500 40 2.4 410.5 1.0X +Parquet Vectorized (Pushdown) 362 383 16 43.4 23.0 17.8X +Native ORC Vectorized 5171 5288 107 3.0 328.8 1.2X +Native ORC Vectorized (Pushdown) 314 323 9 50.1 20.0 20.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 string row ('7864320' < value < '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6333 6355 22 2.5 402.7 1.0X -Parquet Vectorized (Pushdown) 331 347 9 47.5 21.1 19.1X -Native ORC Vectorized 5259 5281 25 3.0 334.4 1.2X -Native ORC Vectorized (Pushdown) 310 330 19 50.7 19.7 20.4X +Parquet Vectorized 6405 6424 20 2.5 407.2 1.0X +Parquet 
Vectorized (Pushdown) 314 326 9 50.0 20.0 20.4X +Native ORC Vectorized 5221 5259 39 3.0 331.9 1.2X +Native ORC Vectorized (Pushdown) 299 317 13 52.6 19.0 21.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 string row (value = '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6378 6405 21 2.5 405.5 1.0X -Parquet Vectorized (Pushdown) 315 324 10 50.0 20.0 20.3X -Native ORC Vectorized 5359 5364 5 2.9 340.7 1.2X -Native ORC Vectorized (Pushdown) 301 308 5 52.2 19.2 21.2X +Parquet Vectorized 6432 6453 22 2.4 408.9 1.0X +Parquet Vectorized (Pushdown) 298 310 9 52.8 18.9 21.6X +Native ORC Vectorized 5377 5388 8 2.9 341.9 1.2X +Native ORC Vectorized (Pushdown) 303 312 7 51.8 19.3 21.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 string row (value <=> '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6371 6394 16 2.5 405.1 1.0X -Parquet Vectorized (Pushdown) 310 315 7 50.7 19.7 20.5X -Native ORC Vectorized 5354 5384 25 2.9 340.4 1.2X -Native ORC Vectorized (Pushdown) 291 299 6 54.1 18.5 21.9X +Parquet Vectorized 6433 6478 25 2.4 409.0 1.0X +Parquet Vectorized (Pushdown) 295 302 6 53.4 18.7 21.8X +Native ORC Vectorized 5363 5368 5 2.9 341.0 1.2X +Native ORC Vectorized (Pushdown) 286 294 7 55.0 18.2 22.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 string row 
('7864320' <= value <= '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6427 6456 19 2.4 408.6 1.0X -Parquet Vectorized (Pushdown) 310 312 1 50.7 19.7 20.7X -Native ORC Vectorized 5240 5253 10 3.0 333.2 1.2X -Native ORC Vectorized (Pushdown) 288 301 11 54.7 18.3 22.3X +Parquet Vectorized 6457 6470 11 2.4 410.5 1.0X +Parquet Vectorized (Pushdown) 293 300 6 53.6 18.7 22.0X +Native ORC Vectorized 5356 5366 8 2.9 340.5 1.2X +Native ORC Vectorized (Pushdown) 288 295 5 54.6 18.3 22.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 13717 13822 93 1.1 872.1 1.0X -Parquet Vectorized (Pushdown) 13817 13833 19 1.1 878.4 1.0X -Native ORC Vectorized 12689 12724 34 1.2 806.7 1.1X -Native ORC Vectorized (Pushdown) 12802 12812 9 1.2 813.9 1.1X +Parquet Vectorized 14274 14374 112 1.1 907.5 1.0X +Parquet Vectorized (Pushdown) 14553 14581 27 1.1 925.2 1.0X +Native ORC Vectorized 13537 13553 20 1.2 860.7 1.1X +Native ORC Vectorized (Pushdown) 13620 13650 40 1.2 865.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 int row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6152 6231 51 2.6 391.2 1.0X -Parquet Vectorized (Pushdown) 306 315 9 
51.4 19.5 20.1X -Native ORC Vectorized 4694 4761 104 3.4 298.4 1.3X -Native ORC Vectorized (Pushdown) 274 282 9 57.4 17.4 22.5X +Parquet Vectorized 6163 6242 78 2.6 391.9 1.0X +Parquet Vectorized (Pushdown) 277 290 12 56.8 17.6 22.2X +Native ORC Vectorized 4740 4795 56 3.3 301.3 1.3X +Native ORC Vectorized (Pushdown) 281 290 6 56.0 17.8 22.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 int row (7864320 < value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5996 6015 14 2.6 381.2 1.0X -Parquet Vectorized (Pushdown) 302 311 7 52.2 19.2 19.9X -Native ORC Vectorized 4684 4691 5 3.4 297.8 1.3X -Native ORC Vectorized (Pushdown) 281 290 9 56.0 17.9 21.3X +Parquet Vectorized 6072 6080 11 2.6 386.1 1.0X +Parquet Vectorized (Pushdown) 283 301 24 55.5 18.0 21.4X +Native ORC Vectorized 4715 4731 22 3.3 299.8 1.3X +Native ORC Vectorized (Pushdown) 281 290 10 56.0 17.9 21.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 int row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6036 6049 10 2.6 383.8 1.0X -Parquet Vectorized (Pushdown) 296 302 4 53.1 18.8 20.4X -Native ORC Vectorized 4725 4753 22 3.3 300.4 1.3X -Native ORC Vectorized (Pushdown) 276 286 6 56.9 17.6 21.8X +Parquet Vectorized 6104 6135 28 2.6 388.1 1.0X +Parquet Vectorized (Pushdown) 279 288 6 56.3 17.8 21.8X +Native ORC Vectorized 4780 4816 31 3.3 303.9 1.3X +Native ORC Vectorized (Pushdown) 279 297 13 56.4 17.7 
21.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 int row (value <=> 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6041 6050 6 2.6 384.1 1.0X -Parquet Vectorized (Pushdown) 292 302 7 53.8 18.6 20.7X -Native ORC Vectorized 4711 4747 26 3.3 299.5 1.3X -Native ORC Vectorized (Pushdown) 271 286 8 58.0 17.2 22.3X +Parquet Vectorized 6122 6149 19 2.6 389.2 1.0X +Parquet Vectorized (Pushdown) 283 290 5 55.5 18.0 21.6X +Native ORC Vectorized 4788 4807 15 3.3 304.4 1.3X +Native ORC Vectorized (Pushdown) 274 285 6 57.4 17.4 22.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 int row (7864320 <= value <= 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6046 6067 22 2.6 384.4 1.0X -Parquet Vectorized (Pushdown) 296 302 4 53.1 18.8 20.4X -Native ORC Vectorized 4767 4804 28 3.3 303.1 1.3X -Native ORC Vectorized (Pushdown) 274 286 7 57.4 17.4 22.1X +Parquet Vectorized 6128 6134 6 2.6 389.6 1.0X +Parquet Vectorized (Pushdown) 277 282 3 56.8 17.6 22.1X +Native ORC Vectorized 4819 4831 9 3.3 306.4 1.3X +Native ORC Vectorized (Pushdown) 296 303 7 53.1 18.8 20.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 int row (7864319 < value < 7864321): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6053 6060 6 2.6 384.9 1.0X -Parquet Vectorized (Pushdown) 296 298 2 53.2 18.8 20.5X -Native ORC Vectorized 4792 4801 8 3.3 304.7 1.3X -Native ORC Vectorized (Pushdown) 273 286 8 57.7 17.3 22.2X +Parquet Vectorized 6143 6158 16 2.6 390.5 1.0X +Parquet Vectorized (Pushdown) 281 289 9 55.9 17.9 21.8X +Native ORC Vectorized 4810 4822 12 3.3 305.8 1.3X +Native ORC Vectorized (Pushdown) 276 280 4 57.1 17.5 22.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% int rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6751 6789 38 2.3 429.2 1.0X -Parquet Vectorized (Pushdown) 1591 1607 15 9.9 101.1 4.2X -Native ORC Vectorized 5460 5476 19 2.9 347.1 1.2X -Native ORC Vectorized (Pushdown) 1457 1469 11 10.8 92.7 4.6X +Parquet Vectorized 6791 6806 13 2.3 431.7 1.0X +Parquet Vectorized (Pushdown) 1541 1553 8 10.2 98.0 4.4X +Native ORC Vectorized 5445 5461 15 2.9 346.2 1.2X +Native ORC Vectorized (Pushdown) 1389 1399 11 11.3 88.3 4.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% int rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 9467 9480 15 1.7 601.9 1.0X -Parquet Vectorized (Pushdown) 6594 6601 10 2.4 419.2 1.4X -Native ORC Vectorized 8160 8178 19 1.9 518.8 1.2X -Native ORC Vectorized (Pushdown) 5978 5991 14 2.6 380.1 1.6X 
+Parquet Vectorized 9208 9246 24 1.7 585.4 1.0X +Parquet Vectorized (Pushdown) 6355 6366 12 2.5 404.0 1.4X +Native ORC Vectorized 7986 8006 22 2.0 507.7 1.2X +Native ORC Vectorized (Pushdown) 5817 5836 24 2.7 369.8 1.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% int rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11963 11975 19 1.3 760.6 1.0X -Parquet Vectorized (Pushdown) 11449 11464 16 1.4 727.9 1.0X -Native ORC Vectorized 10773 10783 10 1.5 684.9 1.1X -Native ORC Vectorized (Pushdown) 10394 10409 19 1.5 660.8 1.2X +Parquet Vectorized 11608 11632 22 1.4 738.0 1.0X +Parquet Vectorized (Pushdown) 11058 11081 15 1.4 703.1 1.0X +Native ORC Vectorized 10392 10449 58 1.5 660.7 1.1X +Native ORC Vectorized (Pushdown) 9987 10003 13 1.6 635.0 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all int rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 12713 12733 15 1.2 808.3 1.0X -Parquet Vectorized (Pushdown) 12801 12815 14 1.2 813.9 1.0X -Native ORC Vectorized 11367 11387 16 1.4 722.7 1.1X -Native ORC Vectorized (Pushdown) 11474 11480 10 1.4 729.5 1.1X +Parquet Vectorized 12256 12273 18 1.3 779.2 1.0X +Parquet Vectorized (Pushdown) 12325 12363 28 1.3 783.6 1.0X +Native ORC Vectorized 10919 10943 29 1.4 694.2 1.1X +Native ORC Vectorized (Pushdown) 10980 11026 35 1.4 698.1 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit 
Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all int rows (value > -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 12741 12750 9 1.2 810.1 1.0X -Parquet Vectorized (Pushdown) 12807 12836 31 1.2 814.2 1.0X -Native ORC Vectorized 11501 11506 6 1.4 731.2 1.1X -Native ORC Vectorized (Pushdown) 11585 11594 8 1.4 736.6 1.1X +Parquet Vectorized 12196 12240 38 1.3 775.4 1.0X +Parquet Vectorized (Pushdown) 12243 12306 54 1.3 778.4 1.0X +Native ORC Vectorized 10848 10869 22 1.4 689.7 1.1X +Native ORC Vectorized (Pushdown) 10937 10964 29 1.4 695.4 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all int rows (value != -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 12572 12595 22 1.3 799.3 1.0X -Parquet Vectorized (Pushdown) 12635 12654 28 1.2 803.3 1.0X -Native ORC Vectorized 11466 11493 19 1.4 729.0 1.1X -Native ORC Vectorized (Pushdown) 11548 11558 10 1.4 734.2 1.1X +Parquet Vectorized 12402 12415 12 1.3 788.5 1.0X +Parquet Vectorized (Pushdown) 12413 12427 14 1.3 789.2 1.0X +Native ORC Vectorized 10821 10859 25 1.5 688.0 1.1X +Native ORC Vectorized (Pushdown) 10916 10932 13 1.4 694.0 1.1X ================================================================================================ Pushdown for few distinct value case (use dictionary encoding) ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 
7763 64-Core Processor Select 0 distinct string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5530 5572 29 2.8 351.6 1.0X -Parquet Vectorized (Pushdown) 243 256 15 64.7 15.5 22.7X -Native ORC Vectorized 6173 6214 31 2.5 392.5 0.9X -Native ORC Vectorized (Pushdown) 933 935 4 16.9 59.3 5.9X +Parquet Vectorized 5635 5682 35 2.8 358.3 1.0X +Parquet Vectorized (Pushdown) 246 252 6 63.9 15.7 22.9X +Native ORC Vectorized 6232 6241 6 2.5 396.2 0.9X +Native ORC Vectorized (Pushdown) 924 934 13 17.0 58.7 6.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 distinct string row ('100' < value < '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5521 5537 14 2.8 351.0 1.0X -Parquet Vectorized (Pushdown) 245 257 11 64.2 15.6 22.5X -Native ORC Vectorized 6340 6348 5 2.5 403.1 0.9X -Native ORC Vectorized (Pushdown) 931 935 4 16.9 59.2 5.9X +Parquet Vectorized 5646 5669 23 2.8 359.0 1.0X +Parquet Vectorized (Pushdown) 249 259 6 63.3 15.8 22.7X +Native ORC Vectorized 6380 6408 26 2.5 405.7 0.9X +Native ORC Vectorized (Pushdown) 900 909 9 17.5 57.2 6.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row (value = '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5476 5490 14 2.9 348.2 1.0X -Parquet 
Vectorized (Pushdown) 288 298 12 54.7 18.3 19.0X -Native ORC Vectorized 6322 6341 11 2.5 401.9 0.9X -Native ORC Vectorized (Pushdown) 964 971 7 16.3 61.3 5.7X +Parquet Vectorized 5569 5581 11 2.8 354.0 1.0X +Parquet Vectorized (Pushdown) 297 303 6 52.9 18.9 18.7X +Native ORC Vectorized 6378 6387 6 2.5 405.5 0.9X +Native ORC Vectorized (Pushdown) 940 959 17 16.7 59.7 5.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row (value <=> '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5477 5493 10 2.9 348.2 1.0X -Parquet Vectorized (Pushdown) 286 302 20 55.0 18.2 19.2X -Native ORC Vectorized 6324 6340 17 2.5 402.1 0.9X -Native ORC Vectorized (Pushdown) 966 975 11 16.3 61.4 5.7X +Parquet Vectorized 5560 5569 6 2.8 353.5 1.0X +Parquet Vectorized (Pushdown) 305 309 4 51.6 19.4 18.2X +Native ORC Vectorized 6377 6407 31 2.5 405.4 0.9X +Native ORC Vectorized (Pushdown) 952 959 13 16.5 60.5 5.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row ('100' <= value <= '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5550 5561 9 2.8 352.8 1.0X -Parquet Vectorized (Pushdown) 288 296 8 54.6 18.3 19.3X -Native ORC Vectorized 6438 6452 10 2.4 409.3 0.9X -Native ORC Vectorized (Pushdown) 972 977 5 16.2 61.8 5.7X +Parquet Vectorized 5648 5663 17 2.8 359.1 1.0X +Parquet Vectorized (Pushdown) 288 293 4 54.5 18.3 19.6X +Native ORC Vectorized 6430 6456 20 2.4 408.8 
0.9X +Native ORC Vectorized (Pushdown) 968 971 3 16.2 61.6 5.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all distinct string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 13575 13632 39 1.2 863.1 1.0X -Parquet Vectorized (Pushdown) 13578 13607 21 1.2 863.2 1.0X -Native ORC Vectorized 14550 14590 49 1.1 925.0 0.9X -Native ORC Vectorized (Pushdown) 14664 14775 78 1.1 932.3 0.9X +Parquet Vectorized 14383 14409 24 1.1 914.5 1.0X +Parquet Vectorized (Pushdown) 14425 14443 18 1.1 917.1 1.0X +Native ORC Vectorized 15288 15300 11 1.0 972.0 0.9X +Native ORC Vectorized (Pushdown) 15482 15517 60 1.0 984.3 0.9X ================================================================================================ Pushdown benchmark for StringStartsWith ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6543 6644 84 2.4 416.0 1.0X -Parquet Vectorized (Pushdown) 891 922 41 17.7 56.6 7.3X -Native ORC Vectorized 5543 5553 11 2.8 352.4 1.2X -Native ORC Vectorized (Pushdown) 5605 5619 8 2.8 356.4 1.2X +Parquet Vectorized 6744 6776 24 2.3 428.7 1.0X +Parquet Vectorized (Pushdown) 908 916 6 17.3 57.8 7.4X +Native ORC Vectorized 5592 5608 22 2.8 355.5 1.2X +Native ORC Vectorized (Pushdown) 5664 5687 23 2.8 360.1 
1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6367 6383 16 2.5 404.8 1.0X -Parquet Vectorized (Pushdown) 279 286 6 56.3 17.8 22.8X -Native ORC Vectorized 5367 5377 8 2.9 341.2 1.2X -Native ORC Vectorized (Pushdown) 5436 5463 21 2.9 345.6 1.2X +Parquet Vectorized 6447 6462 16 2.4 409.9 1.0X +Parquet Vectorized (Pushdown) 300 302 2 52.4 19.1 21.5X +Native ORC Vectorized 5416 5426 11 2.9 344.3 1.2X +Native ORC Vectorized (Pushdown) 5508 5521 12 2.9 350.2 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6369 6386 15 2.5 404.9 1.0X -Parquet Vectorized (Pushdown) 277 284 6 56.9 17.6 23.0X -Native ORC Vectorized 5341 5370 28 2.9 339.6 1.2X -Native ORC Vectorized (Pushdown) 5435 5443 10 2.9 345.5 1.2X +Parquet Vectorized 6447 6462 12 2.4 409.9 1.0X +Parquet Vectorized (Pushdown) 281 288 6 56.0 17.8 23.0X +Native ORC Vectorized 5394 5400 7 2.9 342.9 1.2X +Native ORC Vectorized (Pushdown) 5467 5502 29 2.9 347.6 1.2X ================================================================================================ Pushdown benchmark for StringEndsWith ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit 
Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%10'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5594 5712 85 2.8 355.6 1.0X -Parquet Vectorized (Pushdown) 384 401 14 41.0 24.4 14.6X -Native ORC Vectorized 6399 6440 49 2.5 406.8 0.9X -Native ORC Vectorized (Pushdown) 6587 6606 15 2.4 418.8 0.8X +Parquet Vectorized 5688 5817 122 2.8 361.7 1.0X +Parquet Vectorized (Pushdown) 368 379 16 42.7 23.4 15.4X +Native ORC Vectorized 6433 6447 10 2.4 409.0 0.9X +Native ORC Vectorized (Pushdown) 6684 6708 21 2.4 424.9 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%1000'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5518 5545 19 2.9 350.8 1.0X -Parquet Vectorized (Pushdown) 294 324 50 53.5 18.7 18.8X -Native ORC Vectorized 6314 6348 27 2.5 401.5 0.9X -Native ORC Vectorized (Pushdown) 6509 6530 20 2.4 413.8 0.8X +Parquet Vectorized 5563 5576 14 2.8 353.7 1.0X +Parquet Vectorized (Pushdown) 266 272 4 59.1 16.9 20.9X +Native ORC Vectorized 6386 6425 30 2.5 406.0 0.9X +Native ORC Vectorized (Pushdown) 6639 6689 48 2.4 422.1 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%786432'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- 
-Parquet Vectorized 5527 5535 7 2.8 351.4 1.0X -Parquet Vectorized (Pushdown) 284 296 9 55.3 18.1 19.4X -Native ORC Vectorized 6290 6301 10 2.5 399.9 0.9X -Native ORC Vectorized (Pushdown) 6552 6565 13 2.4 416.6 0.8X +Parquet Vectorized 5574 5578 4 2.8 354.4 1.0X +Parquet Vectorized (Pushdown) 272 278 4 57.8 17.3 20.5X +Native ORC Vectorized 6333 6412 60 2.5 402.6 0.9X +Native ORC Vectorized (Pushdown) 6604 6667 88 2.4 419.9 0.8X ================================================================================================ Pushdown benchmark for StringContains ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5855 6077 170 2.7 372.3 1.0X -Parquet Vectorized (Pushdown) 922 952 40 17.1 58.6 6.4X -Native ORC Vectorized 6452 6541 82 2.4 410.2 0.9X -Native ORC Vectorized (Pushdown) 6639 6651 10 2.4 422.1 0.9X +Parquet Vectorized 5827 5939 73 2.7 370.5 1.0X +Parquet Vectorized (Pushdown) 810 829 17 19.4 51.5 7.2X +Native ORC Vectorized 6466 6550 65 2.4 411.1 0.9X +Native ORC Vectorized (Pushdown) 6691 6714 21 2.4 425.4 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5542 5550 8 2.8 352.3 1.0X -Parquet Vectorized (Pushdown) 296 310 8 53.2 
18.8 18.7X -Native ORC Vectorized 6214 6226 11 2.5 395.1 0.9X -Native ORC Vectorized (Pushdown) 6419 6431 20 2.5 408.1 0.9X +Parquet Vectorized 5546 5555 10 2.8 352.6 1.0X +Parquet Vectorized (Pushdown) 268 276 4 58.6 17.1 20.7X +Native ORC Vectorized 6251 6258 7 2.5 397.4 0.9X +Native ORC Vectorized (Pushdown) 6454 6471 13 2.4 410.3 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5537 5552 17 2.8 352.0 1.0X -Parquet Vectorized (Pushdown) 297 308 10 53.0 18.9 18.7X -Native ORC Vectorized 6232 6246 15 2.5 396.2 0.9X -Native ORC Vectorized (Pushdown) 6407 6419 14 2.5 407.4 0.9X +Parquet Vectorized 5548 5561 10 2.8 352.7 1.0X +Parquet Vectorized (Pushdown) 268 275 7 58.6 17.1 20.7X +Native ORC Vectorized 6259 6269 8 2.5 397.9 0.9X +Native ORC Vectorized (Pushdown) 6466 6487 20 2.4 411.1 0.9X ================================================================================================ Pushdown benchmark for decimal ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(9, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2442 2459 21 6.4 155.3 1.0X -Parquet Vectorized (Pushdown) 77 90 14 203.1 4.9 31.5X -Native ORC Vectorized 3128 3145 24 5.0 198.9 0.8X -Native ORC Vectorized (Pushdown) 
57 72 9 273.6 3.7 42.5X +Parquet Vectorized 2436 2443 4 6.5 154.9 1.0X +Parquet Vectorized (Pushdown) 71 74 4 222.9 4.5 34.5X +Native ORC Vectorized 3333 3346 14 4.7 211.9 0.7X +Native ORC Vectorized (Pushdown) 58 61 4 271.6 3.7 42.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(9, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3756 3808 31 4.2 238.8 1.0X -Parquet Vectorized (Pushdown) 1912 1937 42 8.2 121.6 2.0X -Native ORC Vectorized 4593 4618 26 3.4 292.0 0.8X -Native ORC Vectorized (Pushdown) 2069 2105 29 7.6 131.5 1.8X +Parquet Vectorized 3845 3881 42 4.1 244.4 1.0X +Parquet Vectorized (Pushdown) 1961 1989 29 8.0 124.7 2.0X +Native ORC Vectorized 4660 4678 17 3.4 296.3 0.8X +Native ORC Vectorized (Pushdown) 2076 2087 11 7.6 132.0 1.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(9, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8252 8270 22 1.9 524.6 1.0X -Parquet Vectorized (Pushdown) 7939 7987 28 2.0 504.7 1.0X -Native ORC Vectorized 9304 9335 42 1.7 591.5 0.9X -Native ORC Vectorized (Pushdown) 8912 8946 32 1.8 566.6 0.9X +Parquet Vectorized 8473 8500 17 1.9 538.7 1.0X +Parquet Vectorized (Pushdown) 8212 8248 23 1.9 522.1 1.0X +Native ORC Vectorized 9900 9917 15 1.6 629.4 0.9X +Native ORC Vectorized (Pushdown) 9487 9498 9 1.7 603.2 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(9, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 9337 9380 67 1.7 593.6 1.0X -Parquet Vectorized (Pushdown) 9347 9376 22 1.7 594.3 1.0X -Native ORC Vectorized 10538 10565 29 1.5 670.0 0.9X -Native ORC Vectorized (Pushdown) 10533 10559 28 1.5 669.7 0.9X +Parquet Vectorized 9464 9502 33 1.7 601.7 1.0X +Parquet Vectorized (Pushdown) 9462 9502 39 1.7 601.6 1.0X +Native ORC Vectorized 10726 10775 35 1.5 681.9 0.9X +Native ORC Vectorized (Pushdown) 10755 10784 24 1.5 683.8 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(18, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2598 2612 15 6.1 165.2 1.0X -Parquet Vectorized (Pushdown) 72 83 13 217.1 4.6 35.9X -Native ORC Vectorized 3113 3124 14 5.1 197.9 0.8X -Native ORC Vectorized (Pushdown) 55 64 12 285.4 3.5 47.1X +Parquet Vectorized 2638 2651 15 6.0 167.7 1.0X +Parquet Vectorized (Pushdown) 71 85 18 220.9 4.5 37.0X +Native ORC Vectorized 3330 3344 14 4.7 211.7 0.8X +Native ORC Vectorized (Pushdown) 55 60 5 285.6 3.5 47.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(18, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
-------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3358 3379 34 4.7 213.5 1.0X -Parquet Vectorized (Pushdown) 1080 1111 28 14.6 68.7 3.1X -Native ORC Vectorized 3874 3884 13 4.1 246.3 0.9X -Native ORC Vectorized (Pushdown) 1111 1137 34 14.2 70.6 3.0X +Parquet Vectorized 3399 3440 49 4.6 216.1 1.0X +Parquet Vectorized (Pushdown) 1064 1076 10 14.8 67.7 3.2X +Native ORC Vectorized 4064 4078 12 3.9 258.4 0.8X +Native ORC Vectorized (Pushdown) 1103 1109 6 14.3 70.2 3.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(18, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6310 6332 17 2.5 401.2 1.0X -Parquet Vectorized (Pushdown) 5049 5073 21 3.1 321.0 1.2X -Native ORC Vectorized 6975 6984 7 2.3 443.5 0.9X -Native ORC Vectorized (Pushdown) 5396 5411 14 2.9 343.1 1.2X +Parquet Vectorized 6242 6260 21 2.5 396.9 1.0X +Parquet Vectorized (Pushdown) 4988 5018 32 3.2 317.2 1.3X +Native ORC Vectorized 6949 6963 14 2.3 441.8 0.9X +Native ORC Vectorized (Pushdown) 5318 5332 10 3.0 338.1 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(18, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 9115 9121 4 1.7 579.5 1.0X -Parquet Vectorized (Pushdown) 8907 8924 18 1.8 566.3 1.0X -Native ORC Vectorized 9981 9994 9 1.6 634.6 0.9X -Native ORC 
Vectorized (Pushdown) 9656 9675 13 1.6 613.9 0.9X +Parquet Vectorized 9079 9090 14 1.7 577.2 1.0X +Parquet Vectorized (Pushdown) 8825 8842 19 1.8 561.1 1.0X +Native ORC Vectorized 9902 9928 26 1.6 629.5 0.9X +Native ORC Vectorized (Pushdown) 9611 9616 4 1.6 611.0 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(38, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3755 3781 23 4.2 238.7 1.0X -Parquet Vectorized (Pushdown) 78 81 2 201.4 5.0 48.1X -Native ORC Vectorized 3131 3155 36 5.0 199.0 1.2X -Native ORC Vectorized (Pushdown) 54 56 4 292.6 3.4 69.8X +Parquet Vectorized 3823 3841 12 4.1 243.1 1.0X +Parquet Vectorized (Pushdown) 80 83 4 196.7 5.1 47.8X +Native ORC Vectorized 3330 3350 18 4.7 211.7 1.1X +Native ORC Vectorized (Pushdown) 55 60 5 287.2 3.5 69.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(38, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 4741 4755 18 3.3 301.4 1.0X -Parquet Vectorized (Pushdown) 1415 1417 2 11.1 90.0 3.3X -Native ORC Vectorized 4049 4065 20 3.9 257.4 1.2X -Native ORC Vectorized (Pushdown) 1220 1231 17 12.9 77.6 3.9X +Parquet Vectorized 4750 4807 89 3.3 302.0 1.0X +Parquet Vectorized (Pushdown) 1400 1407 7 11.2 89.0 3.4X +Native ORC Vectorized 4157 4168 12 3.8 264.3 1.1X +Native ORC Vectorized (Pushdown) 1211 1215 3 13.0 77.0 3.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(38, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8556 8564 9 1.8 543.9 1.0X -Parquet Vectorized (Pushdown) 6743 6755 7 2.3 428.7 1.3X -Native ORC Vectorized 7513 7524 9 2.1 477.7 1.1X -Native ORC Vectorized (Pushdown) 5906 5914 5 2.7 375.5 1.4X +Parquet Vectorized 8636 8662 19 1.8 549.1 1.0X +Parquet Vectorized (Pushdown) 6754 6787 25 2.3 429.4 1.3X +Native ORC Vectorized 7526 7536 12 2.1 478.5 1.1X +Native ORC Vectorized (Pushdown) 5915 5934 13 2.7 376.0 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(38, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 12314 12332 16 1.3 782.9 1.0X -Parquet Vectorized (Pushdown) 11976 11983 6 1.3 761.4 1.0X -Native ORC Vectorized 10898 10916 14 1.4 692.9 1.1X -Native ORC Vectorized (Pushdown) 10605 10636 30 1.5 674.3 1.2X +Parquet Vectorized 12415 12446 27 1.3 789.3 1.0X +Parquet Vectorized (Pushdown) 12049 12076 24 1.3 766.1 1.0X +Native ORC Vectorized 10912 10980 93 1.4 693.7 1.1X +Native ORC Vectorized (Pushdown) 10559 10608 43 1.5 671.4 1.2X ================================================================================================ Pushdown benchmark for InSet -> InFilters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6282 6318 39 2.5 399.4 1.0X -Parquet Vectorized (Pushdown) 329 370 54 47.8 20.9 19.1X -Native ORC Vectorized 4793 4843 35 3.3 304.7 1.3X -Native ORC Vectorized (Pushdown) 307 321 15 51.2 19.5 20.4X +Parquet Vectorized 6312 6343 25 2.5 401.3 1.0X +Parquet Vectorized (Pushdown) 312 328 11 50.4 19.8 20.2X +Native ORC Vectorized 4774 4861 102 3.3 303.5 1.3X +Native ORC Vectorized (Pushdown) 286 300 15 55.0 18.2 22.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6120 6135 12 2.6 389.1 1.0X -Parquet Vectorized (Pushdown) 294 304 8 53.5 18.7 20.8X -Native ORC Vectorized 4787 4815 38 3.3 304.4 1.3X -Native ORC Vectorized (Pushdown) 285 301 12 55.2 18.1 21.5X +Parquet Vectorized 6158 6190 20 2.6 391.5 1.0X +Parquet Vectorized (Pushdown) 292 302 11 53.9 18.5 21.1X +Native ORC Vectorized 4712 4748 28 3.3 299.6 1.3X +Native ORC Vectorized (Pushdown) 285 299 13 55.2 18.1 21.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
-------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6136 6151 20 2.6 390.1 1.0X -Parquet Vectorized (Pushdown) 297 304 9 53.0 18.9 20.7X -Native ORC Vectorized 4787 4802 26 3.3 304.3 1.3X -Native ORC Vectorized (Pushdown) 286 296 7 55.0 18.2 21.4X +Parquet Vectorized 6182 6209 20 2.5 393.0 1.0X +Parquet Vectorized (Pushdown) 288 301 9 54.5 18.3 21.4X +Native ORC Vectorized 4730 4794 51 3.3 300.7 1.3X +Native ORC Vectorized (Pushdown) 295 301 5 53.3 18.8 20.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6143 6155 13 2.6 390.6 1.0X -Parquet Vectorized (Pushdown) 304 307 3 51.7 19.3 20.2X -Native ORC Vectorized 4811 4826 16 3.3 305.9 1.3X -Native ORC Vectorized (Pushdown) 294 301 5 53.5 18.7 20.9X +Parquet Vectorized 6239 6254 20 2.5 396.7 1.0X +Parquet Vectorized (Pushdown) 306 311 4 51.4 19.5 20.4X +Native ORC Vectorized 4747 4811 37 3.3 301.8 1.3X +Native ORC Vectorized (Pushdown) 304 307 3 51.8 19.3 20.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6127 6133 6 2.6 389.5 1.0X -Parquet Vectorized (Pushdown) 307 312 6 51.3 19.5 20.0X -Native ORC Vectorized 4818 4845 35 3.3 306.3 
1.3X -Native ORC Vectorized (Pushdown) 298 310 9 52.8 18.9 20.6X +Parquet Vectorized 6212 6253 42 2.5 395.0 1.0X +Parquet Vectorized (Pushdown) 306 317 9 51.5 19.4 20.3X +Native ORC Vectorized 4814 4853 40 3.3 306.1 1.3X +Native ORC Vectorized (Pushdown) 306 314 6 51.3 19.5 20.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6122 6141 22 2.6 389.2 1.0X -Parquet Vectorized (Pushdown) 310 317 7 50.8 19.7 19.8X -Native ORC Vectorized 4813 4835 23 3.3 306.0 1.3X -Native ORC Vectorized (Pushdown) 300 307 6 52.4 19.1 20.4X +Parquet Vectorized 6192 6216 23 2.5 393.7 1.0X +Parquet Vectorized (Pushdown) 303 309 4 51.9 19.3 20.4X +Native ORC Vectorized 4752 4807 46 3.3 302.1 1.3X +Native ORC Vectorized (Pushdown) 307 320 9 51.2 19.5 20.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6326 6331 5 2.5 402.2 1.0X -Parquet Vectorized (Pushdown) 893 901 9 17.6 56.7 7.1X -Native ORC Vectorized 5039 5049 12 3.1 320.3 1.3X -Native ORC Vectorized (Pushdown) 399 402 3 39.4 25.4 15.8X +Parquet Vectorized 6384 6413 29 2.5 405.9 1.0X +Parquet Vectorized (Pushdown) 885 890 5 17.8 56.2 7.2X +Native ORC Vectorized 4935 4972 41 3.2 313.8 1.3X +Native ORC Vectorized (Pushdown) 421 425 2 37.3 26.8 15.2X -OpenJDK 
64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6309 6328 20 2.5 401.1 1.0X -Parquet Vectorized (Pushdown) 3291 3308 16 4.8 209.2 1.9X -Native ORC Vectorized 5019 5032 12 3.1 319.1 1.3X -Native ORC Vectorized (Pushdown) 429 433 3 36.6 27.3 14.7X +Parquet Vectorized 6392 6432 25 2.5 406.4 1.0X +Parquet Vectorized (Pushdown) 3230 3247 12 4.9 205.4 2.0X +Native ORC Vectorized 4940 4974 34 3.2 314.1 1.3X +Native ORC Vectorized (Pushdown) 430 434 4 36.6 27.4 14.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6311 6335 22 2.5 401.3 1.0X -Parquet Vectorized (Pushdown) 5508 5519 9 2.9 350.2 1.1X -Native ORC Vectorized 5020 5036 13 3.1 319.2 1.3X -Native ORC Vectorized (Pushdown) 442 444 2 35.6 28.1 14.3X +Parquet Vectorized 6399 6407 5 2.5 406.8 1.0X +Parquet Vectorized (Pushdown) 5280 5305 32 3.0 335.7 1.2X +Native ORC Vectorized 4913 4920 9 3.2 312.4 1.3X +Native ORC Vectorized (Pushdown) 422 428 5 37.3 26.8 15.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6271 6290 14 2.5 398.7 1.0X -Parquet Vectorized (Pushdown) 872 877 4 18.0 55.4 7.2X -Native ORC Vectorized 4971 4981 9 3.2 316.0 1.3X -Native ORC Vectorized (Pushdown) 497 502 4 31.7 31.6 12.6X +Parquet Vectorized 6387 6415 28 2.5 406.1 1.0X +Parquet Vectorized (Pushdown) 879 884 3 17.9 55.9 7.3X +Native ORC Vectorized 4898 4907 8 3.2 311.4 1.3X +Native ORC Vectorized (Pushdown) 512 514 1 30.7 32.6 12.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6284 6295 8 2.5 399.5 1.0X -Parquet Vectorized (Pushdown) 3320 3340 13 4.7 211.1 1.9X -Native ORC Vectorized 4972 4984 8 3.2 316.1 1.3X -Native ORC Vectorized (Pushdown) 564 567 3 27.9 35.9 11.1X +Parquet Vectorized 6409 6424 15 2.5 407.5 1.0X +Parquet Vectorized (Pushdown) 3279 3297 30 4.8 208.5 2.0X +Native ORC Vectorized 4900 4920 24 3.2 311.5 1.3X +Native ORC Vectorized (Pushdown) 584 592 7 26.9 37.2 11.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6289 6307 12 2.5 399.8 1.0X -Parquet Vectorized (Pushdown) 5740 5750 11 2.7 365.0 1.1X -Native ORC Vectorized 4972 4982 6 3.2 
316.1 1.3X -Native ORC Vectorized (Pushdown) 559 567 11 28.1 35.6 11.2X +Parquet Vectorized 6420 6445 26 2.4 408.2 1.0X +Parquet Vectorized (Pushdown) 5734 5745 12 2.7 364.6 1.1X +Native ORC Vectorized 4940 5018 59 3.2 314.0 1.3X +Native ORC Vectorized (Pushdown) 575 581 7 27.4 36.5 11.2X ================================================================================================ Pushdown benchmark for tinyint ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 tinyint row (value = CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2748 2766 16 5.7 174.7 1.0X -Parquet Vectorized (Pushdown) 107 114 8 146.5 6.8 25.6X -Native ORC Vectorized 2194 2203 9 7.2 139.5 1.3X -Native ORC Vectorized (Pushdown) 112 121 9 140.3 7.1 24.5X +Parquet Vectorized 2841 2865 29 5.5 180.6 1.0X +Parquet Vectorized (Pushdown) 112 122 10 140.6 7.1 25.4X +Native ORC Vectorized 2239 2247 8 7.0 142.4 1.3X +Native ORC Vectorized (Pushdown) 115 130 16 136.9 7.3 24.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3325 3359 48 4.7 211.4 1.0X -Parquet Vectorized (Pushdown) 960 973 13 16.4 61.0 3.5X -Native ORC Vectorized 2691 2705 17 5.8 171.1 1.2X -Native ORC Vectorized (Pushdown) 840 846 3 18.7 53.4 4.0X 
+Parquet Vectorized 3366 3422 87 4.7 214.0 1.0X +Parquet Vectorized (Pushdown) 987 990 2 15.9 62.7 3.4X +Native ORC Vectorized 2766 2784 10 5.7 175.9 1.2X +Native ORC Vectorized (Pushdown) 876 879 3 18.0 55.7 3.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5896 5906 9 2.7 374.9 1.0X -Parquet Vectorized (Pushdown) 4608 4631 20 3.4 293.0 1.3X -Native ORC Vectorized 5059 5084 21 3.1 321.6 1.2X -Native ORC Vectorized (Pushdown) 4014 4027 8 3.9 255.2 1.5X +Parquet Vectorized 6015 6025 7 2.6 382.4 1.0X +Parquet Vectorized (Pushdown) 4690 4699 9 3.4 298.2 1.3X +Native ORC Vectorized 5138 5194 38 3.1 326.7 1.2X +Native ORC Vectorized (Pushdown) 3988 4002 10 3.9 253.5 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8646 8666 24 1.8 549.7 1.0X -Parquet Vectorized (Pushdown) 8379 8396 11 1.9 532.7 1.0X -Native ORC Vectorized 7526 7539 14 2.1 478.5 1.1X -Native ORC Vectorized (Pushdown) 7319 7342 16 2.1 465.4 1.2X +Parquet Vectorized 8535 8557 21 1.8 542.7 1.0X +Parquet Vectorized (Pushdown) 8308 8326 14 1.9 528.2 1.0X +Native ORC Vectorized 7581 7600 24 2.1 482.0 1.1X +Native ORC Vectorized (Pushdown) 7379 7399 22 2.1 469.1 1.2X 
================================================================================================ Pushdown benchmark for Timestamp ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3102 3122 23 5.1 197.2 1.0X -Parquet Vectorized (Pushdown) 3096 3104 5 5.1 196.8 1.0X -Native ORC Vectorized 1983 1994 15 7.9 126.1 1.6X -Native ORC Vectorized (Pushdown) 39 44 5 404.7 2.5 79.8X +Parquet Vectorized 3155 3166 9 5.0 200.6 1.0X +Parquet Vectorized (Pushdown) 3169 3174 5 5.0 201.5 1.0X +Native ORC Vectorized 2102 2116 17 7.5 133.7 1.5X +Native ORC Vectorized (Pushdown) 39 44 6 399.9 2.5 80.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as INT96 rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3791 3820 27 4.1 241.0 1.0X -Parquet Vectorized (Pushdown) 3755 3774 12 4.2 238.8 1.0X -Native ORC Vectorized 2618 2635 18 6.0 166.5 1.4X -Native ORC Vectorized (Pushdown) 860 865 6 18.3 54.7 4.4X +Parquet Vectorized 3827 3848 30 4.1 243.3 1.0X +Parquet Vectorized (Pushdown) 3803 3831 37 4.1 241.8 1.0X +Native ORC Vectorized 2738 2757 20 5.7 174.1 1.4X +Native ORC Vectorized (Pushdown) 
879 887 9 17.9 55.9 4.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as INT96 rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6492 6500 8 2.4 412.8 1.0X -Parquet Vectorized (Pushdown) 6485 6497 12 2.4 412.3 1.0X -Native ORC Vectorized 5272 5286 21 3.0 335.2 1.2X -Native ORC Vectorized (Pushdown) 4245 4253 9 3.7 269.9 1.5X +Parquet Vectorized 6597 6622 19 2.4 419.4 1.0X +Parquet Vectorized (Pushdown) 6618 6639 14 2.4 420.8 1.0X +Native ORC Vectorized 5324 5342 19 3.0 338.5 1.2X +Native ORC Vectorized (Pushdown) 4259 4264 5 3.7 270.8 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as INT96 rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 9353 9362 9 1.7 594.7 1.0X -Parquet Vectorized (Pushdown) 9335 9349 17 1.7 593.5 1.0X -Native ORC Vectorized 7781 7799 17 2.0 494.7 1.2X -Native ORC Vectorized (Pushdown) 7598 7613 9 2.1 483.1 1.2X +Parquet Vectorized 9349 9366 22 1.7 594.4 1.0X +Parquet Vectorized (Pushdown) 9360 9391 21 1.7 595.1 1.0X +Native ORC Vectorized 7882 7909 23 2.0 501.1 1.2X +Native ORC Vectorized (Pushdown) 7666 7676 6 2.1 487.4 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core 
Processor Select 1 timestamp stored as TIMESTAMP_MICROS row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2578 2590 12 6.1 163.9 1.0X -Parquet Vectorized (Pushdown) 70 77 11 223.4 4.5 36.6X -Native ORC Vectorized 1982 1987 8 7.9 126.0 1.3X -Native ORC Vectorized (Pushdown) 39 43 5 404.6 2.5 66.3X +Parquet Vectorized 2617 2627 7 6.0 166.4 1.0X +Parquet Vectorized (Pushdown) 69 74 9 229.5 4.4 38.2X +Native ORC Vectorized 2092 2097 4 7.5 133.0 1.3X +Native ORC Vectorized (Pushdown) 38 43 5 409.7 2.4 68.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3228 3236 9 4.9 205.2 1.0X -Parquet Vectorized (Pushdown) 983 987 5 16.0 62.5 3.3X -Native ORC Vectorized 2607 2617 6 6.0 165.7 1.2X -Native ORC Vectorized (Pushdown) 859 864 4 18.3 54.6 3.8X +Parquet Vectorized 3278 3287 10 4.8 208.4 1.0X +Parquet Vectorized (Pushdown) 999 1010 9 15.7 63.5 3.3X +Native ORC Vectorized 2724 2732 7 5.8 173.2 1.2X +Native ORC Vectorized (Pushdown) 864 870 5 18.2 54.9 3.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5897 5908 9 2.7 374.9 1.0X -Parquet Vectorized (Pushdown) 4693 4705 13 3.4 298.3 1.3X -Native ORC Vectorized 5145 5195 75 3.1 327.1 1.1X -Native ORC Vectorized (Pushdown) 4134 4139 5 3.8 262.8 1.4X +Parquet Vectorized 6098 6125 40 2.6 387.7 1.0X +Parquet Vectorized (Pushdown) 4842 4859 15 3.2 307.9 1.3X +Native ORC Vectorized 5243 5246 3 3.0 333.4 1.2X +Native ORC Vectorized (Pushdown) 4205 4220 14 3.7 267.4 1.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8540 8549 9 1.8 542.9 1.0X -Parquet Vectorized (Pushdown) 8330 8339 11 1.9 529.6 1.0X -Native ORC Vectorized 7638 7650 11 2.1 485.6 1.1X -Native ORC Vectorized (Pushdown) 7440 7448 11 2.1 473.0 1.1X +Parquet Vectorized 8867 8893 24 1.8 563.7 1.0X +Parquet Vectorized (Pushdown) 8630 8677 38 1.8 548.7 1.0X +Native ORC Vectorized 7897 7900 2 2.0 502.1 1.1X +Native ORC Vectorized (Pushdown) 7700 7716 11 2.0 489.6 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as TIMESTAMP_MILLIS row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2748 2759 18 5.7 174.7 1.0X -Parquet Vectorized (Pushdown) 70 73 3 224.7 4.5 39.3X -Native ORC Vectorized 1986 1999 18 7.9 126.2 1.4X -Native ORC Vectorized (Pushdown) 39 42 5 407.9 2.5 71.3X +Parquet Vectorized 2783 2801 17 5.7 177.0 1.0X +Parquet Vectorized (Pushdown) 72 75 4 218.9 4.6 38.7X +Native ORC Vectorized 2023 2032 6 7.8 128.6 1.4X +Native ORC Vectorized (Pushdown) 40 43 4 393.7 2.5 69.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3397 3403 7 4.6 216.0 1.0X -Parquet Vectorized (Pushdown) 999 1006 6 15.7 63.5 3.4X -Native ORC Vectorized 2612 2620 12 6.0 166.1 1.3X -Native ORC Vectorized (Pushdown) 876 879 2 18.0 55.7 3.9X +Parquet Vectorized 3474 3479 3 4.5 220.9 1.0X +Parquet Vectorized (Pushdown) 1036 1044 8 15.2 65.9 3.4X +Native ORC Vectorized 2757 2766 7 5.7 175.3 1.3X +Native ORC Vectorized (Pushdown) 910 914 4 17.3 57.9 3.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6116 6123 9 2.6 388.8 1.0X -Parquet Vectorized (Pushdown) 4802 4813 11 3.3 305.3 1.3X -Native ORC Vectorized 5152 5160 7 3.1 327.6 1.2X -Native ORC Vectorized (Pushdown) 4126 4138 9 3.8 262.3 1.5X +Parquet Vectorized 6243 6270 16 2.5 396.9 1.0X +Parquet Vectorized (Pushdown) 4928 4956 36 3.2 313.3 1.3X +Native ORC Vectorized 5326 5332 5 3.0 338.6 1.2X +Native ORC Vectorized (Pushdown) 4262 4272 8 3.7 271.0 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8692 8746 68 1.8 552.6 1.0X -Parquet Vectorized (Pushdown) 8481 8495 15 1.9 539.2 1.0X -Native ORC Vectorized 7644 7653 10 2.1 486.0 1.1X -Native ORC Vectorized (Pushdown) 7449 7462 9 2.1 473.6 1.2X +Parquet Vectorized 8993 9036 38 1.7 571.7 1.0X +Parquet Vectorized (Pushdown) 8777 8803 19 1.8 558.0 1.0X +Native ORC Vectorized 7774 7790 17 2.0 494.3 1.2X +Native ORC Vectorized (Pushdown) 7573 7587 12 2.1 481.5 1.2X ================================================================================================ Pushdown benchmark with many filters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 row with 1 filters: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 58 72 19 0.0 57741888.0 1.0X -Parquet Vectorized (Pushdown) 58 61 3 0.0 58429929.0 1.0X -Native ORC Vectorized 51 53 2 0.0 51359839.0 1.1X -Native ORC Vectorized (Pushdown) 53 55 3 0.0 53142981.0 1.1X +Parquet Vectorized 49 75 18 0.0 49268544.0 1.0X +Parquet Vectorized (Pushdown) 49 52 3 0.0 48949281.0 1.0X +Native ORC Vectorized 43 46 3 0.0 43343584.0 1.1X +Native ORC Vectorized (Pushdown) 44 48 4 0.0 44392858.0 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 row with 250 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 400 420 16 0.0 400224642.0 1.0X -Parquet Vectorized (Pushdown) 399 417 11 0.0 399319343.0 1.0X -Native ORC Vectorized 387 393 7 0.0 387215337.0 1.0X -Native ORC Vectorized (Pushdown) 390 396 7 0.0 389851290.0 1.0X +Parquet Vectorized 193 196 3 0.0 192567723.0 1.0X +Parquet Vectorized (Pushdown) 191 206 18 0.0 191266175.0 1.0X +Native ORC Vectorized 178 182 4 0.0 178471724.0 1.1X +Native ORC Vectorized (Pushdown) 184 190 7 0.0 183580008.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 row with 500 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 2216 2266 67 0.0 2215862652.0 1.0X -Parquet Vectorized (Pushdown) 2237 2281 47 0.0 2237304947.0 1.0X -Native ORC Vectorized 2202 2257 58 0.0 
2202335420.0 1.0X -Native ORC Vectorized (Pushdown) 2219 2262 65 0.0 2219444511.0 1.0X +Parquet Vectorized 581 587 6 0.0 581443562.0 1.0X +Parquet Vectorized (Pushdown) 591 611 14 0.0 591021175.0 1.0X +Native ORC Vectorized 563 580 11 0.0 563194077.0 1.0X +Native ORC Vectorized (Pushdown) 583 597 12 0.0 582533796.0 1.0X diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt index f762a7147d31b..e6f878de0a974 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt @@ -2,733 +2,733 @@ Pushdown for many distinct value case ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6564 6709 108 2.4 417.3 1.0X -Parquet Vectorized (Pushdown) 315 335 18 50.0 20.0 20.9X -Native ORC Vectorized 5085 5205 71 3.1 323.3 1.3X -Native ORC Vectorized (Pushdown) 296 309 11 53.2 18.8 22.2X +Parquet Vectorized 6867 6919 54 2.3 436.6 1.0X +Parquet Vectorized (Pushdown) 313 342 24 50.3 19.9 22.0X +Native ORC Vectorized 5135 5177 42 3.1 326.5 1.3X +Native ORC Vectorized (Pushdown) 314 327 9 50.1 19.9 21.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 string row ('7864320' < value < '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
----------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6573 6616 51 2.4 417.9 1.0X -Parquet Vectorized (Pushdown) 291 309 16 54.1 18.5 22.6X -Native ORC Vectorized 5027 5047 17 3.1 319.6 1.3X -Native ORC Vectorized (Pushdown) 292 316 19 53.9 18.5 22.5X +Parquet Vectorized 6952 6967 17 2.3 442.0 1.0X +Parquet Vectorized (Pushdown) 313 324 14 50.2 19.9 22.2X +Native ORC Vectorized 5212 5234 18 3.0 331.4 1.3X +Native ORC Vectorized (Pushdown) 318 331 8 49.5 20.2 21.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 string row (value = '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6453 6497 31 2.4 410.3 1.0X -Parquet Vectorized (Pushdown) 289 295 6 54.4 18.4 22.3X -Native ORC Vectorized 4973 5006 25 3.2 316.2 1.3X -Native ORC Vectorized (Pushdown) 276 288 14 57.0 17.5 23.4X +Parquet Vectorized 6921 6940 11 2.3 440.1 1.0X +Parquet Vectorized (Pushdown) 299 310 11 52.7 19.0 23.2X +Native ORC Vectorized 5203 5210 6 3.0 330.8 1.3X +Native ORC Vectorized (Pushdown) 312 319 7 50.4 19.8 22.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 string row (value <=> '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6498 6553 74 2.4 413.1 1.0X -Parquet Vectorized (Pushdown) 284 294 7 55.3 18.1 22.9X -Native ORC Vectorized 5070 5087 11 3.1 322.3 1.3X -Native ORC Vectorized (Pushdown) 272 287 14 57.9 
17.3 23.9X +Parquet Vectorized 6899 6925 15 2.3 438.6 1.0X +Parquet Vectorized (Pushdown) 286 303 13 55.0 18.2 24.1X +Native ORC Vectorized 5194 5210 15 3.0 330.2 1.3X +Native ORC Vectorized (Pushdown) 296 303 6 53.2 18.8 23.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 string row ('7864320' <= value <= '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6558 6601 43 2.4 416.9 1.0X -Parquet Vectorized (Pushdown) 275 288 9 57.1 17.5 23.8X -Native ORC Vectorized 5016 5046 26 3.1 318.9 1.3X -Native ORC Vectorized (Pushdown) 273 289 18 57.5 17.4 24.0X +Parquet Vectorized 6934 6957 27 2.3 440.8 1.0X +Parquet Vectorized (Pushdown) 288 296 8 54.6 18.3 24.1X +Native ORC Vectorized 5212 5229 15 3.0 331.4 1.3X +Native ORC Vectorized (Pushdown) 304 308 3 51.8 19.3 22.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 13444 13642 185 1.2 854.8 1.0X -Parquet Vectorized (Pushdown) 13455 13505 51 1.2 855.4 1.0X -Native ORC Vectorized 12196 12247 41 1.3 775.4 1.1X -Native ORC Vectorized (Pushdown) 12230 12264 21 1.3 777.5 1.1X +Parquet Vectorized 13657 13798 103 1.2 868.3 1.0X +Parquet Vectorized (Pushdown) 13709 13730 14 1.1 871.6 1.0X +Native ORC Vectorized 12028 12061 37 1.3 764.7 1.1X +Native ORC Vectorized (Pushdown) 12105 12152 29 1.3 769.6 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 int row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6126 6159 26 2.6 389.5 1.0X -Parquet Vectorized (Pushdown) 260 273 10 60.6 16.5 23.6X -Native ORC Vectorized 4546 4572 18 3.5 289.0 1.3X -Native ORC Vectorized (Pushdown) 260 275 11 60.5 16.5 23.5X +Parquet Vectorized 6544 6575 26 2.4 416.1 1.0X +Parquet Vectorized (Pushdown) 274 283 9 57.4 17.4 23.9X +Native ORC Vectorized 4734 4753 13 3.3 301.0 1.4X +Native ORC Vectorized (Pushdown) 283 293 8 55.5 18.0 23.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 int row (7864320 < value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6123 6138 11 2.6 389.3 1.0X -Parquet Vectorized (Pushdown) 273 286 15 57.7 17.3 22.4X -Native ORC Vectorized 4557 4590 42 3.5 289.7 1.3X -Native ORC Vectorized (Pushdown) 265 277 8 59.3 16.9 23.1X +Parquet Vectorized 6522 6549 39 2.4 414.7 1.0X +Parquet Vectorized (Pushdown) 285 296 10 55.3 18.1 22.9X +Native ORC Vectorized 4717 4734 11 3.3 299.9 1.4X +Native ORC Vectorized (Pushdown) 290 296 5 54.3 18.4 22.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 int row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6105 6212 72 2.6 388.1 1.0X -Parquet Vectorized (Pushdown) 272 277 7 57.8 17.3 22.4X -Native ORC Vectorized 4581 4651 77 3.4 291.2 1.3X -Native ORC Vectorized (Pushdown) 264 275 9 59.7 16.8 23.2X +Parquet Vectorized 6556 6567 10 2.4 416.8 1.0X +Parquet Vectorized (Pushdown) 279 288 6 56.3 17.8 23.5X +Native ORC Vectorized 4778 4790 8 3.3 303.8 1.4X +Native ORC Vectorized (Pushdown) 285 291 4 55.2 18.1 23.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 int row (value <=> 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6162 6217 59 2.6 391.8 1.0X -Parquet Vectorized (Pushdown) 263 275 9 59.8 16.7 23.4X -Native ORC Vectorized 4611 4630 23 3.4 293.1 1.3X -Native ORC Vectorized (Pushdown) 259 267 5 60.8 16.4 23.8X +Parquet Vectorized 6561 6594 41 2.4 417.1 1.0X +Parquet Vectorized (Pushdown) 279 284 4 56.4 17.7 23.5X +Native ORC Vectorized 4785 4792 8 3.3 304.2 1.4X +Native ORC Vectorized (Pushdown) 284 292 6 55.3 18.1 23.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 int row (7864320 <= value <= 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6109 6137 21 2.6 388.4 1.0X -Parquet Vectorized (Pushdown) 260 270 5 60.4 16.5 23.5X -Native ORC Vectorized 4596 4621 34 3.4 292.2 1.3X -Native ORC Vectorized (Pushdown) 263 272 7 59.7 16.8 23.2X 
+Parquet Vectorized 6568 6587 21 2.4 417.6 1.0X +Parquet Vectorized (Pushdown) 277 282 3 56.9 17.6 23.7X +Native ORC Vectorized 4775 4798 19 3.3 303.6 1.4X +Native ORC Vectorized (Pushdown) 284 290 5 55.3 18.1 23.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 int row (7864319 < value < 7864321): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6104 6142 41 2.6 388.1 1.0X -Parquet Vectorized (Pushdown) 266 278 13 59.0 16.9 22.9X -Native ORC Vectorized 4601 4668 40 3.4 292.5 1.3X -Native ORC Vectorized (Pushdown) 264 271 7 59.5 16.8 23.1X +Parquet Vectorized 6557 6576 20 2.4 416.9 1.0X +Parquet Vectorized (Pushdown) 275 283 7 57.3 17.5 23.9X +Native ORC Vectorized 4783 4807 23 3.3 304.1 1.4X +Native ORC Vectorized (Pushdown) 284 289 4 55.4 18.0 23.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% int rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6775 6878 122 2.3 430.8 1.0X -Parquet Vectorized (Pushdown) 1502 1519 13 10.5 95.5 4.5X -Native ORC Vectorized 5241 5259 17 3.0 333.2 1.3X -Native ORC Vectorized (Pushdown) 1346 1359 11 11.7 85.6 5.0X +Parquet Vectorized 7224 7258 23 2.2 459.3 1.0X +Parquet Vectorized (Pushdown) 1586 1589 4 9.9 100.8 4.6X +Native ORC Vectorized 5423 5455 30 2.9 344.8 1.3X +Native ORC Vectorized (Pushdown) 1408 1430 25 11.2 89.5 5.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on 
Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% int rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 9068 9115 48 1.7 576.5 1.0X -Parquet Vectorized (Pushdown) 6144 6157 17 2.6 390.6 1.5X -Native ORC Vectorized 7649 7712 67 2.1 486.3 1.2X -Native ORC Vectorized (Pushdown) 5542 5561 15 2.8 352.4 1.6X +Parquet Vectorized 9684 9692 5 1.6 615.7 1.0X +Parquet Vectorized (Pushdown) 6559 6581 17 2.4 417.0 1.5X +Native ORC Vectorized 7866 7894 24 2.0 500.1 1.2X +Native ORC Vectorized (Pushdown) 5654 5668 11 2.8 359.5 1.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% int rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11477 11634 171 1.4 729.7 1.0X -Parquet Vectorized (Pushdown) 10963 11008 79 1.4 697.0 1.0X -Native ORC Vectorized 9938 9974 34 1.6 631.9 1.2X -Native ORC Vectorized (Pushdown) 9611 9667 77 1.6 611.1 1.2X +Parquet Vectorized 12234 12243 10 1.3 777.8 1.0X +Parquet Vectorized (Pushdown) 11654 11671 17 1.3 740.9 1.0X +Native ORC Vectorized 10449 10479 23 1.5 664.3 1.2X +Native ORC Vectorized (Pushdown) 10073 10120 40 1.6 640.4 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all int rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11960 12016 47 
1.3 760.4 1.0X -Parquet Vectorized (Pushdown) 12071 12135 58 1.3 767.5 1.0X -Native ORC Vectorized 10598 10650 53 1.5 673.8 1.1X -Native ORC Vectorized (Pushdown) 10651 10736 70 1.5 677.2 1.1X +Parquet Vectorized 12733 12756 20 1.2 809.5 1.0X +Parquet Vectorized (Pushdown) 12700 12719 20 1.2 807.4 1.0X +Native ORC Vectorized 10963 10996 31 1.4 697.0 1.2X +Native ORC Vectorized (Pushdown) 11063 11088 23 1.4 703.4 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all int rows (value > -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11960 11979 19 1.3 760.4 1.0X -Parquet Vectorized (Pushdown) 12058 12147 68 1.3 766.7 1.0X -Native ORC Vectorized 10563 10620 37 1.5 671.6 1.1X -Native ORC Vectorized (Pushdown) 10708 10947 187 1.5 680.8 1.1X +Parquet Vectorized 12663 12687 27 1.2 805.1 1.0X +Parquet Vectorized (Pushdown) 12760 12792 26 1.2 811.2 1.0X +Native ORC Vectorized 10947 10976 42 1.4 696.0 1.2X +Native ORC Vectorized (Pushdown) 11021 11058 24 1.4 700.7 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all int rows (value != -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 12110 12189 60 1.3 769.9 1.0X -Parquet Vectorized (Pushdown) 12337 12422 81 1.3 784.4 1.0X -Native ORC Vectorized 10589 10660 105 1.5 673.2 1.1X -Native ORC Vectorized (Pushdown) 10648 10762 72 1.5 677.0 1.1X +Parquet Vectorized 12632 12656 24 1.2 803.1 1.0X +Parquet Vectorized (Pushdown) 12696 12733 30 1.2 807.2 1.0X 
+Native ORC Vectorized 10943 10969 22 1.4 695.7 1.2X +Native ORC Vectorized (Pushdown) 11050 11103 41 1.4 702.6 1.1X ================================================================================================ Pushdown for few distinct value case (use dictionary encoding) ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 distinct string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5859 5871 12 2.7 372.5 1.0X -Parquet Vectorized (Pushdown) 237 246 10 66.4 15.1 24.8X -Native ORC Vectorized 6491 6523 29 2.4 412.7 0.9X -Native ORC Vectorized (Pushdown) 907 910 2 17.3 57.6 6.5X +Parquet Vectorized 5890 5925 33 2.7 374.5 1.0X +Parquet Vectorized (Pushdown) 239 251 11 65.8 15.2 24.7X +Native ORC Vectorized 6519 6541 16 2.4 414.5 0.9X +Native ORC Vectorized (Pushdown) 959 961 1 16.4 61.0 6.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 0 distinct string row ('100' < value < '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5937 5962 23 2.6 377.5 1.0X -Parquet Vectorized (Pushdown) 239 245 9 65.8 15.2 24.9X -Native ORC Vectorized 6769 6788 34 2.3 430.4 0.9X -Native ORC Vectorized (Pushdown) 914 925 13 17.2 58.1 6.5X +Parquet Vectorized 6021 6046 24 2.6 382.8 1.0X +Parquet Vectorized (Pushdown) 241 257 11 65.2 15.3 25.0X +Native ORC Vectorized 6712 6738 20 2.3 426.7 
0.9X +Native ORC Vectorized (Pushdown) 957 970 10 16.4 60.9 6.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row (value = '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5868 5878 6 2.7 373.1 1.0X -Parquet Vectorized (Pushdown) 284 289 3 55.3 18.1 20.6X -Native ORC Vectorized 6676 6696 23 2.4 424.5 0.9X -Native ORC Vectorized (Pushdown) 956 963 11 16.5 60.8 6.1X +Parquet Vectorized 5962 5982 17 2.6 379.1 1.0X +Parquet Vectorized (Pushdown) 288 294 8 54.6 18.3 20.7X +Native ORC Vectorized 6667 6694 27 2.4 423.9 0.9X +Native ORC Vectorized (Pushdown) 986 997 6 15.9 62.7 6.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row (value <=> '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5940 5949 6 2.6 377.7 1.0X -Parquet Vectorized (Pushdown) 290 295 5 54.2 18.5 20.5X -Native ORC Vectorized 6733 6746 10 2.3 428.1 0.9X -Native ORC Vectorized (Pushdown) 953 966 12 16.5 60.6 6.2X +Parquet Vectorized 5961 5968 5 2.6 379.0 1.0X +Parquet Vectorized (Pushdown) 286 301 11 54.9 18.2 20.8X +Native ORC Vectorized 6618 6670 68 2.4 420.7 0.9X +Native ORC Vectorized (Pushdown) 988 996 10 15.9 62.8 6.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row ('100' <= value <= '100'): Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5946 5966 17 2.6 378.0 1.0X -Parquet Vectorized (Pushdown) 292 296 5 53.9 18.5 20.4X -Native ORC Vectorized 6741 6751 6 2.3 428.6 0.9X -Native ORC Vectorized (Pushdown) 958 964 5 16.4 60.9 6.2X +Parquet Vectorized 6025 6052 15 2.6 383.1 1.0X +Parquet Vectorized (Pushdown) 288 296 8 54.6 18.3 20.9X +Native ORC Vectorized 6727 6756 39 2.3 427.7 0.9X +Native ORC Vectorized (Pushdown) 988 997 10 15.9 62.8 6.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select all distinct string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 14342 14388 32 1.1 911.8 1.0X -Parquet Vectorized (Pushdown) 14351 14404 51 1.1 912.4 1.0X -Native ORC Vectorized 14291 14316 30 1.1 908.6 1.0X -Native ORC Vectorized (Pushdown) 14452 14468 10 1.1 918.8 1.0X +Parquet Vectorized 14170 14195 23 1.1 900.9 1.0X +Parquet Vectorized (Pushdown) 14143 14168 22 1.1 899.2 1.0X +Native ORC Vectorized 14438 14488 39 1.1 918.0 1.0X +Native ORC Vectorized (Pushdown) 14638 14703 103 1.1 930.6 1.0X ================================================================================================ Pushdown benchmark for StringStartsWith ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7092 7104 14 2.2 450.9 1.0X -Parquet Vectorized (Pushdown) 930 935 3 16.9 59.1 7.6X -Native ORC Vectorized 5306 5327 28 3.0 337.3 1.3X -Native ORC Vectorized (Pushdown) 5385 5398 10 2.9 342.4 1.3X +Parquet Vectorized 7156 7212 82 2.2 455.0 1.0X +Parquet Vectorized (Pushdown) 948 952 5 16.6 60.2 7.6X +Native ORC Vectorized 5320 5345 31 3.0 338.2 1.3X +Native ORC Vectorized (Pushdown) 5413 5424 7 2.9 344.1 1.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6867 6913 26 2.3 436.6 1.0X -Parquet Vectorized (Pushdown) 278 283 4 56.5 17.7 24.7X -Native ORC Vectorized 5146 5154 5 3.1 327.2 1.3X -Native ORC Vectorized (Pushdown) 5225 5236 7 3.0 332.2 1.3X +Parquet Vectorized 7011 7034 30 2.2 445.7 1.0X +Parquet Vectorized (Pushdown) 280 286 8 56.1 17.8 25.0X +Native ORC Vectorized 5194 5204 9 3.0 330.2 1.3X +Native ORC Vectorized (Pushdown) 5264 5284 15 3.0 334.7 1.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6831 6849 11 2.3 434.3 1.0X -Parquet Vectorized (Pushdown) 265 272 6 59.2 16.9 25.7X -Native ORC Vectorized 5114 5140 18 3.1 325.2 1.3X -Native ORC Vectorized (Pushdown) 
5193 5227 27 3.0 330.1 1.3X +Parquet Vectorized 6995 7025 21 2.2 444.7 1.0X +Parquet Vectorized (Pushdown) 277 287 12 56.7 17.6 25.2X +Native ORC Vectorized 5182 5205 23 3.0 329.5 1.3X +Native ORC Vectorized (Pushdown) 5260 5271 11 3.0 334.4 1.3X ================================================================================================ Pushdown benchmark for StringEndsWith ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%10'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5890 5903 16 2.7 374.5 1.0X -Parquet Vectorized (Pushdown) 366 369 3 43.0 23.3 16.1X -Native ORC Vectorized 6686 6712 29 2.4 425.1 0.9X -Native ORC Vectorized (Pushdown) 6877 6895 12 2.3 437.2 0.9X +Parquet Vectorized 5961 5985 24 2.6 379.0 1.0X +Parquet Vectorized (Pushdown) 366 375 10 42.9 23.3 16.3X +Native ORC Vectorized 6698 6718 19 2.3 425.9 0.9X +Native ORC Vectorized (Pushdown) 6899 6921 18 2.3 438.6 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%1000'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5860 5875 13 2.7 372.6 1.0X -Parquet Vectorized (Pushdown) 269 275 7 58.5 17.1 21.8X -Native ORC Vectorized 6606 6637 23 2.4 420.0 0.9X -Native ORC Vectorized (Pushdown) 6803 6830 28 2.3 432.5 0.9X +Parquet Vectorized 5940 5950 7 2.6 377.7 1.0X +Parquet 
Vectorized (Pushdown) 269 278 9 58.5 17.1 22.1X +Native ORC Vectorized 6613 6643 24 2.4 420.5 0.9X +Native ORC Vectorized (Pushdown) 6856 6870 14 2.3 435.9 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%786432'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5806 5835 24 2.7 369.1 1.0X -Parquet Vectorized (Pushdown) 263 271 4 59.9 16.7 22.1X -Native ORC Vectorized 6617 6624 5 2.4 420.7 0.9X -Native ORC Vectorized (Pushdown) 6771 6784 13 2.3 430.5 0.9X +Parquet Vectorized 5939 5955 17 2.6 377.6 1.0X +Parquet Vectorized (Pushdown) 270 292 36 58.2 17.2 22.0X +Native ORC Vectorized 6634 6655 20 2.4 421.8 0.9X +Native ORC Vectorized (Pushdown) 6824 6853 25 2.3 433.9 0.9X ================================================================================================ Pushdown benchmark for StringContains ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5983 6035 43 2.6 380.4 1.0X -Parquet Vectorized (Pushdown) 790 795 4 19.9 50.2 7.6X -Native ORC Vectorized 6785 6803 11 2.3 431.4 0.9X -Native ORC Vectorized (Pushdown) 6943 6977 24 2.3 441.4 0.9X +Parquet Vectorized 6160 6173 14 2.6 391.7 1.0X +Parquet Vectorized (Pushdown) 801 808 5 19.6 50.9 7.7X +Native ORC Vectorized 6872 6881 11 2.3 
436.9 0.9X +Native ORC Vectorized (Pushdown) 7071 7082 7 2.2 449.6 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5790 5839 29 2.7 368.1 1.0X -Parquet Vectorized (Pushdown) 267 271 4 58.9 17.0 21.7X -Native ORC Vectorized 6623 6635 14 2.4 421.1 0.9X -Native ORC Vectorized (Pushdown) 6782 6797 10 2.3 431.2 0.9X +Parquet Vectorized 5949 5968 17 2.6 378.2 1.0X +Parquet Vectorized (Pushdown) 272 278 4 57.9 17.3 21.9X +Native ORC Vectorized 6649 6666 23 2.4 422.7 0.9X +Native ORC Vectorized (Pushdown) 6860 6888 23 2.3 436.1 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5811 5831 16 2.7 369.4 1.0X -Parquet Vectorized (Pushdown) 263 273 10 59.8 16.7 22.1X -Native ORC Vectorized 6563 6609 27 2.4 417.3 0.9X -Native ORC Vectorized (Pushdown) 6734 6772 26 2.3 428.1 0.9X +Parquet Vectorized 5936 5959 19 2.6 377.4 1.0X +Parquet Vectorized (Pushdown) 271 276 3 58.1 17.2 21.9X +Native ORC Vectorized 6637 6657 15 2.4 422.0 0.9X +Native ORC Vectorized (Pushdown) 6852 6878 24 2.3 435.6 0.9X ================================================================================================ Pushdown benchmark for decimal ================================================================================================ -OpenJDK 64-Bit 
Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(9, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2805 2825 20 5.6 178.3 1.0X -Parquet Vectorized (Pushdown) 70 73 5 226.2 4.4 40.3X -Native ORC Vectorized 3503 3543 26 4.5 222.7 0.8X -Native ORC Vectorized (Pushdown) 55 59 3 286.3 3.5 51.1X +Parquet Vectorized 2866 2879 13 5.5 182.2 1.0X +Parquet Vectorized (Pushdown) 71 74 5 222.8 4.5 40.6X +Native ORC Vectorized 3278 3290 12 4.8 208.4 0.9X +Native ORC Vectorized (Pushdown) 59 63 5 268.1 3.7 48.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(9, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 4151 4162 9 3.8 263.9 1.0X -Parquet Vectorized (Pushdown) 1966 1981 15 8.0 125.0 2.1X -Native ORC Vectorized 5029 5082 51 3.1 319.7 0.8X -Native ORC Vectorized (Pushdown) 2193 2203 8 7.2 139.4 1.9X +Parquet Vectorized 4201 4213 14 3.7 267.1 1.0X +Parquet Vectorized (Pushdown) 1950 1959 6 8.1 124.0 2.2X +Native ORC Vectorized 4784 4797 15 3.3 304.2 0.9X +Native ORC Vectorized (Pushdown) 2117 2120 3 7.4 134.6 2.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(9, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8776 8810 40 1.8 558.0 1.0X -Parquet Vectorized (Pushdown) 8460 8484 13 1.9 537.9 1.0X -Native ORC Vectorized 9930 9952 17 1.6 631.4 0.9X -Native ORC Vectorized (Pushdown) 9440 9476 30 1.7 600.2 0.9X +Parquet Vectorized 8854 8870 18 1.8 562.9 1.0X +Parquet Vectorized (Pushdown) 8480 8486 8 1.9 539.1 1.0X +Native ORC Vectorized 9614 9653 56 1.6 611.2 0.9X +Native ORC Vectorized (Pushdown) 9180 9242 85 1.7 583.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(9, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 10066 10176 84 1.6 640.0 1.0X -Parquet Vectorized (Pushdown) 10147 10173 29 1.6 645.1 1.0X -Native ORC Vectorized 10790 10854 60 1.5 686.0 0.9X -Native ORC Vectorized (Pushdown) 10900 11013 189 1.4 693.0 0.9X +Parquet Vectorized 9942 9968 19 1.6 632.1 1.0X +Parquet Vectorized (Pushdown) 9975 9993 14 1.6 634.2 1.0X +Native ORC Vectorized 10610 10638 19 1.5 674.6 0.9X +Native ORC Vectorized (Pushdown) 10626 10648 16 1.5 675.6 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(18, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2978 2995 19 5.3 189.3 1.0X -Parquet Vectorized (Pushdown) 69 72 3 229.4 4.4 43.4X -Native ORC Vectorized 3520 3535 13 4.5 223.8 0.8X -Native 
ORC Vectorized (Pushdown) 53 56 3 296.7 3.4 56.2X +Parquet Vectorized 3028 3063 27 5.2 192.5 1.0X +Parquet Vectorized (Pushdown) 69 71 2 227.4 4.4 43.8X +Native ORC Vectorized 3306 3322 21 4.8 210.2 0.9X +Native ORC Vectorized (Pushdown) 56 59 4 281.4 3.6 54.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(18, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3733 3745 9 4.2 237.4 1.0X -Parquet Vectorized (Pushdown) 1078 1089 6 14.6 68.5 3.5X -Native ORC Vectorized 4305 4316 11 3.7 273.7 0.9X -Native ORC Vectorized (Pushdown) 1110 1113 3 14.2 70.6 3.4X +Parquet Vectorized 3790 3798 7 4.1 241.0 1.0X +Parquet Vectorized (Pushdown) 1082 1086 3 14.5 68.8 3.5X +Native ORC Vectorized 4052 4071 35 3.9 257.6 0.9X +Native ORC Vectorized (Pushdown) 1078 1081 3 14.6 68.5 3.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(18, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6729 6741 14 2.3 427.8 1.0X -Parquet Vectorized (Pushdown) 5185 5240 41 3.0 329.7 1.3X -Native ORC Vectorized 7200 7224 21 2.2 457.8 0.9X -Native ORC Vectorized (Pushdown) 5405 5438 22 2.9 343.7 1.2X +Parquet Vectorized 6691 6712 15 2.4 425.4 1.0X +Parquet Vectorized (Pushdown) 5196 5211 17 3.0 330.3 1.3X +Native ORC Vectorized 6925 6934 8 2.3 440.3 1.0X +Native ORC Vectorized (Pushdown) 5264 5279 19 3.0 334.7 1.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS 
on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(18, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 9576 9593 18 1.6 608.8 1.0X -Parquet Vectorized (Pushdown) 9301 9312 16 1.7 591.3 1.0X -Native ORC Vectorized 10115 10143 26 1.6 643.1 0.9X -Native ORC Vectorized (Pushdown) 9809 9814 3 1.6 623.6 1.0X +Parquet Vectorized 9504 9527 19 1.7 604.3 1.0X +Parquet Vectorized (Pushdown) 9218 9233 17 1.7 586.1 1.0X +Native ORC Vectorized 9809 9836 18 1.6 623.6 1.0X +Native ORC Vectorized (Pushdown) 9507 9531 17 1.7 604.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(38, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 4258 4267 8 3.7 270.7 1.0X -Parquet Vectorized (Pushdown) 75 77 3 210.3 4.8 56.9X -Native ORC Vectorized 3587 3638 85 4.4 228.1 1.2X -Native ORC Vectorized (Pushdown) 52 55 3 302.8 3.3 82.0X +Parquet Vectorized 4264 4282 25 3.7 271.1 1.0X +Parquet Vectorized (Pushdown) 75 79 3 208.7 4.8 56.6X +Native ORC Vectorized 3347 3364 16 4.7 212.8 1.3X +Native ORC Vectorized (Pushdown) 55 59 5 283.6 3.5 76.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(38, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
-------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5156 5167 18 3.1 327.8 1.0X -Parquet Vectorized (Pushdown) 1386 1395 9 11.3 88.1 3.7X -Native ORC Vectorized 4486 4506 27 3.5 285.2 1.1X -Native ORC Vectorized (Pushdown) 1242 1251 8 12.7 79.0 4.2X +Parquet Vectorized 5214 5231 10 3.0 331.5 1.0X +Parquet Vectorized (Pushdown) 1409 1413 2 11.2 89.6 3.7X +Native ORC Vectorized 4207 4222 25 3.7 267.5 1.2X +Native ORC Vectorized (Pushdown) 1209 1211 2 13.0 76.9 4.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(38, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8812 8822 7 1.8 560.2 1.0X -Parquet Vectorized (Pushdown) 6728 6732 4 2.3 427.8 1.3X -Native ORC Vectorized 7787 7836 60 2.0 495.1 1.1X -Native ORC Vectorized (Pushdown) 6007 6023 24 2.6 381.9 1.5X +Parquet Vectorized 8897 8913 12 1.8 565.6 1.0X +Parquet Vectorized (Pushdown) 6816 6830 19 2.3 433.3 1.3X +Native ORC Vectorized 7648 7665 14 2.1 486.3 1.2X +Native ORC Vectorized (Pushdown) 5932 5954 25 2.7 377.1 1.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(38, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 12367 12381 9 1.3 786.3 1.0X -Parquet Vectorized (Pushdown) 11977 12018 24 1.3 761.5 1.0X -Native ORC Vectorized 11109 11169 79 1.4 706.3 1.1X -Native 
ORC Vectorized (Pushdown) 10772 10786 15 1.5 684.9 1.1X +Parquet Vectorized 12548 12558 13 1.3 797.8 1.0X +Parquet Vectorized (Pushdown) 12139 12156 22 1.3 771.8 1.0X +Native ORC Vectorized 11055 11089 25 1.4 702.8 1.1X +Native ORC Vectorized (Pushdown) 10746 10789 41 1.5 683.2 1.2X ================================================================================================ Pushdown benchmark for InSet -> InFilters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6442 6482 25 2.4 409.6 1.0X -Parquet Vectorized (Pushdown) 284 293 15 55.4 18.0 22.7X -Native ORC Vectorized 4965 4990 17 3.2 315.7 1.3X -Native ORC Vectorized (Pushdown) 281 288 8 56.1 17.8 23.0X +Parquet Vectorized 6561 6633 92 2.4 417.1 1.0X +Parquet Vectorized (Pushdown) 281 287 6 55.9 17.9 23.3X +Native ORC Vectorized 4666 4681 19 3.4 296.6 1.4X +Native ORC Vectorized (Pushdown) 289 298 8 54.4 18.4 22.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6451 6461 9 2.4 410.1 1.0X -Parquet Vectorized (Pushdown) 283 286 3 55.6 18.0 22.8X -Native ORC Vectorized 4937 4981 34 3.2 313.9 1.3X -Native ORC Vectorized (Pushdown) 279 288 
11 56.4 17.7 23.1X +Parquet Vectorized 6554 6599 62 2.4 416.7 1.0X +Parquet Vectorized (Pushdown) 284 295 10 55.3 18.1 23.1X +Native ORC Vectorized 4674 4695 23 3.4 297.1 1.4X +Native ORC Vectorized (Pushdown) 293 303 13 53.7 18.6 22.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6449 6462 8 2.4 410.0 1.0X -Parquet Vectorized (Pushdown) 283 293 10 55.6 18.0 22.8X -Native ORC Vectorized 4955 4964 12 3.2 315.0 1.3X -Native ORC Vectorized (Pushdown) 280 284 3 56.2 17.8 23.0X +Parquet Vectorized 6540 6557 16 2.4 415.8 1.0X +Parquet Vectorized (Pushdown) 284 298 12 55.4 18.1 23.0X +Native ORC Vectorized 4667 4680 8 3.4 296.7 1.4X +Native ORC Vectorized (Pushdown) 290 297 7 54.3 18.4 22.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6448 6474 23 2.4 409.9 1.0X -Parquet Vectorized (Pushdown) 302 321 43 52.1 19.2 21.4X -Native ORC Vectorized 4977 4994 16 3.2 316.4 1.3X -Native ORC Vectorized (Pushdown) 297 301 3 53.0 18.9 21.7X +Parquet Vectorized 6564 6587 18 2.4 417.3 1.0X +Parquet Vectorized (Pushdown) 299 306 4 52.6 19.0 21.9X +Native ORC Vectorized 4686 4707 22 3.4 297.9 1.4X +Native ORC Vectorized (Pushdown) 305 310 3 51.5 19.4 21.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on 
Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6478 6503 36 2.4 411.8 1.0X -Parquet Vectorized (Pushdown) 301 307 4 52.2 19.1 21.5X -Native ORC Vectorized 4972 5002 20 3.2 316.1 1.3X -Native ORC Vectorized (Pushdown) 297 305 11 52.9 18.9 21.8X +Parquet Vectorized 6568 6599 31 2.4 417.6 1.0X +Parquet Vectorized (Pushdown) 307 309 1 51.2 19.5 21.4X +Native ORC Vectorized 4684 4700 19 3.4 297.8 1.4X +Native ORC Vectorized (Pushdown) 302 310 11 52.1 19.2 21.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6506 6522 11 2.4 413.7 1.0X -Parquet Vectorized (Pushdown) 305 309 3 51.6 19.4 21.3X -Native ORC Vectorized 5057 5062 4 3.1 321.5 1.3X -Native ORC Vectorized (Pushdown) 304 309 3 51.7 19.4 21.4X +Parquet Vectorized 6567 6584 12 2.4 417.5 1.0X +Parquet Vectorized (Pushdown) 306 308 3 51.4 19.5 21.5X +Native ORC Vectorized 4684 4694 9 3.4 297.8 1.4X +Native ORC Vectorized (Pushdown) 308 313 3 51.1 19.6 21.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6675 6693 24 2.4 424.4 1.0X -Parquet Vectorized (Pushdown) 896 899 3 17.6 57.0 7.4X -Native ORC Vectorized 5264 5272 9 3.0 334.7 1.3X -Native ORC Vectorized (Pushdown) 407 410 4 38.7 25.9 16.4X +Parquet Vectorized 6743 6760 23 2.3 428.7 1.0X +Parquet Vectorized (Pushdown) 902 909 7 17.4 57.3 7.5X +Native ORC Vectorized 4877 4900 14 3.2 310.1 1.4X +Native ORC Vectorized (Pushdown) 414 415 1 38.0 26.3 16.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6662 6668 8 2.4 423.6 1.0X -Parquet Vectorized (Pushdown) 3357 3364 6 4.7 213.4 2.0X -Native ORC Vectorized 5164 5191 23 3.0 328.3 1.3X -Native ORC Vectorized (Pushdown) 426 429 3 36.9 27.1 15.6X +Parquet Vectorized 6731 6752 22 2.3 427.9 1.0X +Parquet Vectorized (Pushdown) 3328 3339 11 4.7 211.6 2.0X +Native ORC Vectorized 4904 4908 5 3.2 311.8 1.4X +Native ORC Vectorized (Pushdown) 431 433 2 36.5 27.4 15.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6648 6658 12 2.4 422.7 1.0X -Parquet Vectorized (Pushdown) 5843 5860 15 2.7 371.5 1.1X -Native ORC Vectorized 5182 5189 6 3.0 
329.5 1.3X -Native ORC Vectorized (Pushdown) 432 436 9 36.4 27.5 15.4X +Parquet Vectorized 6720 6732 8 2.3 427.2 1.0X +Parquet Vectorized (Pushdown) 6064 6085 14 2.6 385.6 1.1X +Native ORC Vectorized 4885 4893 11 3.2 310.6 1.4X +Native ORC Vectorized (Pushdown) 439 451 20 35.9 27.9 15.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6611 6629 16 2.4 420.3 1.0X -Parquet Vectorized (Pushdown) 892 898 5 17.6 56.7 7.4X -Native ORC Vectorized 5126 5154 32 3.1 325.9 1.3X -Native ORC Vectorized (Pushdown) 500 505 3 31.4 31.8 13.2X +Parquet Vectorized 6698 6710 9 2.3 425.8 1.0X +Parquet Vectorized (Pushdown) 927 932 5 17.0 59.0 7.2X +Native ORC Vectorized 4843 4859 31 3.2 307.9 1.4X +Native ORC Vectorized (Pushdown) 509 515 6 30.9 32.4 13.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6556 6583 17 2.4 416.8 1.0X -Parquet Vectorized (Pushdown) 3433 3448 17 4.6 218.2 1.9X -Native ORC Vectorized 5099 5119 12 3.1 324.2 1.3X -Native ORC Vectorized (Pushdown) 570 572 3 27.6 36.2 11.5X +Parquet Vectorized 6698 6717 19 2.3 425.8 1.0X +Parquet Vectorized (Pushdown) 3443 3458 19 4.6 218.9 1.9X +Native ORC Vectorized 4838 4869 27 3.3 307.6 1.4X +Native ORC Vectorized (Pushdown) 571 574 4 27.6 
36.3 11.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6631 6642 10 2.4 421.6 1.0X -Parquet Vectorized (Pushdown) 5877 5888 8 2.7 373.6 1.1X -Native ORC Vectorized 5141 5148 8 3.1 326.9 1.3X -Native ORC Vectorized (Pushdown) 585 587 2 26.9 37.2 11.3X +Parquet Vectorized 6694 6714 16 2.3 425.6 1.0X +Parquet Vectorized (Pushdown) 5855 5876 26 2.7 372.2 1.1X +Native ORC Vectorized 4833 4848 14 3.3 307.3 1.4X +Native ORC Vectorized (Pushdown) 559 561 2 28.1 35.5 12.0X ================================================================================================ Pushdown benchmark for tinyint ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 tinyint row (value = CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3074 3122 77 5.1 195.4 1.0X -Parquet Vectorized (Pushdown) 107 111 6 146.8 6.8 28.7X -Native ORC Vectorized 2473 2482 6 6.4 157.2 1.2X -Native ORC Vectorized (Pushdown) 114 117 5 138.0 7.2 27.0X +Parquet Vectorized 3146 3222 55 5.0 200.0 1.0X +Parquet Vectorized (Pushdown) 108 112 4 145.3 6.9 29.1X +Native ORC Vectorized 2163 2182 29 7.3 137.5 1.5X +Native ORC Vectorized (Pushdown) 115 119 6 137.4 7.3 27.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3739 3758 16 4.2 237.7 1.0X -Parquet Vectorized (Pushdown) 1004 1011 5 15.7 63.8 3.7X -Native ORC Vectorized 3078 3092 17 5.1 195.7 1.2X -Native ORC Vectorized (Pushdown) 918 920 1 17.1 58.4 4.1X +Parquet Vectorized 3769 3787 16 4.2 239.6 1.0X +Parquet Vectorized (Pushdown) 990 998 6 15.9 63.0 3.8X +Native ORC Vectorized 2723 2728 3 5.8 173.1 1.4X +Native ORC Vectorized (Pushdown) 854 857 2 18.4 54.3 4.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6391 6394 2 2.5 406.3 1.0X -Parquet Vectorized (Pushdown) 4890 4907 10 3.2 310.9 1.3X -Native ORC Vectorized 5584 5613 20 2.8 355.0 1.1X -Native ORC Vectorized (Pushdown) 4397 4412 10 3.6 279.6 1.5X +Parquet Vectorized 6447 6463 14 2.4 409.9 1.0X +Parquet Vectorized (Pushdown) 4925 4941 13 3.2 313.1 1.3X +Native ORC Vectorized 5158 5166 9 3.0 327.9 1.2X +Native ORC Vectorized (Pushdown) 4108 4130 29 3.8 261.2 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
-------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 9020 9050 32 1.7 573.4 1.0X -Parquet Vectorized (Pushdown) 8757 8792 23 1.8 556.7 1.0X -Native ORC Vectorized 8277 8316 42 1.9 526.2 1.1X -Native ORC Vectorized (Pushdown) 8050 8069 14 2.0 511.8 1.1X +Parquet Vectorized 8983 9016 29 1.8 571.1 1.0X +Parquet Vectorized (Pushdown) 8710 8734 14 1.8 553.8 1.0X +Native ORC Vectorized 7637 7670 33 2.1 485.5 1.2X +Native ORC Vectorized (Pushdown) 7453 7479 22 2.1 473.9 1.2X ================================================================================================ Pushdown benchmark for Timestamp ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3218 3228 10 4.9 204.6 1.0X -Parquet Vectorized (Pushdown) 3203 3213 10 4.9 203.7 1.0X -Native ORC Vectorized 2387 2391 5 6.6 151.7 1.3X -Native ORC Vectorized (Pushdown) 39 42 4 407.8 2.5 83.4X +Parquet Vectorized 3234 3250 11 4.9 205.6 1.0X +Parquet Vectorized (Pushdown) 3243 3254 10 4.9 206.2 1.0X +Native ORC Vectorized 2077 2092 22 7.6 132.1 1.6X +Native ORC Vectorized (Pushdown) 40 42 4 394.9 2.5 81.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as INT96 rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per 
Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3900 3912 11 4.0 247.9 1.0X -Parquet Vectorized (Pushdown) 3896 3903 6 4.0 247.7 1.0X -Native ORC Vectorized 2987 2996 11 5.3 189.9 1.3X -Native ORC Vectorized (Pushdown) 889 892 3 17.7 56.5 4.4X +Parquet Vectorized 3945 3975 41 4.0 250.8 1.0X +Parquet Vectorized (Pushdown) 3936 3950 15 4.0 250.3 1.0X +Native ORC Vectorized 2699 2712 16 5.8 171.6 1.5X +Native ORC Vectorized (Pushdown) 864 871 6 18.2 54.9 4.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as INT96 rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6698 6711 15 2.3 425.9 1.0X -Parquet Vectorized (Pushdown) 6698 6708 9 2.3 425.8 1.0X -Native ORC Vectorized 5550 5563 22 2.8 352.9 1.2X -Native ORC Vectorized (Pushdown) 4359 4374 18 3.6 277.2 1.5X +Parquet Vectorized 6749 6770 14 2.3 429.1 1.0X +Parquet Vectorized (Pushdown) 6746 6762 24 2.3 428.9 1.0X +Native ORC Vectorized 5192 5218 16 3.0 330.1 1.3X +Native ORC Vectorized (Pushdown) 4140 4152 15 3.8 263.2 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as INT96 rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------------------------------- 
-Parquet Vectorized 9385 9400 18 1.7 596.7 1.0X -Parquet Vectorized (Pushdown) 9378 9390 11 1.7 596.3 1.0X -Native ORC Vectorized 8168 8194 28 1.9 519.3 1.1X -Native ORC Vectorized (Pushdown) 7949 7959 9 2.0 505.4 1.2X +Parquet Vectorized 9447 9478 29 1.7 600.6 1.0X +Parquet Vectorized (Pushdown) 9462 9486 25 1.7 601.6 1.0X +Native ORC Vectorized 7861 7994 77 2.0 499.8 1.2X +Native ORC Vectorized (Pushdown) 7811 7838 17 2.0 496.6 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as TIMESTAMP_MICROS row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2972 2983 14 5.3 189.0 1.0X -Parquet Vectorized (Pushdown) 68 71 3 231.7 4.3 43.8X -Native ORC Vectorized 2359 2366 4 6.7 150.0 1.3X -Native ORC Vectorized (Pushdown) 38 40 3 416.7 2.4 78.7X +Parquet Vectorized 3007 3018 9 5.2 191.2 1.0X +Parquet Vectorized (Pushdown) 69 71 3 229.6 4.4 43.9X +Native ORC Vectorized 2066 2069 2 7.6 131.4 1.5X +Native ORC Vectorized (Pushdown) 39 41 3 399.5 2.5 76.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3649 3652 4 4.3 232.0 1.0X -Parquet Vectorized (Pushdown) 1043 1047 3 15.1 66.3 3.5X -Native ORC Vectorized 2989 2989 1 5.3 190.0 1.2X 
-Native ORC Vectorized (Pushdown) 888 892 2 17.7 56.5 4.1X +Parquet Vectorized 3717 3738 27 4.2 236.3 1.0X +Parquet Vectorized (Pushdown) 1052 1055 4 14.9 66.9 3.5X +Native ORC Vectorized 2695 2702 7 5.8 171.3 1.4X +Native ORC Vectorized (Pushdown) 863 867 5 18.2 54.9 4.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6413 6426 8 2.5 407.7 1.0X -Parquet Vectorized (Pushdown) 5009 5049 48 3.1 318.5 1.3X -Native ORC Vectorized 5548 5553 11 2.8 352.7 1.2X -Native ORC Vectorized (Pushdown) 4359 4368 9 3.6 277.1 1.5X +Parquet Vectorized 6536 6551 9 2.4 415.5 1.0X +Parquet Vectorized (Pushdown) 5041 5059 13 3.1 320.5 1.3X +Native ORC Vectorized 5201 5223 21 3.0 330.7 1.3X +Native ORC Vectorized (Pushdown) 4134 4139 5 3.8 262.9 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 9114 9138 26 1.7 579.5 1.0X -Parquet Vectorized (Pushdown) 8869 8880 9 1.8 563.9 1.0X -Native ORC Vectorized 8175 8193 20 1.9 519.8 1.1X -Native ORC Vectorized (Pushdown) 7947 7956 11 2.0 505.3 1.1X +Parquet Vectorized 9201 9222 27 1.7 585.0 1.0X +Parquet Vectorized (Pushdown) 
8940 8961 23 1.8 568.4 1.0X +Native ORC Vectorized 7987 8023 27 2.0 507.8 1.2X +Native ORC Vectorized (Pushdown) 7792 7808 21 2.0 495.4 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as TIMESTAMP_MILLIS row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3001 3005 6 5.2 190.8 1.0X -Parquet Vectorized (Pushdown) 68 70 3 232.2 4.3 44.3X -Native ORC Vectorized 2359 2362 3 6.7 150.0 1.3X -Native ORC Vectorized (Pushdown) 38 40 4 415.7 2.4 79.3X +Parquet Vectorized 3037 3044 8 5.2 193.1 1.0X +Parquet Vectorized (Pushdown) 68 71 3 230.6 4.3 44.5X +Native ORC Vectorized 2068 2082 23 7.6 131.5 1.5X +Native ORC Vectorized (Pushdown) 39 42 3 400.5 2.5 77.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3686 3693 5 4.3 234.4 1.0X -Parquet Vectorized (Pushdown) 1044 1048 4 15.1 66.4 3.5X -Native ORC Vectorized 2984 2989 4 5.3 189.7 1.2X -Native ORC Vectorized (Pushdown) 889 891 2 17.7 56.5 4.1X +Parquet Vectorized 3730 3735 5 4.2 237.1 1.0X +Parquet Vectorized (Pushdown) 1047 1052 4 15.0 66.5 3.6X +Native ORC Vectorized 2700 2704 4 5.8 171.7 1.4X +Native ORC Vectorized (Pushdown) 861 877 22 18.3 54.8 4.3X -OpenJDK 64-Bit Server VM 
17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6454 6464 8 2.4 410.3 1.0X -Parquet Vectorized (Pushdown) 5018 5033 26 3.1 319.0 1.3X -Native ORC Vectorized 5545 5556 9 2.8 352.5 1.2X -Native ORC Vectorized (Pushdown) 4357 4377 14 3.6 277.0 1.5X +Parquet Vectorized 6517 6528 18 2.4 414.4 1.0X +Parquet Vectorized (Pushdown) 5046 5050 5 3.1 320.8 1.3X +Native ORC Vectorized 5189 5203 15 3.0 329.9 1.3X +Native ORC Vectorized (Pushdown) 4131 4148 22 3.8 262.6 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 9143 9162 28 1.7 581.3 1.0X -Parquet Vectorized (Pushdown) 8888 8895 6 1.8 565.1 1.0X -Native ORC Vectorized 8163 8178 25 1.9 519.0 1.1X -Native ORC Vectorized (Pushdown) 7942 7966 35 2.0 504.9 1.2X +Parquet Vectorized 9216 9225 7 1.7 585.9 1.0X +Parquet Vectorized (Pushdown) 8966 8976 15 1.8 570.0 1.0X +Native ORC Vectorized 7990 8006 18 2.0 508.0 1.2X +Native ORC Vectorized (Pushdown) 7788 7804 19 2.0 495.1 1.2X ================================================================================================ Pushdown benchmark with many filters 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 row with 1 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 55 57 2 0.0 55430933.0 1.0X -Parquet Vectorized (Pushdown) 56 59 5 0.0 56257088.0 1.0X -Native ORC Vectorized 50 52 2 0.0 50120677.0 1.1X -Native ORC Vectorized (Pushdown) 52 55 3 0.0 52126525.0 1.1X +Parquet Vectorized 48 50 2 0.0 47822192.0 1.0X +Parquet Vectorized (Pushdown) 49 52 4 0.0 48715892.0 1.0X +Native ORC Vectorized 43 44 2 0.0 42630483.0 1.1X +Native ORC Vectorized (Pushdown) 44 47 4 0.0 44086388.0 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 row with 250 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 405 417 11 0.0 404938298.0 1.0X -Parquet Vectorized (Pushdown) 407 412 6 0.0 406571487.0 1.0X -Native ORC Vectorized 394 397 4 0.0 394366762.0 1.0X -Native ORC Vectorized (Pushdown) 397 406 7 0.0 396723685.0 1.0X +Parquet Vectorized 189 197 6 0.0 189302685.0 1.0X +Parquet Vectorized (Pushdown) 192 196 5 0.0 191858297.0 1.0X +Native ORC Vectorized 182 189 7 0.0 182429398.0 1.0X +Native ORC Vectorized (Pushdown) 186 190 3 0.0 185920182.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select 1 row with 500 filters: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 2255 2270 9 0.0 2255338602.0 1.0X -Parquet Vectorized (Pushdown) 2258 2279 13 0.0 2258126416.0 1.0X -Native ORC Vectorized 2244 2260 10 0.0 2243733317.0 1.0X -Native ORC Vectorized (Pushdown) 2255 2290 29 0.0 2254729481.0 1.0X +Parquet Vectorized 600 607 9 0.0 599862493.0 1.0X +Parquet Vectorized (Pushdown) 606 622 12 0.0 605756895.0 1.0X +Native ORC Vectorized 591 600 12 0.0 591069360.0 1.0X +Native ORC Vectorized (Pushdown) 595 609 9 0.0 594620092.0 1.0X diff --git a/sql/core/benchmarks/GenerateExecBenchmark-jdk21-results.txt b/sql/core/benchmarks/GenerateExecBenchmark-jdk21-results.txt index bd83ba8858f29..ae1f8694afbf5 100644 --- a/sql/core/benchmarks/GenerateExecBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/GenerateExecBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ GenerateExec benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor GenerateExec Benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -GenerateExec Benchmark wholestage off 71281 71290 12 1.4 712.8 1.0X -GenerateExec Benchmark wholestage on 21377 22190 461 4.7 213.8 3.3X +GenerateExec Benchmark wholestage off 73608 73642 47 1.4 736.1 1.0X +GenerateExec Benchmark wholestage on 20481 20591 165 4.9 204.8 3.6X diff --git a/sql/core/benchmarks/GenerateExecBenchmark-results.txt b/sql/core/benchmarks/GenerateExecBenchmark-results.txt index 7aaa8fad9e560..6790608ad6b2b 100644 --- a/sql/core/benchmarks/GenerateExecBenchmark-results.txt +++ 
b/sql/core/benchmarks/GenerateExecBenchmark-results.txt @@ -2,11 +2,11 @@ GenerateExec benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor GenerateExec Benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -GenerateExec Benchmark wholestage off 73307 73512 290 1.4 733.1 1.0X -GenerateExec Benchmark wholestage on 24438 24523 84 4.1 244.4 3.0X +GenerateExec Benchmark wholestage off 72745 72752 10 1.4 727.4 1.0X +GenerateExec Benchmark wholestage on 23957 24433 339 4.2 239.6 3.0X diff --git a/sql/core/benchmarks/HashedRelationMetricsBenchmark-jdk21-results.txt b/sql/core/benchmarks/HashedRelationMetricsBenchmark-jdk21-results.txt index 8e47f7e27a85b..e326f00783419 100644 --- a/sql/core/benchmarks/HashedRelationMetricsBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/HashedRelationMetricsBenchmark-jdk21-results.txt @@ -2,10 +2,10 @@ LongToUnsafeRowMap metrics ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LongToUnsafeRowMap metrics: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -LongToUnsafeRowMap 255 259 3 2.0 510.5 1.0X +LongToUnsafeRowMap 266 269 3 1.9 532.1 1.0X diff --git a/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt index f201c27de387e..5364545cd8af7 100644 --- 
a/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt +++ b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt @@ -2,10 +2,10 @@ LongToUnsafeRowMap metrics ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor LongToUnsafeRowMap metrics: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -LongToUnsafeRowMap 261 268 6 1.9 521.5 1.0X +LongToUnsafeRowMap 260 264 3 1.9 519.7 1.0X diff --git a/sql/core/benchmarks/InExpressionBenchmark-jdk21-results.txt b/sql/core/benchmarks/InExpressionBenchmark-jdk21-results.txt index e571db07479a0..4ee151d851b96 100644 --- a/sql/core/benchmarks/InExpressionBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/InExpressionBenchmark-jdk21-results.txt @@ -2,739 +2,739 @@ In Expression Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 46 61 9 217.9 4.6 1.0X -InSet expression 68 73 6 146.3 6.8 0.7X +In expression 39 52 10 254.0 3.9 1.0X +InSet expression 61 68 5 162.9 6.1 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 36 42 6 275.1 3.6 1.0X -InSet expression 62 66 4 160.3 6.2 0.6X +In expression 37 42 6 267.8 3.7 1.0X +InSet expression 57 62 4 173.9 5.7 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 65 70 6 153.5 6.5 1.0X -InSet expression 77 81 4 130.5 7.7 0.9X +In expression 66 72 5 151.0 6.6 1.0X +InSet expression 82 85 4 122.2 8.2 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 106 111 7 94.6 10.6 1.0X -InSet expression 84 87 3 119.7 8.4 1.3X +In expression 106 111 5 94.1 10.6 1.0X +InSet expression 96 101 6 103.7 9.6 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 184 189 4 54.3 18.4 1.0X -InSet expression 98 102 4 102.1 9.8 1.9X +In expression 185 189 5 54.1 18.5 1.0X +InSet expression 124 128 4 80.4 12.4 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor 200 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 393 395 2 25.5 39.3 1.0X -InSet expression 187 192 6 53.5 18.7 2.1X +In expression 397 403 9 25.2 39.7 1.0X +InSet expression 187 190 3 53.4 18.7 2.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 33 5 345.2 2.9 1.0X -InSet expression 75 79 3 132.5 7.5 0.4X +In expression 32 35 4 315.0 3.2 1.0X +InSet expression 85 88 4 117.4 8.5 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 36 41 5 274.6 3.6 1.0X -InSet expression 92 95 2 109.1 9.2 0.4X +In expression 41 44 5 244.9 4.1 1.0X +InSet expression 98 101 2 101.8 9.8 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 62 66 4 161.1 6.2 1.0X -InSet expression 91 93 2 110.1 9.1 0.7X +In expression 63 65 3 159.0 6.3 1.0X +InSet expression 98 100 2 102.4 9.8 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 119 122 4 84.3 11.9 1.0X -InSet expression 128 129 1 78.4 12.8 0.9X +In expression 120 123 4 83.6 12.0 1.0X +InSet expression 133 137 4 74.9 13.3 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 186 189 5 53.7 18.6 1.0X -InSet expression 114 116 3 87.5 11.4 1.6X +In expression 197 201 4 50.7 19.7 1.0X +InSet expression 120 124 4 83.1 12.0 1.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 368 372 5 27.2 36.8 1.0X -InSet expression 122 124 1 81.9 12.2 3.0X +In expression 370 372 2 27.1 37.0 1.0X +InSet expression 132 135 3 76.0 13.2 2.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 551 559 5 18.1 55.1 1.0X -InSet expression 135 138 2 74.0 13.5 4.1X +In 
expression 556 557 2 18.0 55.6 1.0X +InSet expression 145 148 3 68.9 14.5 3.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 734 737 3 13.6 73.4 1.0X -InSet expression 148 151 2 67.5 14.8 4.9X +In expression 731 736 4 13.7 73.1 1.0X +InSet expression 158 160 2 63.4 15.8 4.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 941 947 5 10.6 94.1 1.0X -InSet expression 162 165 2 61.7 16.2 5.8X +In expression 944 945 2 10.6 94.4 1.0X +InSet expression 172 174 2 58.2 17.2 5.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 27 31 6 374.5 2.7 1.0X -InSet expression 73 75 3 137.7 7.3 0.4X +In expression 27 30 4 372.4 2.7 1.0X +InSet expression 81 84 2 122.8 8.1 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 36 41 7 276.7 3.6 1.0X -InSet expression 91 93 1 109.7 9.1 0.4X +In expression 37 39 3 270.3 3.7 1.0X +InSet expression 98 100 2 102.4 9.8 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 60 64 4 166.6 6.0 1.0X -InSet expression 120 122 1 83.5 12.0 0.5X +In expression 62 63 3 162.1 6.2 1.0X +InSet expression 123 125 1 81.1 12.3 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 111 116 5 89.8 11.1 1.0X -InSet expression 134 137 4 74.8 13.4 0.8X +In expression 112 116 10 89.1 11.2 1.0X +InSet expression 140 142 2 71.3 14.0 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 195 197 3 51.3 19.5 1.0X -InSet expression 116 119 3 85.8 11.6 1.7X +In expression 194 198 4 51.4 19.4 1.0X +InSet expression 123 126 4 81.3 12.3 1.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 348 351 4 28.7 34.8 1.0X -InSet expression 122 125 2 81.7 12.2 2.8X +In expression 344 347 3 29.1 34.4 1.0X +InSet expression 128 130 2 77.8 12.8 2.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 513 516 5 19.5 51.3 1.0X -InSet expression 133 135 2 75.1 13.3 3.9X +In expression 514 517 3 19.5 51.4 1.0X +InSet expression 143 146 3 70.2 14.3 3.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 658 663 4 15.2 65.8 1.0X -InSet expression 146 149 3 68.3 14.6 4.5X +In expression 664 670 6 15.1 66.4 1.0X +InSet expression 156 159 2 64.0 15.6 4.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 847 853 7 11.8 84.7 1.0X -InSet expression 159 162 2 62.7 15.9 
5.3X +In expression 848 851 4 11.8 84.8 1.0X +InSet expression 169 172 2 59.2 16.9 5.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 27 30 4 368.5 2.7 1.0X -InSet expression 80 83 3 124.6 8.0 0.3X +In expression 28 30 4 359.2 2.8 1.0X +InSet expression 82 84 2 121.6 8.2 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 35 38 5 285.9 3.5 1.0X -InSet expression 97 99 1 103.0 9.7 0.4X +In expression 39 41 3 259.6 3.9 1.0X +InSet expression 99 101 2 101.1 9.9 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 63 65 3 157.7 6.3 1.0X -InSet expression 97 100 4 102.8 9.7 0.7X +In expression 61 63 3 164.4 6.1 1.0X +InSet expression 99 102 2 101.0 9.9 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 116 119 4 86.1 11.6 1.0X -InSet expression 135 137 1 74.3 13.5 0.9X +In expression 111 112 3 89.9 11.1 1.0X +InSet expression 136 138 2 73.4 13.6 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 184 197 6 54.5 18.4 1.0X -InSet expression 117 119 2 85.4 11.7 1.6X +In expression 196 199 4 50.9 19.6 1.0X +InSet expression 118 121 2 84.4 11.8 1.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 342 351 8 29.2 34.2 1.0X -InSet expression 124 126 1 80.6 12.4 2.8X +In expression 360 364 3 27.8 36.0 1.0X +InSet expression 127 131 6 78.8 12.7 2.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 511 518 9 19.6 51.1 1.0X -InSet expression 136 140 3 73.3 13.6 3.7X +In expression 514 517 3 19.4 51.4 1.0X +InSet expression 139 141 2 72.0 13.9 3.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 668 675 9 15.0 66.8 1.0X -InSet expression 149 152 4 67.1 14.9 4.5X +In expression 673 687 16 14.8 67.3 1.0X +InSet expression 151 153 2 66.4 15.1 4.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 826 831 3 12.1 82.6 1.0X -InSet expression 161 163 1 62.2 16.1 5.1X +In expression 833 836 3 12.0 83.3 1.0X +InSet expression 163 166 3 61.5 16.3 5.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 20 24 4 497.1 2.0 1.0X -InSet expression 77 80 1 129.1 7.7 0.3X +In expression 21 25 4 466.0 2.1 1.0X +InSet expression 79 81 1 126.7 7.9 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 28 31 4 356.5 2.8 1.0X -InSet expression 94 96 1 105.9 9.4 0.3X +In expression 30 32 5 335.1 3.0 1.0X +InSet expression 96 98 2 104.5 
9.6 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 56 60 4 177.3 5.6 1.0X -InSet expression 108 110 1 92.5 10.8 0.5X +In expression 58 60 3 171.6 5.8 1.0X +InSet expression 109 111 2 91.5 10.9 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 100 102 3 99.7 10.0 1.0X -InSet expression 133 135 1 75.1 13.3 0.8X +In expression 101 103 3 98.6 10.1 1.0X +InSet expression 134 136 2 74.6 13.4 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 179 182 4 55.7 17.9 1.0X -InSet expression 120 123 3 83.2 12.0 1.5X +In expression 180 182 3 55.5 18.0 1.0X +InSet expression 121 124 4 82.4 12.1 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 337 347 7 29.6 33.7 1.0X -InSet expression 127 131 9 78.9 12.7 2.7X +In expression 339 344 3 29.5 33.9 1.0X +InSet expression 127 130 2 78.4 12.7 2.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 506 517 21 19.8 50.6 1.0X -InSet expression 135 139 4 73.8 13.5 3.7X +In expression 507 507 0 19.7 50.7 1.0X +InSet expression 138 140 2 72.3 13.8 3.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 666 672 4 15.0 66.6 1.0X -InSet expression 148 152 3 67.4 14.8 4.5X +In expression 664 675 10 15.1 66.4 1.0X +InSet expression 151 153 1 66.2 15.1 4.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 831 860 62 12.0 83.1 1.0X -InSet expression 159 162 1 62.7 15.9 5.2X +In expression 833 867 63 12.0 83.3 1.0X +InSet expression 162 165 2 61.8 16.2 5.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit 
Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 19 20 2 526.2 1.9 1.0X -InSet expression 86 87 1 116.9 8.6 0.2X +In expression 20 22 3 501.8 2.0 1.0X +InSet expression 89 90 1 113.0 8.9 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 28 31 3 352.8 2.8 1.0X -InSet expression 101 103 2 98.9 10.1 0.3X +In expression 30 33 4 334.8 3.0 1.0X +InSet expression 105 107 2 95.6 10.5 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 53 56 3 190.2 5.3 1.0X -InSet expression 101 103 2 98.9 10.1 0.5X +In expression 54 58 5 184.2 5.4 1.0X +InSet expression 104 106 2 96.5 10.4 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 92 94 3 108.2 9.2 1.0X -InSet expression 135 138 2 73.8 13.5 0.7X +In expression 93 95 3 107.1 9.3 1.0X +InSet expression 137 139 3 73.2 
13.7 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 171 173 2 58.4 17.1 1.0X -InSet expression 121 123 2 82.4 12.1 1.4X +In expression 172 173 3 58.1 17.2 1.0X +InSet expression 120 122 2 83.2 12.0 1.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 328 331 3 30.5 32.8 1.0X -InSet expression 129 131 2 77.7 12.9 2.5X +In expression 332 337 6 30.2 33.2 1.0X +InSet expression 129 131 2 77.8 12.9 2.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 36 40 4 274.9 3.6 1.0X -InSet expression 77 81 8 130.3 7.7 0.5X +In expression 36 40 4 281.6 3.6 1.0X +InSet expression 78 80 2 127.9 7.8 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 61 63 3 164.9 6.1 1.0X -InSet 
expression 98 99 1 102.5 9.8 0.6X +In expression 61 65 3 164.8 6.1 1.0X +InSet expression 100 102 2 99.6 10.0 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 147 150 3 67.8 14.7 1.0X -InSet expression 99 101 1 101.1 9.9 1.5X +In expression 149 151 3 67.0 14.9 1.0X +InSet expression 100 102 2 100.1 10.0 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 227 232 6 44.0 22.7 1.0X -InSet expression 144 146 2 69.5 14.4 1.6X +In expression 231 234 3 43.3 23.1 1.0X +InSet expression 146 147 2 68.7 14.6 1.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 384 387 4 26.0 38.4 1.0X -InSet expression 116 118 1 86.0 11.6 3.3X +In expression 387 389 2 25.9 38.7 1.0X +InSet expression 117 119 2 85.5 11.7 3.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 1895 1931 68 5.3 189.5 1.0X -InSet expression 120 122 2 83.6 12.0 15.9X +In expression 1905 1963 66 5.2 190.5 1.0X +InSet expression 122 123 1 81.9 12.2 15.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 36 39 3 281.1 3.6 1.0X -InSet expression 77 80 2 129.7 7.7 0.5X +In expression 39 40 3 258.5 3.9 1.0X +InSet expression 79 81 2 126.5 7.9 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 61 64 4 163.7 6.1 1.0X -InSet expression 98 100 1 101.8 9.8 0.6X +In expression 62 63 3 161.7 6.2 1.0X +InSet expression 100 102 2 99.8 10.0 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 151 153 3 66.3 15.1 1.0X -InSet expression 100 103 3 99.9 10.0 1.5X +In expression 150 152 3 66.7 15.0 1.0X +InSet expression 99 101 4 101.1 9.9 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 227 232 10 44.0 22.7 1.0X -InSet expression 143 145 1 70.1 14.3 1.6X +In expression 231 233 2 43.3 23.1 1.0X +InSet expression 147 149 4 68.1 14.7 1.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 386 389 3 25.9 38.6 1.0X -InSet expression 116 117 1 86.2 11.6 3.3X +In expression 390 391 2 25.6 39.0 1.0X +InSet expression 119 121 2 84.3 11.9 3.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 2266 2298 49 4.4 226.6 1.0X -InSet expression 119 121 1 83.9 11.9 19.0X +In expression 2236 2268 70 4.5 223.6 1.0X +InSet expression 122 124 1 81.8 12.2 18.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 20 22 3 49.4 20.2 1.0X -InSet expression 59 61 2 17.0 58.8 0.3X +In expression 21 22 2 47.2 21.2 1.0X +InSet expression 58 60 2 
17.3 58.0 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 26 28 3 37.9 26.4 1.0X -InSet expression 61 63 2 16.3 61.3 0.4X +In expression 27 29 2 37.0 27.0 1.0X +InSet expression 60 62 2 16.6 60.1 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 40 43 3 24.7 40.4 1.0X -InSet expression 62 65 4 16.2 61.7 0.7X +In expression 42 43 2 23.9 41.8 1.0X +InSet expression 61 63 1 16.3 61.2 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 79 81 3 12.6 79.4 1.0X -InSet expression 67 69 2 14.8 67.4 1.2X +In expression 80 82 2 12.5 80.3 1.0X +InSet expression 65 67 1 15.3 65.4 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 240 245 7 
4.2 240.0 1.0X -InSet expression 65 68 4 15.4 65.1 3.7X +In expression 241 243 3 4.1 241.2 1.0X +InSet expression 64 66 3 15.6 64.1 3.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 572 576 5 1.7 571.9 1.0X -InSet expression 66 68 1 15.1 66.4 8.6X +In expression 581 582 1 1.7 580.6 1.0X +InSet expression 66 68 3 15.1 66.2 8.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 2 199.2 5.0 1.0X -InSet expression 5 6 2 211.3 4.7 1.1X +In expression 5 6 2 201.8 5.0 1.0X +InSet expression 5 6 2 211.8 4.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 2 205.8 4.9 1.0X -InSet expression 5 5 2 210.7 4.7 1.0X +In expression 5 6 2 207.5 4.8 1.0X +InSet expression 5 6 2 207.7 4.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 2 194.7 5.1 1.0X -InSet expression 5 6 2 191.2 5.2 1.0X +In expression 5 7 3 193.2 5.2 1.0X +InSet expression 5 6 2 190.2 5.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 6 7 2 167.7 6.0 1.0X -InSet expression 6 7 2 167.6 6.0 1.0X +In expression 6 7 2 167.4 6.0 1.0X +InSet expression 6 7 2 168.1 5.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 7 8 2 134.1 7.5 1.0X -InSet expression 7 8 2 135.4 7.4 1.0X +In expression 8 9 3 132.3 7.6 1.0X +InSet expression 8 9 3 133.1 7.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 11 11 2 94.8 10.6 1.0X -InSet expression 11 11 1 95.0 10.5 1.0X +In expression 11 13 3 91.4 10.9 1.0X +InSet expression 11 13 3 93.3 10.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core 
Processor 5 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 30 1 34.3 29.1 1.0X -InSet expression 43 45 2 23.1 43.3 0.7X +In expression 29 34 5 34.2 29.3 1.0X +InSet expression 43 46 3 23.2 43.1 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 34 36 2 29.0 34.5 1.0X -InSet expression 46 47 1 21.9 45.6 0.8X +In expression 34 38 4 29.4 34.1 1.0X +InSet expression 46 50 3 21.9 45.7 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 44 45 2 23.0 43.5 1.0X -InSet expression 50 51 1 19.9 50.3 0.9X +In expression 43 47 3 23.2 43.1 1.0X +InSet expression 60 62 3 16.5 60.5 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 58 60 2 17.1 58.4 1.0X -InSet expression 54 55 1 18.5 54.2 1.1X +In expression 59 59 1 17.0 58.7 1.0X +InSet expression 54 54 1 18.6 53.7 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 90 92 2 11.1 90.4 1.0X -InSet expression 51 53 1 19.6 51.1 1.8X +In expression 89 90 2 11.2 89.4 1.0X +InSet expression 51 51 1 19.7 50.8 1.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 408 413 5 2.4 408.2 1.0X -InSet expression 51 53 2 19.6 50.9 8.0X +In expression 408 410 4 2.5 407.6 1.0X +InSet expression 52 52 1 19.4 51.6 7.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 17 18 2 593.9 1.7 1.0X -InSet expression 81 83 2 123.5 8.1 0.2X +In expression 17 19 2 579.0 1.7 1.0X +InSet expression 84 85 2 118.9 8.4 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 23 25 4 442.1 2.3 1.0X -InSet expression 95 96 1 105.7 9.5 0.2X +In expression 23 24 2 435.8 2.3 1.0X +InSet 
expression 97 98 1 103.3 9.7 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 48 50 2 209.9 4.8 1.0X -InSet expression 128 130 1 78.1 12.8 0.4X +In expression 49 49 2 206.1 4.9 1.0X +InSet expression 129 130 1 77.8 12.9 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 81 84 3 123.3 8.1 1.0X -InSet expression 161 163 1 62.0 16.1 0.5X +In expression 82 83 2 122.1 8.2 1.0X +InSet expression 160 162 1 62.5 16.0 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 152 154 2 65.6 15.2 1.0X -InSet expression 137 138 1 73.0 13.7 1.1X +In expression 153 154 2 65.3 15.3 1.0X +InSet expression 138 140 1 72.4 13.8 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In 
expression 295 306 11 33.9 29.5 1.0X -InSet expression 133 134 1 75.4 13.3 2.2X +In expression 296 308 11 33.8 29.6 1.0X +InSet expression 134 136 1 74.7 13.4 2.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 304 306 3 32.9 30.4 1.0X -InSet expression 300 303 3 33.3 30.0 1.0X +In expression 288 289 1 34.7 28.8 1.0X +InSet expression 284 288 3 35.2 28.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 307 313 4 32.6 30.7 1.0X -InSet expression 300 302 2 33.3 30.0 1.0X +In expression 290 294 2 34.5 29.0 1.0X +InSet expression 284 287 2 35.2 28.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 344 345 1 29.1 34.4 1.0X -InSet expression 300 301 1 33.4 30.0 1.1X +In expression 328 329 1 30.5 32.8 1.0X +InSet expression 287 289 2 34.8 28.7 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 387 393 7 25.9 38.7 1.0X -InSet expression 300 302 1 33.3 30.0 1.3X +In expression 381 389 17 26.3 38.1 1.0X +InSet expression 285 289 4 35.1 28.5 1.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 488 489 1 20.5 48.8 1.0X -InSet expression 300 305 3 33.3 30.0 1.6X +In expression 474 477 3 21.1 47.4 1.0X +InSet expression 287 289 1 34.9 28.7 1.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 643 646 3 15.6 64.3 1.0X -InSet expression 303 305 2 33.0 30.3 2.1X +In expression 618 620 1 16.2 61.8 1.0X +InSet expression 287 290 3 34.8 28.7 2.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 803 805 2 12.5 80.3 1.0X -InSet expression 305 306 1 32.8 30.5 2.6X +In expression 789 793 3 12.7 78.9 1.0X +InSet expression 291 295 3 34.3 29.1 2.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 960 972 8 10.4 96.0 1.0X -InSet expression 306 308 2 32.7 30.6 3.1X +In expression 952 973 27 10.5 95.2 1.0X +InSet expression 292 294 2 34.2 29.2 3.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1122 1145 26 8.9 112.2 1.0X -InSet expression 371 374 2 27.0 37.1 3.0X +In expression 1110 1118 7 9.0 111.0 1.0X +InSet expression 369 371 2 27.1 36.9 3.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 26 27 2 38.8 25.8 1.0X -InSet expression 58 60 2 17.2 58.2 0.4X +In expression 27 28 2 37.7 26.5 1.0X +InSet expression 59 60 2 17.0 58.8 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 41 43 1 24.3 41.1 1.0X -InSet expression 87 90 4 11.5 87.1 0.5X +In expression 42 43 2 24.1 41.6 1.0X +InSet expression 87 89 2 11.5 86.9 0.5X -OpenJDK 
64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 162 165 3 6.2 162.3 1.0X -InSet expression 102 104 2 9.8 101.6 1.6X +In expression 172 174 3 5.8 171.8 1.0X +InSet expression 102 103 1 9.8 101.6 1.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 393 400 12 2.5 393.1 1.0X -InSet expression 130 134 3 7.7 130.4 3.0X +In expression 399 401 2 2.5 398.8 1.0X +InSet expression 131 133 1 7.7 130.7 3.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 818 822 4 1.2 818.1 1.0X -InSet expression 146 149 2 6.8 146.5 5.6X +In expression 780 785 5 1.3 779.7 1.0X +InSet expression 146 149 3 6.8 146.3 5.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1772 1922 315 0.6 1772.2 1.0X -InSet 
expression 164 167 2 6.1 164.2 10.8X +In expression 1715 1871 325 0.6 1714.6 1.0X +InSet expression 164 166 2 6.1 163.6 10.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 17 18 1 58.3 17.1 1.0X -InSet expression 83 87 9 12.1 82.9 0.2X +In expression 17 20 3 57.7 17.3 1.0X +InSet expression 87 90 6 11.5 86.9 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 24 25 1 42.0 23.8 1.0X -InSet expression 126 128 1 7.9 126.2 0.2X +In expression 25 27 2 40.8 24.5 1.0X +InSet expression 134 136 2 7.5 133.9 0.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 71 73 1 14.1 71.1 1.0X -InSet expression 149 150 1 6.7 148.8 0.5X +In expression 72 73 1 13.8 72.2 1.0X +InSet expression 157 163 14 6.4 156.9 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 191 192 1 5.2 191.2 1.0X -InSet expression 190 194 4 5.3 189.9 1.0X +In expression 198 199 2 5.0 198.0 1.0X +InSet expression 202 204 2 5.0 202.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 564 582 35 1.8 563.9 1.0X -InSet expression 214 217 2 4.7 214.2 2.6X +In expression 555 573 35 1.8 554.6 1.0X +InSet expression 229 233 3 4.4 229.2 2.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1282 1479 291 0.8 1282.4 1.0X -InSet expression 243 252 7 4.1 243.3 5.3X +In expression 1395 1508 239 0.7 1395.2 1.0X +InSet expression 262 264 2 3.8 261.5 5.3X diff --git a/sql/core/benchmarks/InExpressionBenchmark-results.txt b/sql/core/benchmarks/InExpressionBenchmark-results.txt index 5178c51124c4f..539cb9a5060c9 100644 --- a/sql/core/benchmarks/InExpressionBenchmark-results.txt +++ b/sql/core/benchmarks/InExpressionBenchmark-results.txt @@ -2,739 +2,739 @@ In Expression Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 43 61 11 230.8 4.3 1.0X -InSet expression 88 94 7 113.6 8.8 0.5X +In expression 48 60 8 210.3 4.8 1.0X +InSet expression 85 92 6 117.9 8.5 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 36 42 5 279.2 3.6 1.0X -InSet expression 82 86 4 121.3 8.2 0.4X +In expression 38 43 6 263.4 3.8 1.0X +InSet expression 79 84 4 125.8 7.9 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 65 69 3 154.3 6.5 1.0X -InSet expression 83 87 4 121.0 8.3 0.8X +In expression 67 70 4 149.6 6.7 1.0X +InSet expression 84 88 3 119.3 8.4 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 105 107 2 95.4 10.5 1.0X -InSet expression 88 91 3 113.8 8.8 1.2X +In expression 107 110 4 93.9 10.7 1.0X +InSet expression 89 92 3 112.7 8.9 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 193 196 5 51.8 19.3 1.0X -InSet expression 98 100 3 102.2 9.8 2.0X +In expression 194 198 4 51.5 19.4 1.0X +InSet expression 99 102 3 101.5 9.9 2.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 388 390 2 25.8 38.8 1.0X -InSet expression 178 180 2 56.1 17.8 2.2X +In expression 397 401 3 25.2 39.7 1.0X +InSet expression 191 194 5 52.5 19.1 2.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 30 34 4 330.9 3.0 1.0X -InSet expression 91 95 3 109.5 9.1 0.3X +In expression 32 35 4 316.4 3.2 1.0X +InSet expression 96 100 2 104.6 9.6 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 38 42 6 259.9 3.8 1.0X -InSet expression 106 109 4 94.7 10.6 0.4X +In expression 37 40 4 268.0 3.7 1.0X +InSet expression 109 113 4 91.4 10.9 0.3X 
-OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 61 65 6 164.1 6.1 1.0X -InSet expression 106 108 2 94.6 10.6 0.6X +In expression 63 65 3 159.7 6.3 1.0X +InSet expression 109 116 18 91.6 10.9 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 116 118 2 86.3 11.6 1.0X -InSet expression 136 138 2 73.4 13.6 0.9X +In expression 118 120 2 84.6 11.8 1.0X +InSet expression 139 142 3 72.0 13.9 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 197 199 3 50.8 19.7 1.0X -InSet expression 119 121 1 84.0 11.9 1.7X +In expression 186 188 2 53.8 18.6 1.0X +InSet expression 122 126 3 81.7 12.2 1.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 364 367 3 27.5 36.4 1.0X 
-InSet expression 126 128 1 79.3 12.6 2.9X +In expression 366 368 3 27.3 36.6 1.0X +InSet expression 133 135 1 75.3 13.3 2.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 548 551 3 18.2 54.8 1.0X -InSet expression 139 143 3 71.8 13.9 3.9X +In expression 551 555 7 18.1 55.1 1.0X +InSet expression 145 147 2 69.0 14.5 3.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 666 672 4 15.0 66.6 1.0X -InSet expression 153 155 3 65.3 15.3 4.4X +In expression 664 785 87 15.1 66.4 1.0X +InSet expression 157 161 2 63.6 15.7 4.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 934 938 5 10.7 93.4 1.0X -InSet expression 165 168 2 60.4 16.5 5.6X +In expression 884 892 5 11.3 88.4 1.0X +InSet expression 170 173 3 58.7 17.0 5.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 28 30 2 352.5 2.8 1.0X -InSet expression 87 89 4 114.7 8.7 0.3X +In expression 30 32 3 334.7 3.0 1.0X +InSet expression 92 94 3 108.7 9.2 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 34 37 4 291.1 3.4 1.0X -InSet expression 106 108 2 94.2 10.6 0.3X +In expression 36 39 4 277.4 3.6 1.0X +InSet expression 109 110 1 91.7 10.9 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 69 71 1 144.2 6.9 1.0X -InSet expression 128 133 8 78.0 12.8 0.5X +In expression 70 72 2 142.4 7.0 1.0X +InSet expression 131 133 1 76.3 13.1 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 105 107 2 95.0 10.5 1.0X -InSet expression 143 145 2 70.1 14.3 0.7X +In expression 106 107 2 94.6 10.6 1.0X +InSet expression 146 148 1 68.5 14.6 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 195 197 2 51.2 19.5 1.0X -InSet expression 123 127 5 81.4 12.3 1.6X +In expression 196 197 2 51.1 19.6 1.0X +InSet expression 126 130 5 79.6 12.6 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 353 356 4 28.3 35.3 1.0X -InSet expression 127 129 1 78.7 12.7 2.8X +In expression 354 357 4 28.3 35.4 1.0X +InSet expression 130 132 1 76.9 13.0 2.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 500 502 3 20.0 50.0 1.0X -InSet expression 137 142 7 73.1 13.7 3.7X +In expression 501 504 4 20.0 50.1 1.0X +InSet expression 143 148 5 70.0 14.3 3.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 666 670 4 15.0 66.6 1.0X -InSet expression 151 154 2 66.1 
15.1 4.4X +In expression 665 667 4 15.0 66.5 1.0X +InSet expression 155 158 1 64.4 15.5 4.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 854 858 4 11.7 85.4 1.0X -InSet expression 162 164 1 61.9 16.2 5.3X +In expression 858 861 3 11.7 85.8 1.0X +InSet expression 167 171 4 59.9 16.7 5.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 27 29 3 372.2 2.7 1.0X -InSet expression 89 92 2 111.7 8.9 0.3X +In expression 28 30 2 356.5 2.8 1.0X +InSet expression 91 93 2 109.8 9.1 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 34 39 2 291.3 3.4 1.0X -InSet expression 107 109 1 93.5 10.7 0.3X +In expression 35 37 2 283.2 3.5 1.0X +InSet expression 107 110 2 93.1 10.7 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 60 65 9 166.2 6.0 1.0X -InSet expression 109 112 2 91.9 10.9 0.6X +In expression 62 66 11 160.7 6.2 1.0X +InSet expression 110 113 3 90.6 11.0 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 103 104 1 97.2 10.3 1.0X -InSet expression 140 144 8 71.5 14.0 0.7X +In expression 105 106 1 95.4 10.5 1.0X +InSet expression 142 144 1 70.3 14.2 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 183 184 3 54.8 18.3 1.0X -InSet expression 119 121 1 83.9 11.9 1.5X +In expression 195 195 1 51.4 19.5 1.0X +InSet expression 122 124 2 81.7 12.2 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 353 355 2 28.3 35.3 1.0X -InSet expression 122 127 3 81.6 12.2 2.9X +In expression 360 364 3 27.8 36.0 1.0X +InSet expression 130 132 2 76.7 13.0 2.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 508 510 3 19.7 50.8 1.0X -InSet expression 135 140 8 74.0 13.5 3.8X +In expression 509 514 8 19.6 50.9 1.0X +InSet expression 142 143 1 70.5 14.2 3.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 668 692 33 15.0 66.8 1.0X -InSet expression 147 149 2 68.2 14.7 4.6X +In expression 668 683 23 15.0 66.8 1.0X +InSet expression 153 155 1 65.4 15.3 4.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 835 841 8 12.0 83.5 1.0X -InSet expression 160 162 2 62.6 16.0 5.2X +In expression 830 854 45 12.1 83.0 1.0X +InSet expression 165 167 1 60.8 16.5 5.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 22 24 3 456.0 2.2 1.0X -InSet expression 86 89 4 116.4 8.6 0.3X +In expression 23 24 2 444.2 2.3 1.0X +InSet expression 88 91 3 
113.3 8.8 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 31 3 350.9 2.9 1.0X -InSet expression 103 105 3 97.1 10.3 0.3X +In expression 29 31 2 345.8 2.9 1.0X +InSet expression 104 106 1 95.9 10.4 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 56 57 2 179.0 5.6 1.0X -InSet expression 118 120 1 84.6 11.8 0.5X +In expression 57 58 2 176.4 5.7 1.0X +InSet expression 119 121 1 83.8 11.9 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 100 101 1 100.0 10.0 1.0X -InSet expression 138 140 2 72.6 13.8 0.7X +In expression 101 102 1 99.2 10.1 1.0X +InSet expression 139 142 3 71.7 13.9 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 179 181 2 55.8 17.9 1.0X -InSet expression 122 124 1 82.2 12.2 1.5X +In expression 180 181 2 55.6 18.0 1.0X +InSet expression 125 128 4 80.1 12.5 1.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 344 347 3 29.1 34.4 1.0X -InSet expression 126 128 2 79.7 12.6 2.7X +In expression 346 350 4 28.9 34.6 1.0X +InSet expression 130 131 2 77.1 13.0 2.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 505 506 2 19.8 50.5 1.0X -InSet expression 136 139 2 73.5 13.6 3.7X +In expression 506 508 3 19.8 50.6 1.0X +InSet expression 141 144 2 71.0 14.1 3.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 661 665 3 15.1 66.1 1.0X -InSet expression 147 149 1 68.1 14.7 4.5X +In expression 658 665 4 15.2 65.8 1.0X +InSet expression 153 155 2 65.5 15.3 4.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 
64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 818 856 78 12.2 81.8 1.0X -InSet expression 159 161 3 63.0 15.9 5.2X +In expression 821 858 76 12.2 82.1 1.0X +InSet expression 164 166 1 61.1 16.4 5.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 22 24 4 459.0 2.2 1.0X -InSet expression 82 86 5 121.2 8.2 0.3X +In expression 22 24 3 456.1 2.2 1.0X +InSet expression 88 90 2 113.9 8.8 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 28 29 2 355.6 2.8 1.0X -InSet expression 99 101 1 101.1 9.9 0.3X +In expression 30 31 2 337.8 3.0 1.0X +InSet expression 103 106 2 96.8 10.3 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 53 55 2 190.1 5.3 1.0X -InSet expression 102 105 5 98.5 10.2 0.5X +In expression 55 56 2 180.8 5.5 1.0X 
+InSet expression 106 108 2 94.5 10.6 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 91 94 3 109.4 9.1 1.0X -InSet expression 132 134 1 75.7 13.2 0.7X +In expression 95 98 2 105.3 9.5 1.0X +InSet expression 136 139 4 73.5 13.6 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 175 183 10 57.2 17.5 1.0X -InSet expression 112 114 2 89.5 11.2 1.6X +In expression 172 177 5 58.0 17.2 1.0X +InSet expression 116 119 4 86.3 11.6 1.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 327 332 4 30.6 32.7 1.0X -InSet expression 119 128 18 84.3 11.9 2.8X +In expression 330 347 10 30.3 33.0 1.0X +InSet expression 125 127 2 80.2 12.5 2.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-In expression 35 37 3 286.9 3.5 1.0X -InSet expression 113 115 1 88.8 11.3 0.3X +In expression 37 38 3 273.6 3.7 1.0X +InSet expression 114 116 2 87.4 11.4 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 64 65 2 157.2 6.4 1.0X -InSet expression 143 148 10 70.2 14.3 0.4X +In expression 65 66 1 154.2 6.5 1.0X +InSet expression 143 145 3 70.1 14.3 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 125 127 2 79.8 12.5 1.0X -InSet expression 143 147 5 70.1 14.3 0.9X +In expression 128 130 3 78.1 12.8 1.0X +InSet expression 144 146 3 69.4 14.4 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 208 210 1 48.0 20.8 1.0X -InSet expression 188 190 2 53.3 18.8 1.1X +In expression 210 211 2 47.7 21.0 1.0X +InSet expression 191 192 1 52.3 19.1 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 365 369 6 27.4 36.5 1.0X -InSet expression 148 154 15 67.6 14.8 2.5X +In expression 367 369 4 27.3 36.7 1.0X +InSet expression 149 151 2 67.1 14.9 2.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1682 1771 104 5.9 168.2 1.0X -InSet expression 148 150 1 67.5 14.8 11.3X +In expression 1682 1789 90 5.9 168.2 1.0X +InSet expression 151 152 1 66.2 15.1 11.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 35 36 2 286.9 3.5 1.0X -InSet expression 95 97 2 105.0 9.5 0.4X +In expression 41 42 3 246.1 4.1 1.0X +InSet expression 116 117 2 86.6 11.6 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 63 64 2 159.6 6.3 1.0X -InSet expression 116 118 1 85.9 11.6 0.5X +In expression 63 65 6 158.1 6.3 1.0X +InSet expression 144 147 3 69.3 14.4 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 127 129 1 78.6 12.7 1.0X -InSet expression 118 120 1 84.9 11.8 1.1X +In expression 128 129 2 78.2 12.8 1.0X +InSet expression 144 146 4 69.6 14.4 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 207 210 3 48.3 20.7 1.0X -InSet expression 155 158 1 64.3 15.5 1.3X +In expression 210 210 2 47.7 21.0 1.0X +InSet expression 196 198 3 51.1 19.6 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 364 370 12 27.4 36.4 1.0X -InSet expression 126 129 3 79.3 12.6 2.9X +In expression 367 367 1 27.3 36.7 1.0X +InSet expression 152 154 1 65.6 15.2 2.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1920 1928 7 5.2 192.0 1.0X -InSet expression 134 139 5 74.8 13.4 14.4X +In expression 1925 2082 125 5.2 192.5 1.0X +InSet expression 
155 158 2 64.3 15.5 12.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 20 21 3 51.3 19.5 1.0X -InSet expression 56 57 1 17.9 56.0 0.3X +In expression 21 23 2 47.8 20.9 1.0X +InSet expression 58 61 7 17.2 58.1 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 25 27 2 40.2 24.9 1.0X -InSet expression 58 60 1 17.3 57.9 0.4X +In expression 26 28 2 38.7 25.9 1.0X +InSet expression 60 63 2 16.5 60.5 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 40 41 2 25.3 39.5 1.0X -InSet expression 58 61 4 17.3 58.0 0.7X +In expression 42 43 2 24.1 41.5 1.0X +InSet expression 61 63 2 16.4 60.8 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In 
expression 76 77 1 13.2 75.8 1.0X -InSet expression 62 64 1 16.1 62.2 1.2X +In expression 80 80 1 12.6 79.6 1.0X +InSet expression 65 67 1 15.3 65.3 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 234 236 4 4.3 234.3 1.0X -InSet expression 60 62 2 16.7 60.0 3.9X +In expression 237 238 2 4.2 236.9 1.0X +InSet expression 63 65 2 15.9 62.8 3.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 548 605 125 1.8 547.7 1.0X -InSet expression 63 65 4 15.9 62.7 8.7X +In expression 555 611 124 1.8 554.9 1.0X +InSet expression 65 68 4 15.4 64.8 8.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 1 202.4 4.9 1.0X -InSet expression 5 6 1 207.4 4.8 1.0X +In expression 5 6 2 194.4 5.1 1.0X +InSet expression 5 6 2 205.4 4.9 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 1 201.8 5.0 1.0X -InSet expression 5 5 1 203.6 4.9 1.0X +In expression 5 6 2 199.6 5.0 1.0X +InSet expression 5 6 1 205.5 4.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 1 187.1 5.3 1.0X -InSet expression 5 6 1 185.3 5.4 1.0X +In expression 5 7 2 182.1 5.5 1.0X +InSet expression 5 6 1 187.6 5.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 6 7 1 163.5 6.1 1.0X -InSet expression 6 7 1 161.6 6.2 1.0X +In expression 6 7 2 160.2 6.2 1.0X +InSet expression 6 7 1 160.7 6.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 8 8 1 127.7 7.8 1.0X -InSet expression 8 9 2 127.7 7.8 1.0X +In expression 8 9 1 125.2 8.0 1.0X +InSet expression 8 9 1 122.6 8.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor 200 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 11 12 1 90.6 11.0 1.0X -InSet expression 11 12 1 90.6 11.0 1.0X +In expression 12 13 1 84.6 11.8 1.0X +InSet expression 12 13 1 84.8 11.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 32 4 34.6 28.9 1.0X -InSet expression 45 46 2 22.3 44.8 0.6X +In expression 30 32 2 33.2 30.2 1.0X +InSet expression 49 51 2 20.2 49.4 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 34 36 2 29.3 34.1 1.0X -InSet expression 48 49 1 21.0 47.6 0.7X +In expression 36 38 2 27.8 36.0 1.0X +InSet expression 51 53 2 19.5 51.4 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 43 44 2 23.5 42.5 1.0X -InSet expression 51 52 1 19.6 50.9 0.8X +In expression 44 45 1 22.7 44.0 1.0X +InSet expression 56 57 1 17.9 55.7 0.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on 
Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 58 59 2 17.4 57.6 1.0X -InSet expression 55 57 1 18.2 54.8 1.1X +In expression 59 60 2 16.9 59.3 1.0X +InSet expression 60 61 1 16.6 60.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 88 90 1 11.3 88.4 1.0X -InSet expression 51 52 1 19.6 51.1 1.7X +In expression 138 139 1 7.3 137.5 1.0X +InSet expression 56 57 1 17.9 56.0 2.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 382 388 12 2.6 381.7 1.0X -InSet expression 52 53 1 19.4 51.5 7.4X +In expression 392 393 2 2.6 392.0 1.0X +InSet expression 56 58 6 17.8 56.0 7.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 16 17 1 622.8 1.6 1.0X -InSet expression 89 92 3 112.0 8.9 0.2X +In expression 
17 18 2 602.6 1.7 1.0X +InSet expression 91 93 2 109.3 9.1 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 23 25 4 427.6 2.3 1.0X -InSet expression 99 101 1 101.0 9.9 0.2X +In expression 24 26 2 412.8 2.4 1.0X +InSet expression 101 103 2 98.5 10.1 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 45 46 2 220.5 4.5 1.0X -InSet expression 125 127 1 80.0 12.5 0.4X +In expression 47 48 1 212.0 4.7 1.0X +InSet expression 127 129 3 78.5 12.7 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 80 82 1 124.5 8.0 1.0X -InSet expression 151 153 2 66.2 15.1 0.5X +In expression 82 83 1 121.4 8.2 1.0X +InSet expression 155 157 1 64.5 15.5 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 152 153 2 65.8 15.2 1.0X -InSet expression 130 132 1 76.7 13.0 1.2X +In expression 154 155 2 65.1 15.4 1.0X +InSet expression 133 137 6 74.9 13.3 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 295 304 9 33.9 29.5 1.0X -InSet expression 127 129 1 78.8 12.7 2.3X +In expression 296 307 12 33.8 29.6 1.0X +InSet expression 128 130 1 77.8 12.8 2.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 441 442 1 22.7 44.1 1.0X -InSet expression 434 440 9 23.1 43.4 1.0X +In expression 441 443 2 22.7 44.1 1.0X +InSet expression 437 440 4 22.9 43.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 445 450 7 22.5 44.5 1.0X -InSet expression 437 438 3 22.9 43.7 1.0X +In expression 447 452 4 22.4 44.7 1.0X +InSet expression 441 443 2 22.7 44.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 485 486 1 20.6 48.5 1.0X -InSet expression 436 438 2 23.0 43.6 1.1X +In expression 470 471 1 21.3 47.0 1.0X +InSet expression 438 440 3 22.9 43.8 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 547 549 1 18.3 54.7 1.0X -InSet expression 441 445 4 22.7 44.1 1.2X +In expression 542 543 2 18.5 54.2 1.0X +InSet expression 440 443 3 22.7 44.0 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 608 610 1 16.4 60.8 1.0X -InSet expression 440 441 1 22.7 44.0 1.4X +In expression 619 620 1 16.1 61.9 1.0X +InSet expression 442 445 3 22.6 44.2 1.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 776 782 7 12.9 77.6 1.0X -InSet expression 436 440 2 22.9 43.6 1.8X +In expression 785 790 4 12.7 78.5 1.0X +InSet expression 441 448 7 22.7 
44.1 1.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 300 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 939 945 6 10.6 93.9 1.0X -InSet expression 441 443 2 22.7 44.1 2.1X +In expression 933 937 6 10.7 93.3 1.0X +InSet expression 441 444 2 22.7 44.1 2.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 400 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1098 1105 10 9.1 109.8 1.0X -InSet expression 447 450 2 22.4 44.7 2.5X +In expression 1096 1106 7 9.1 109.6 1.0X +InSet expression 443 444 1 22.6 44.3 2.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 500 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1297 1305 7 7.7 129.7 1.0X -InSet expression 537 539 1 18.6 53.7 2.4X +In expression 1270 1273 2 7.9 127.0 1.0X +InSet expression 551 554 3 18.1 55.1 2.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 24 25 2 
41.2 24.3 1.0X -InSet expression 57 58 1 17.6 56.8 0.4X +In expression 26 28 4 38.9 25.7 1.0X +InSet expression 58 59 2 17.4 57.6 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 41 42 1 24.4 41.0 1.0X -InSet expression 83 86 4 12.0 83.4 0.5X +In expression 42 43 2 24.1 41.5 1.0X +InSet expression 85 87 1 11.7 85.5 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 157 158 2 6.4 156.6 1.0X -InSet expression 98 99 1 10.2 97.6 1.6X +In expression 156 158 2 6.4 156.2 1.0X +InSet expression 100 102 2 10.0 99.8 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 371 374 2 2.7 371.2 1.0X -InSet expression 125 127 1 8.0 125.2 3.0X +In expression 388 389 1 2.6 387.5 1.0X +InSet expression 128 130 3 7.8 128.2 3.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 757 759 3 1.3 756.8 1.0X -InSet expression 142 144 1 7.0 142.5 5.3X +In expression 761 767 6 1.3 761.2 1.0X +InSet expression 143 149 13 7.0 143.1 5.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1671 1822 219 0.6 1671.2 1.0X -InSet expression 159 173 37 6.3 159.2 10.5X +In expression 1682 1860 303 0.6 1682.0 1.0X +InSet expression 160 163 2 6.3 160.0 10.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 5 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 20 21 1 49.8 20.1 1.0X -InSet expression 76 78 5 13.2 75.6 0.3X +In expression 22 23 2 46.5 21.5 1.0X +InSet expression 81 85 3 12.3 81.5 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 10 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 30 2 34.3 29.1 1.0X -InSet expression 116 117 2 8.7 115.6 0.3X +In expression 31 33 2 31.9 31.4 1.0X +InSet expression 122 125 3 8.2 122.4 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 25 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 68 70 1 14.6 68.3 1.0X -InSet expression 131 137 4 7.6 131.2 0.5X +In expression 71 73 1 14.0 71.2 1.0X +InSet expression 144 146 2 6.9 144.0 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 50 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 187 189 3 5.4 186.5 1.0X -InSet expression 176 179 3 5.7 175.5 1.1X +In expression 209 210 1 4.8 209.0 1.0X +InSet expression 186 189 3 5.4 186.1 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 100 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 491 498 8 2.0 491.0 1.0X -InSet expression 199 201 1 5.0 199.0 2.5X +In expression 490 502 7 2.0 489.9 1.0X +InSet expression 209 213 4 4.8 209.2 2.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 200 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1139 1377 266 0.9 1139.0 1.0X -InSet expression 227 232 8 4.4 227.2 5.0X +In expression 1212 1422 205 0.8 1211.6 1.0X +InSet expression 239 
242 5 4.2 238.6 5.1X diff --git a/sql/core/benchmarks/InMemoryColumnarBenchmark-jdk21-results.txt b/sql/core/benchmarks/InMemoryColumnarBenchmark-jdk21-results.txt index 4d79ea0b65033..94ffd3ca73811 100644 --- a/sql/core/benchmarks/InMemoryColumnarBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/InMemoryColumnarBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ Int In-memory with 1000000 rows ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int In-Memory scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -columnar deserialization + columnar-to-row 147 200 59 6.8 147.4 1.0X -row-based deserialization 129 158 42 7.8 129.0 1.1X +columnar deserialization + columnar-to-row 171 215 38 5.8 171.5 1.0X +row-based deserialization 136 139 2 7.3 136.2 1.3X diff --git a/sql/core/benchmarks/InMemoryColumnarBenchmark-results.txt b/sql/core/benchmarks/InMemoryColumnarBenchmark-results.txt index 6787b645563b3..3feaaca07c885 100644 --- a/sql/core/benchmarks/InMemoryColumnarBenchmark-results.txt +++ b/sql/core/benchmarks/InMemoryColumnarBenchmark-results.txt @@ -2,11 +2,11 @@ Int In-memory with 1000000 rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int In-Memory scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -columnar deserialization + columnar-to-row 188 199 12 5.3 
187.6 1.0X -row-based deserialization 142 216 115 7.0 141.9 1.3X +columnar deserialization + columnar-to-row 184 210 23 5.4 184.0 1.0X +row-based deserialization 142 144 2 7.0 142.2 1.3X diff --git a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk21-results.txt b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk21-results.txt index 85601d9e9757f..f78e7551cefe1 100644 --- a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk21-results.txt @@ -1,8 +1,8 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dynamic insert table benchmark, totalRows = 200000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------- -one partition column, 100 partitions 9762 9793 43 0.0 48810.6 1.0X -two partition columns, 500 partitions 25446 25796 495 0.0 127230.3 0.4X -three partition columns, 2000 partitions 68971 69095 176 0.0 344853.7 0.1X +one partition column, 100 partitions 8137 8169 44 0.0 40687.0 1.0X +two partition columns, 500 partitions 20814 20937 174 0.0 104067.7 0.4X +three partition columns, 2000 partitions 56067 56122 78 0.0 280335.3 0.1X diff --git a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt index a8b6b9b48805d..8ca9c389f7348 100644 --- a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt +++ b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt @@ -1,8 +1,8 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor dynamic insert 
table benchmark, totalRows = 200000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------- -one partition column, 100 partitions 9336 9513 250 0.0 46681.2 1.0X -two partition columns, 500 partitions 25266 25745 677 0.0 126332.0 0.4X -three partition columns, 2000 partitions 69778 70117 479 0.0 348891.4 0.1X +one partition column, 100 partitions 7555 7583 41 0.0 37772.8 1.0X +two partition columns, 500 partitions 20496 20667 242 0.0 102480.4 0.4X +three partition columns, 2000 partitions 56071 56093 30 0.0 280357.3 0.1X diff --git a/sql/core/benchmarks/IntervalBenchmark-jdk21-results.txt b/sql/core/benchmarks/IntervalBenchmark-jdk21-results.txt index 260eec63f5118..8e46de244bcc0 100644 --- a/sql/core/benchmarks/IntervalBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-jdk21-results.txt @@ -1,40 +1,40 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 427 498 62 2.3 426.8 1.0X -prepare string w/o interval 385 389 4 2.6 384.8 1.1X -1 units w/ interval 340 343 4 2.9 340.0 1.3X -1 units w/o interval 380 387 6 2.6 380.3 1.1X -2 units w/ interval 549 557 7 1.8 549.2 0.8X -2 units w/o interval 553 555 4 1.8 553.1 0.8X -3 units w/ interval 1125 1126 2 0.9 1124.7 0.4X -3 units w/o interval 1144 1149 7 0.9 1143.7 0.4X -4 units w/ interval 1338 1341 3 0.7 1337.7 0.3X -4 units w/o interval 1351 1367 18 0.7 1351.1 0.3X -5 units w/ interval 1506 1510 5 0.7 1506.4 0.3X -5 units w/o interval 1522 1523 1 0.7 1521.6 0.3X -6 units w/ interval 1644 1651 11 0.6 1643.6 
0.3X -6 units w/o interval 1654 1661 10 0.6 1653.8 0.3X -7 units w/ interval 2058 2066 9 0.5 2058.2 0.2X -7 units w/o interval 2069 2072 5 0.5 2068.7 0.2X -8 units w/ interval 2291 2295 6 0.4 2290.9 0.2X -8 units w/o interval 2348 2358 12 0.4 2347.9 0.2X -9 units w/ interval 2453 2457 5 0.4 2452.8 0.2X -9 units w/o interval 2460 2472 16 0.4 2460.0 0.2X -10 units w/ interval 2709 2716 6 0.4 2709.3 0.2X -10 units w/o interval 2706 2707 1 0.4 2705.6 0.2X -11 units w/ interval 3049 3055 7 0.3 3048.7 0.1X -11 units w/o interval 3043 3050 7 0.3 3042.5 0.1X +prepare string w/ interval 397 422 27 2.5 396.9 1.0X +prepare string w/o interval 365 395 43 2.7 365.0 1.1X +1 units w/ interval 337 347 15 3.0 337.4 1.2X +1 units w/o interval 358 365 10 2.8 357.6 1.1X +2 units w/ interval 526 528 2 1.9 526.3 0.8X +2 units w/o interval 535 539 5 1.9 535.3 0.7X +3 units w/ interval 1126 1133 7 0.9 1125.5 0.4X +3 units w/o interval 1115 1118 3 0.9 1115.0 0.4X +4 units w/ interval 1310 1315 4 0.8 1310.3 0.3X +4 units w/o interval 1327 1333 5 0.8 1327.1 0.3X +5 units w/ interval 1453 1457 7 0.7 1452.9 0.3X +5 units w/o interval 1467 1472 4 0.7 1467.5 0.3X +6 units w/ interval 1615 1618 3 0.6 1614.7 0.2X +6 units w/o interval 1617 1617 1 0.6 1616.6 0.2X +7 units w/ interval 2046 2053 6 0.5 2046.0 0.2X +7 units w/o interval 2067 2072 4 0.5 2067.4 0.2X +8 units w/ interval 2277 2288 11 0.4 2277.3 0.2X +8 units w/o interval 2291 2297 5 0.4 2290.8 0.2X +9 units w/ interval 2612 2618 8 0.4 2611.7 0.2X +9 units w/o interval 2647 2651 4 0.4 2646.5 0.1X +10 units w/ interval 2842 2849 8 0.4 2841.8 0.1X +10 units w/o interval 2838 2841 3 0.4 2838.4 0.1X +11 units w/ interval 3071 3077 9 0.3 3070.8 0.1X +11 units w/o interval 3083 3088 6 0.3 3082.8 0.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor make_interval(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
-------------------------------------------------------------------------------------------------------------------------- -prepare make_interval() 351 355 4 2.9 350.8 1.0X -make_interval(0, 1, 2, 3, 4, 5, 50.123456) 42 44 2 23.9 41.9 8.4X -make_interval(*, *, 2, 3, 4, 5, 50.123456) 52 54 3 19.2 52.0 6.7X -make_interval(0, 1, *, *, 4, 5, 50.123456) 61 64 3 16.4 60.9 5.8X -make_interval(0, 1, 2, 3, *, *, *) 344 348 5 2.9 344.5 1.0X -make_interval(*, *, *, *, *, *, *) 359 363 6 2.8 359.2 1.0X +prepare make_interval() 356 357 2 2.8 355.5 1.0X +make_interval(0, 1, 2, 3, 4, 5, 50.123456) 44 53 8 22.8 43.8 8.1X +make_interval(*, *, 2, 3, 4, 5, 50.123456) 53 57 4 18.8 53.3 6.7X +make_interval(0, 1, *, *, 4, 5, 50.123456) 56 56 0 17.9 55.8 6.4X +make_interval(0, 1, 2, 3, *, *, *) 326 327 1 3.1 326.1 1.1X +make_interval(*, *, *, *, *, *, *) 342 345 3 2.9 341.6 1.0X diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt index f09ebdc4d121f..5bd12d7b15ef5 100644 --- a/sql/core/benchmarks/IntervalBenchmark-results.txt +++ b/sql/core/benchmarks/IntervalBenchmark-results.txt @@ -1,40 +1,40 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare string w/ interval 409 467 97 2.4 409.1 1.0X -prepare string w/o interval 371 378 11 2.7 371.2 1.1X -1 units w/ interval 358 373 14 2.8 358.5 1.1X -1 units w/o interval 382 395 22 2.6 382.1 1.1X -2 units w/ interval 532 536 4 1.9 531.9 0.8X -2 units w/o interval 538 544 6 1.9 537.9 0.8X -3 units w/ interval 1202 1205 3 0.8 1202.1 0.3X -3 units w/o interval 1222 1227 5 0.8 1222.0 0.3X -4 units w/ interval 1403 1408 5 0.7 1403.3 0.3X -4 units w/o 
interval 1432 1435 4 0.7 1431.6 0.3X -5 units w/ interval 1552 1564 16 0.6 1551.5 0.3X -5 units w/o interval 1559 1562 3 0.6 1558.8 0.3X -6 units w/ interval 1700 1705 5 0.6 1700.2 0.2X -6 units w/o interval 1721 1728 8 0.6 1720.8 0.2X -7 units w/ interval 2241 2244 4 0.4 2241.0 0.2X -7 units w/o interval 2254 2265 10 0.4 2254.3 0.2X -8 units w/ interval 2505 2519 15 0.4 2505.0 0.2X -8 units w/o interval 2505 2508 3 0.4 2505.5 0.2X -9 units w/ interval 2621 2629 7 0.4 2621.2 0.2X -9 units w/o interval 2623 2628 4 0.4 2623.1 0.2X -10 units w/ interval 2844 2849 6 0.4 2843.6 0.1X -10 units w/o interval 2829 2842 20 0.4 2829.2 0.1X -11 units w/ interval 3143 3146 3 0.3 3142.7 0.1X -11 units w/o interval 3147 3156 10 0.3 3146.9 0.1X +prepare string w/ interval 407 418 12 2.5 406.7 1.0X +prepare string w/o interval 375 383 8 2.7 374.6 1.1X +1 units w/ interval 386 387 1 2.6 385.8 1.1X +1 units w/o interval 343 352 11 2.9 343.0 1.2X +2 units w/ interval 511 513 2 2.0 511.1 0.8X +2 units w/o interval 526 529 3 1.9 526.5 0.8X +3 units w/ interval 1196 1199 4 0.8 1196.3 0.3X +3 units w/o interval 1171 1174 3 0.9 1171.0 0.3X +4 units w/ interval 1389 1392 3 0.7 1389.3 0.3X +4 units w/o interval 1401 1403 2 0.7 1400.5 0.3X +5 units w/ interval 1545 1549 4 0.6 1545.2 0.3X +5 units w/o interval 1545 1552 8 0.6 1544.9 0.3X +6 units w/ interval 1689 1692 3 0.6 1689.0 0.2X +6 units w/o interval 1703 1706 5 0.6 1702.5 0.2X +7 units w/ interval 2287 2287 1 0.4 2286.6 0.2X +7 units w/o interval 2267 2272 4 0.4 2267.2 0.2X +8 units w/ interval 2475 2479 5 0.4 2474.8 0.2X +8 units w/o interval 2471 2476 4 0.4 2471.1 0.2X +9 units w/ interval 2625 2629 3 0.4 2625.4 0.2X +9 units w/o interval 2616 2624 12 0.4 2616.0 0.2X +10 units w/ interval 2850 2852 2 0.4 2850.5 0.1X +10 units w/o interval 2842 2845 4 0.4 2842.3 0.1X +11 units w/ interval 3177 3180 4 0.3 3177.3 0.1X +11 units w/o interval 3164 3174 8 0.3 3164.1 0.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor make_interval(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -prepare make_interval() 368 374 5 2.7 368.5 1.0X -make_interval(0, 1, 2, 3, 4, 5, 50.123456) 47 50 3 21.1 47.3 7.8X -make_interval(*, *, 2, 3, 4, 5, 50.123456) 59 62 5 17.0 58.8 6.3X -make_interval(0, 1, *, *, 4, 5, 50.123456) 62 64 3 16.0 62.3 5.9X -make_interval(0, 1, 2, 3, *, *, *) 342 345 2 2.9 342.0 1.1X -make_interval(*, *, *, *, *, *, *) 351 357 7 2.8 350.9 1.1X +prepare make_interval() 337 340 3 3.0 337.0 1.0X +make_interval(0, 1, 2, 3, 4, 5, 50.123456) 42 43 1 23.6 42.4 7.9X +make_interval(*, *, 2, 3, 4, 5, 50.123456) 53 55 3 19.0 52.7 6.4X +make_interval(0, 1, *, *, 4, 5, 50.123456) 56 60 5 17.9 55.7 6.0X +make_interval(0, 1, 2, 3, *, *, *) 341 345 5 2.9 341.2 1.0X +make_interval(*, *, *, *, *, *, *) 343 344 1 2.9 342.8 1.0X diff --git a/sql/core/benchmarks/JoinBenchmark-jdk21-results.txt b/sql/core/benchmarks/JoinBenchmark-jdk21-results.txt index 473cfdde4d76d..b908a2502d766 100644 --- a/sql/core/benchmarks/JoinBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/JoinBenchmark-jdk21-results.txt @@ -2,81 +2,81 @@ Join Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w long wholestage off 2088 2099 15 10.0 99.6 1.0X -Join w long wholestage on 918 947 28 22.8 43.8 2.3X +Join w long wholestage off 2048 2052 5 10.2 97.7 1.0X +Join w 
long wholestage on 884 926 37 23.7 42.1 2.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w long duplicated: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w long duplicated wholestage off 1991 1993 3 10.5 94.9 1.0X -Join w long duplicated wholestage on 911 923 16 23.0 43.4 2.2X +Join w long duplicated wholestage off 2023 2028 7 10.4 96.5 1.0X +Join w long duplicated wholestage on 887 904 18 23.6 42.3 2.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w 2 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 ints wholestage off 106730 106790 85 0.2 5089.3 1.0X -Join w 2 ints wholestage on 105489 105534 40 0.2 5030.1 1.0X +Join w 2 ints wholestage off 107738 107744 9 0.2 5137.3 1.0X +Join w 2 ints wholestage on 105798 105824 18 0.2 5044.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w 2 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 longs wholestage off 3315 3323 12 6.3 158.1 1.0X -Join w 2 longs wholestage on 1972 1997 25 10.6 94.0 1.7X +Join w 2 longs wholestage off 3236 3370 189 6.5 154.3 1.0X +Join w 2 longs wholestage on 1977 2014 37 10.6 94.3 1.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w 2 longs duplicated: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 longs duplicated wholestage off 8534 8563 42 2.5 406.9 1.0X -Join w 2 longs duplicated wholestage on 5521 5729 121 3.8 263.3 1.5X +Join w 2 longs duplicated wholestage off 8713 8742 42 2.4 415.5 1.0X +Join w 2 longs duplicated wholestage on 5435 5556 105 3.9 259.2 1.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor outer join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -outer join w long wholestage off 1590 1593 5 13.2 75.8 1.0X -outer join w long wholestage on 948 978 46 22.1 45.2 1.7X +outer join w long wholestage off 1586 1675 126 13.2 75.6 1.0X +outer join w long wholestage on 904 935 30 23.2 43.1 1.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor semi join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -semi join w long wholestage off 1053 1055 3 19.9 50.2 1.0X -semi join w long wholestage on 568 585 15 37.0 27.1 1.9X +semi join w long wholestage off 1052 1053 1 19.9 50.2 1.0X +semi join w long wholestage on 522 528 4 40.2 24.9 2.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sort merge join: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort merge join wholestage off 519 527 11 4.0 247.7 1.0X -sort merge join wholestage on 467 493 27 4.5 222.5 1.1X +sort merge join wholestage off 525 536 15 4.0 250.4 1.0X +sort merge join wholestage on 460 465 5 4.6 219.4 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sort merge join with duplicates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -sort merge join with duplicates wholestage off 1031 1042 15 2.0 491.7 1.0X -sort merge join with duplicates wholestage on 960 968 8 2.2 457.8 1.1X +sort merge join with duplicates wholestage off 1008 1020 17 2.1 480.5 1.0X +sort merge join with duplicates wholestage on 920 934 14 2.3 438.5 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor shuffle hash join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -shuffle hash join wholestage off 530 537 10 7.9 126.4 1.0X -shuffle hash join wholestage on 415 434 12 10.1 99.1 1.3X +shuffle hash join wholestage off 494 508 20 8.5 117.7 1.0X +shuffle hash join wholestage on 412 426 17 10.2 98.1 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor broadcast nested loop join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------- -broadcast nested loop join wholestage off 25590 25605 22 0.8 1220.2 1.0X -broadcast nested loop join wholestage on 18711 18767 79 1.1 892.2 1.4X +broadcast nested loop join wholestage off 25779 25822 61 0.8 1229.2 1.0X +broadcast nested loop join wholestage on 18110 18272 148 1.2 863.6 1.4X diff --git a/sql/core/benchmarks/JoinBenchmark-results.txt b/sql/core/benchmarks/JoinBenchmark-results.txt index 9c460f39d1ae7..abf8364e533d7 100644 --- a/sql/core/benchmarks/JoinBenchmark-results.txt +++ b/sql/core/benchmarks/JoinBenchmark-results.txt @@ -2,81 +2,81 @@ Join Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w long wholestage off 2221 2232 15 9.4 105.9 1.0X -Join w long wholestage on 1032 1080 56 20.3 49.2 2.2X +Join w long wholestage off 2021 2057 50 10.4 96.4 1.0X +Join w long wholestage on 922 960 38 22.7 44.0 2.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w long duplicated: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w long duplicated wholestage off 2180 2181 1 9.6 104.0 1.0X -Join w long duplicated wholestage on 917 927 10 22.9 43.7 2.4X +Join w long duplicated wholestage off 2060 2199 197 10.2 98.2 1.0X +Join w long duplicated wholestage on 
927 935 11 22.6 44.2 2.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w 2 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 ints wholestage off 112458 112473 21 0.2 5362.4 1.0X -Join w 2 ints wholestage on 110885 110937 68 0.2 5287.4 1.0X +Join w 2 ints wholestage off 112712 112721 12 0.2 5374.5 1.0X +Join w 2 ints wholestage on 111144 111183 26 0.2 5299.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w 2 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 longs wholestage off 3502 3507 7 6.0 167.0 1.0X -Join w 2 longs wholestage on 2071 2085 10 10.1 98.8 1.7X +Join w 2 longs wholestage off 3114 3126 17 6.7 148.5 1.0X +Join w 2 longs wholestage on 1971 1991 17 10.6 94.0 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Join w 2 longs duplicated: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 longs duplicated wholestage off 9384 9385 2 2.2 447.4 1.0X -Join w 2 longs duplicated wholestage on 5493 5515 16 3.8 261.9 1.7X +Join w 2 longs duplicated wholestage off 8230 8239 13 2.5 392.4 1.0X +Join w 2 longs duplicated wholestage on 5478 5494 16 3.8 261.2 1.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor outer join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -outer join w long wholestage off 1871 1884 19 11.2 89.2 1.0X -outer join w long wholestage on 1031 1054 30 20.4 49.1 1.8X +outer join w long wholestage off 1607 1627 28 13.1 76.6 1.0X +outer join w long wholestage on 906 914 6 23.1 43.2 1.8X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor semi join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -semi join w long wholestage off 1189 1195 8 17.6 56.7 1.0X -semi join w long wholestage on 549 569 35 38.2 26.2 2.2X +semi join w long wholestage off 1047 1050 3 20.0 49.9 1.0X +semi join w long wholestage on 522 530 5 40.2 24.9 2.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sort merge join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort merge join wholestage off 526 535 13 4.0 250.9 1.0X -sort merge join wholestage on 461 470 6 4.5 220.0 1.1X +sort merge join wholestage off 512 516 5 4.1 244.2 1.0X +sort merge join wholestage on 459 477 13 4.6 218.7 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sort merge join with duplicates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------ -sort merge join with duplicates wholestage off 1026 1054 39 2.0 489.2 1.0X -sort merge join with duplicates wholestage on 922 948 28 2.3 439.4 1.1X +sort merge join with duplicates wholestage off 970 984 20 2.2 462.4 1.0X +sort merge join with duplicates wholestage on 868 879 10 2.4 413.9 1.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor shuffle hash join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -shuffle hash join wholestage off 521 533 16 8.0 124.3 1.0X -shuffle hash join wholestage on 383 393 10 11.0 91.3 1.4X +shuffle hash join wholestage off 512 520 13 8.2 122.0 1.0X +shuffle hash join wholestage on 353 369 20 11.9 84.1 1.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor broadcast nested loop join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -broadcast nested loop join wholestage off 29667 29788 171 0.7 1414.6 1.0X -broadcast nested loop join wholestage on 18946 19016 66 1.1 903.4 1.6X +broadcast nested loop join wholestage off 25058 25234 249 0.8 1194.9 1.0X +broadcast nested loop join wholestage on 18197 18557 692 1.2 867.7 1.4X diff --git a/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt b/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt index d87eb6530a855..381f30f110867 100644 --- a/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt @@ -3,128 
+3,128 @@ Benchmark for performance of JSON parsing ================================================================================================ Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2240 2300 98 2.2 448.0 1.0X -UTF-8 is set 3325 3333 8 1.5 665.0 0.7X +No encoding 2632 2713 96 1.9 526.3 1.0X +UTF-8 is set 4814 4824 12 1.0 962.8 0.5X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 1890 1917 23 2.6 378.1 1.0X -UTF-8 is set 3155 3158 3 1.6 630.9 0.6X +No encoding 2193 2256 82 2.3 438.6 1.0X +UTF-8 is set 4539 4544 5 1.1 907.8 0.5X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 5079 5266 188 0.2 5078.9 1.0X -UTF-8 is set 4272 4280 6 0.2 4272.5 1.2X +No encoding 4593 4651 87 0.2 4592.9 1.0X +UTF-8 is set 4837 4856 30 0.2 4837.0 0.9X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 9614 9866 271 0.0 192271.0 1.0X -UTF-8 is set 10517 10608 80 0.0 210331.2 0.9X +No encoding 9423 9596 234 0.0 188463.6 1.0X +UTF-8 is set 10747 10797 49 0.0 214934.1 0.9X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 1661 1666 6 0.6 1660.6 1.0X -Select 1 column 1078 1081 2 0.9 1078.3 1.5X +Select 10 columns 1769 1779 11 0.6 1769.0 1.0X +Select 1 column 1217 1220 3 0.8 1217.4 1.5X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 604 612 10 1.7 604.3 1.0X -Short column with UTF-8 828 839 15 1.2 828.3 0.7X -Wide column without encoding 7212 7255 38 0.1 7212.1 0.1X -Wide column with UTF-8 7446 7462 15 0.1 7445.8 0.1X +Short column without encoding 658 665 6 1.5 658.4 1.0X +Short column with UTF-8 1144 1162 16 0.9 1143.9 0.6X +Wide column without encoding 5152 5164 19 0.2 5151.8 0.1X +Wide column with UTF-8 7246 7274 28 0.1 7246.1 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 60 63 2 16.6 60.1 1.0X -from_json 1168 1175 7 0.9 1168.4 0.1X -json_tuple 1158 1170 16 0.9 1158.4 0.1X -get_json_object wholestage off 1075 1081 6 0.9 1074.8 0.1X -get_json_object wholestage on 1018 1029 13 1.0 1018.1 0.1X +Text read 59 62 2 16.9 59.0 1.0X +from_json 1119 1125 6 0.9 1119.4 0.1X +json_tuple 1039 1044 6 1.0 1039.4 0.1X +get_json_object wholestage off 1054 1060 5 0.9 1053.7 0.1X +get_json_object wholestage on 991 998 6 1.0 991.2 0.1X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 232 238 10 21.6 46.4 1.0X -schema inferring 1919 1928 9 2.6 383.7 0.1X -parsing 2717 2724 7 1.8 543.4 0.1X +Text read 235 242 12 21.3 46.9 1.0X +schema inferring 1966 1972 9 2.5 393.1 0.1X +parsing 2961 2978 24 1.7 592.2 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 562 569 7 8.9 112.5 1.0X -Schema inferring 2424 2432 9 2.1 484.8 0.2X -Parsing without charset 2808 2810 3 1.8 561.7 0.2X -Parsing with UTF-8 3993 4001 12 1.3 798.5 0.1X +Text read 563 569 6 8.9 112.5 1.0X +Schema inferring 2535 2538 3 2.0 507.0 0.2X +Parsing without charset 3072 3102 36 1.6 614.4 0.2X +Parsing with UTF-8 5607 5629 33 0.9 1121.5 0.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 101 108 7 9.9 101.4 1.0X -to_json(timestamp) 705 707 2 1.4 704.6 0.1X -write timestamps to files 598 611 20 1.7 598.3 0.2X -Create a dataset of dates 112 118 10 8.9 111.9 0.9X 
-to_json(date) 546 548 2 1.8 546.3 0.2X -write dates to files 393 399 9 2.5 393.1 0.3X +Create a dataset of timestamps 103 105 2 9.7 102.7 1.0X +to_json(timestamp) 555 557 3 1.8 554.8 0.2X +write timestamps to files 591 597 7 1.7 591.0 0.2X +Create a dataset of dates 121 125 4 8.3 120.8 0.8X +to_json(date) 420 422 3 2.4 419.6 0.2X +write dates to files 393 394 1 2.5 392.6 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 149 153 4 6.7 149.4 1.0X -read timestamps from files 1044 1049 5 1.0 1043.6 0.1X -infer timestamps from files 1973 1983 12 0.5 1972.7 0.1X -read date text from files 140 143 5 7.1 140.0 1.1X -read date from files 690 698 9 1.4 690.3 0.2X -timestamp strings 141 149 7 7.1 140.8 1.1X -parse timestamps from Dataset[String] 1265 1266 2 0.8 1264.5 0.1X -infer timestamps from Dataset[String] 2160 2169 12 0.5 2160.5 0.1X -date strings 248 250 2 4.0 248.3 0.6X -parse dates from Dataset[String] 1010 1015 6 1.0 1009.6 0.1X -from_json(timestamp) 1781 1810 27 0.6 1781.1 0.1X -from_json(date) 1510 1514 4 0.7 1510.0 0.1X -infer error timestamps from Dataset[String] with default format 1412 1420 8 0.7 1412.2 0.1X -infer error timestamps from Dataset[String] with user-provided format 1372 1378 6 0.7 1371.6 0.1X -infer error timestamps from Dataset[String] with legacy format 1427 1439 18 0.7 1426.6 0.1X +read timestamp text from files 143 149 9 7.0 143.4 1.0X +read timestamps from files 1102 1110 13 0.9 1101.8 0.1X +infer timestamps from files 2042 2051 14 0.5 2041.7 0.1X +read date text from files 140 143 4 7.2 139.6 1.0X +read date from files 739 764 33 1.4 739.1 0.2X 
+timestamp strings 135 136 1 7.4 134.6 1.1X +parse timestamps from Dataset[String] 1321 1328 7 0.8 1320.8 0.1X +infer timestamps from Dataset[String] 2235 2239 5 0.4 2235.3 0.1X +date strings 194 196 3 5.2 193.6 0.7X +parse dates from Dataset[String] 1054 1058 4 0.9 1054.1 0.1X +from_json(timestamp) 1750 1753 4 0.6 1750.0 0.1X +from_json(date) 1476 1480 6 0.7 1476.1 0.1X +infer error timestamps from Dataset[String] with default format 1499 1502 4 0.7 1499.4 0.1X +infer error timestamps from Dataset[String] with user-provided format 1491 1496 7 0.7 1491.1 0.1X +infer error timestamps from Dataset[String] with legacy format 1528 1538 9 0.7 1527.8 0.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 6100 6124 33 0.0 61003.7 1.0X -pushdown disabled 5957 5981 31 0.0 59569.9 1.0X -w/ filters 729 737 8 0.1 7291.0 8.4X +w/o filters 6122 6143 24 0.0 61217.0 1.0X +pushdown disabled 5947 5957 10 0.0 59474.6 1.0X +w/ filters 700 703 4 0.1 7004.2 8.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Partial JSON results: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -parse invalid JSON 2476 2480 5 0.0 247550.8 1.0X +parse invalid JSON 2496 2508 16 0.0 249615.2 1.0X diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt b/sql/core/benchmarks/JsonBenchmark-results.txt index bf7662a428dfb..106ad732614c0 100644 --- a/sql/core/benchmarks/JsonBenchmark-results.txt +++ 
b/sql/core/benchmarks/JsonBenchmark-results.txt @@ -3,128 +3,128 @@ Benchmark for performance of JSON parsing ================================================================================================ Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2335 2405 102 2.1 467.1 1.0X -UTF-8 is set 3188 3205 17 1.6 637.5 0.7X +No encoding 2318 2390 99 2.2 463.5 1.0X +UTF-8 is set 4814 4832 16 1.0 962.8 0.5X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2017 2130 130 2.5 403.4 1.0X -UTF-8 is set 3090 3104 22 1.6 618.0 0.7X +No encoding 2303 2369 93 2.2 460.6 1.0X +UTF-8 is set 4841 4855 12 1.0 968.2 0.5X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 3264 3390 134 0.3 3264.0 1.0X -UTF-8 is set 4385 4419 40 0.2 4384.9 0.7X +No encoding 4324 4400 95 0.2 4324.1 1.0X +UTF-8 is set 4825 4836 10 0.2 4824.6 0.9X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 8549 8761 245 0.0 170970.8 1.0X -UTF-8 is set 9833 9868 31 0.0 196661.2 0.9X +No encoding 9577 9734 206 0.0 191531.0 1.0X +UTF-8 is set 10240 10256 14 0.0 204805.9 0.9X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 1485 1498 22 0.7 1484.5 1.0X -Select 1 column 1056 1063 6 0.9 1055.6 1.4X +Select 10 columns 1602 1605 3 0.6 1601.6 1.0X +Select 1 column 1160 1169 12 0.9 1160.0 1.4X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 622 630 9 1.6 622.2 1.0X -Short column with UTF-8 792 802 13 1.3 792.1 0.8X -Wide column without encoding 7214 7321 111 0.1 7214.3 0.1X -Wide column with UTF-8 6455 6493 54 0.2 6454.8 0.1X +Short column without encoding 661 665 4 1.5 660.7 1.0X +Short column with UTF-8 1188 1188 1 0.8 1187.9 0.6X +Wide column without encoding 5314 5336 21 0.2 5313.6 0.1X +Wide column with UTF-8 7265 7267 2 0.1 7265.1 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 59 61 3 16.9 59.1 1.0X -from_json 1083 1088 5 0.9 1082.5 0.1X -json_tuple 1125 1133 7 0.9 1125.5 0.1X -get_json_object wholestage off 1049 1062 12 1.0 1048.6 0.1X -get_json_object wholestage on 968 975 7 1.0 968.1 0.1X +Text read 58 64 6 17.4 57.5 1.0X +from_json 1092 1106 21 0.9 1091.9 0.1X +json_tuple 1075 1077 3 0.9 1074.7 0.1X +get_json_object wholestage off 1071 1081 10 0.9 1071.1 0.1X +get_json_object wholestage on 1020 1033 20 1.0 1019.7 0.1X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 234 238 5 21.4 46.8 1.0X -schema inferring 1774 1776 2 2.8 354.8 0.1X -parsing 2648 2686 33 1.9 529.6 0.1X +Text read 233 270 61 21.5 46.6 1.0X +schema inferring 1786 1790 8 2.8 357.2 0.1X +parsing 2837 2844 9 1.8 567.4 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 615 634 23 8.1 123.0 1.0X -Schema inferring 2319 2330 10 2.2 463.8 0.3X -Parsing without charset 2834 2844 9 1.8 566.8 0.2X -Parsing with UTF-8 3741 3758 17 1.3 748.1 0.2X +Text read 622 625 4 8.0 124.4 1.0X +Schema inferring 2378 2386 7 2.1 475.7 0.3X +Parsing without charset 3016 3019 3 1.7 603.2 0.2X +Parsing with UTF-8 5651 5660 11 0.9 1130.2 0.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 117 125 10 8.6 116.8 1.0X -to_json(timestamp) 803 809 5 1.2 803.0 0.1X -write timestamps to files 698 701 4 1.4 697.6 0.2X -Create a dataset of dates 123 128 6 8.1 123.2 0.9X 
-to_json(date) 594 602 7 1.7 594.2 0.2X -write dates to files 471 479 7 2.1 471.4 0.2X +Create a dataset of timestamps 100 101 2 10.0 99.6 1.0X +to_json(timestamp) 648 653 5 1.5 648.1 0.2X +write timestamps to files 677 683 6 1.5 677.2 0.1X +Create a dataset of dates 128 130 3 7.8 128.1 0.8X +to_json(date) 455 466 10 2.2 455.1 0.2X +write dates to files 444 448 5 2.3 443.8 0.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 157 161 3 6.4 156.8 1.0X -read timestamps from files 1010 1019 10 1.0 1009.8 0.2X -infer timestamps from files 1924 1930 10 0.5 1923.9 0.1X -read date text from files 147 150 4 6.8 146.6 1.1X -read date from files 705 710 6 1.4 705.5 0.2X -timestamp strings 151 159 7 6.6 150.9 1.0X -parse timestamps from Dataset[String] 1191 1193 1 0.8 1191.3 0.1X -infer timestamps from Dataset[String] 2049 2055 7 0.5 2049.2 0.1X -date strings 228 235 6 4.4 228.3 0.7X -parse dates from Dataset[String] 955 967 14 1.0 954.8 0.2X -from_json(timestamp) 1669 1681 12 0.6 1669.4 0.1X -from_json(date) 1444 1447 5 0.7 1443.9 0.1X -infer error timestamps from Dataset[String] with default format 1398 1401 5 0.7 1397.9 0.1X -infer error timestamps from Dataset[String] with user-provided format 1420 1423 2 0.7 1420.2 0.1X -infer error timestamps from Dataset[String] with legacy format 1419 1437 21 0.7 1418.7 0.1X +read timestamp text from files 155 157 3 6.5 154.7 1.0X +read timestamps from files 1091 1093 2 0.9 1091.1 0.1X +infer timestamps from files 2016 2017 1 0.5 2016.2 0.1X +read date text from files 141 142 1 7.1 141.2 1.1X +read date from files 744 751 6 1.3 744.2 0.2X +timestamp 
strings 132 135 3 7.6 131.5 1.2X +parse timestamps from Dataset[String] 1247 1249 2 0.8 1246.8 0.1X +infer timestamps from Dataset[String] 2129 2134 7 0.5 2128.7 0.1X +date strings 203 204 1 4.9 202.6 0.8X +parse dates from Dataset[String] 1005 1006 1 1.0 1004.7 0.2X +from_json(timestamp) 1659 1672 11 0.6 1659.4 0.1X +from_json(date) 1413 1416 4 0.7 1413.2 0.1X +infer error timestamps from Dataset[String] with default format 1400 1407 6 0.7 1400.4 0.1X +infer error timestamps from Dataset[String] with user-provided format 1411 1420 13 0.7 1410.7 0.1X +infer error timestamps from Dataset[String] with legacy format 1441 1461 20 0.7 1441.2 0.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 6592 6601 7 0.0 65920.4 1.0X -pushdown disabled 5825 5829 4 0.0 58246.5 1.1X -w/ filters 664 802 200 0.2 6643.7 9.9X +w/o filters 5832 5837 9 0.0 58320.8 1.0X +pushdown disabled 5810 5821 10 0.0 58100.1 1.0X +w/ filters 679 767 147 0.1 6792.3 8.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Partial JSON results: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -parse invalid JSON 2714 2828 195 0.0 271356.0 1.0X +parse invalid JSON 2325 2417 159 0.0 232496.5 1.0X diff --git a/sql/core/benchmarks/LargeRowBenchmark-jdk21-results.txt b/sql/core/benchmarks/LargeRowBenchmark-jdk21-results.txt new file mode 100644 index 0000000000000..dbcf544b492d9 --- /dev/null +++ 
b/sql/core/benchmarks/LargeRowBenchmark-jdk21-results.txt @@ -0,0 +1,26 @@ +================================================================================================ +Large Row Benchmark +================================================================================================ + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor +#rows: 100, #cols: 10, cell: 1.3 MB: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +built-in UPPER 5909 6154 347 0.0 59088236.5 1.0X +udf UPPER 4106 4364 364 0.0 41062501.9 1.4X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor +#rows: 1, #cols: 1, cell: 300.0 MB: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +built-in UPPER 1317 1319 3 0.0 1317449498.0 1.0X +udf UPPER 954 975 25 0.0 953744994.0 1.4X + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor +#rows: 1, #cols: 200, cell: 1.0 MB: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +built-in UPPER 1118 1138 28 0.0 1117901962.0 1.0X +udf UPPER 1145 1210 91 0.0 1145234313.0 1.0X + + diff --git a/sql/core/benchmarks/LargeRowBenchmark-results.txt b/sql/core/benchmarks/LargeRowBenchmark-results.txt new file mode 100644 index 0000000000000..9fafe282238b6 --- /dev/null +++ b/sql/core/benchmarks/LargeRowBenchmark-results.txt @@ -0,0 +1,26 @@ +================================================================================================ +Large Row Benchmark 
+================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor +#rows: 100, #cols: 10, cell: 1.3 MB: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +built-in UPPER 6610 6651 58 0.0 66101681.9 1.0X +udf UPPER 4289 4291 3 0.0 42892607.0 1.5X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor +#rows: 1, #cols: 1, cell: 300.0 MB: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +built-in UPPER 1492 1510 26 0.0 1492292577.0 1.0X +udf UPPER 1033 1034 1 0.0 1032584220.0 1.4X + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor +#rows: 1, #cols: 200, cell: 1.0 MB: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +built-in UPPER 1271 1290 28 0.0 1270654457.0 1.0X +udf UPPER 1397 1558 228 0.0 1396607518.0 0.9X + + diff --git a/sql/core/benchmarks/MakeDateTimeBenchmark-jdk21-results.txt b/sql/core/benchmarks/MakeDateTimeBenchmark-jdk21-results.txt index 3f95fc73de078..ba1261bd77389 100644 --- a/sql/core/benchmarks/MakeDateTimeBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/MakeDateTimeBenchmark-jdk21-results.txt @@ -1,22 +1,22 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor make_date(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -prepare make_date() 2328 2360 31 43.0 23.3 1.0X -make_date(2019, 9, 16) 1883 1936 46 53.1 18.8 1.2X -make_date(*, *, *) 4034 4050 20 24.8 40.3 0.6X +prepare make_date() 2319 2381 55 43.1 23.2 1.0X +make_date(2019, 9, 16) 2021 2048 28 49.5 20.2 1.1X +make_date(*, *, *) 3857 3872 14 25.9 38.6 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor make_timestamp(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -prepare make_timestamp() 358 367 11 2.8 358.0 1.0X -make_timestamp(2019, 1, 2, 3, 4, 50.123456) 43 46 4 23.3 42.9 8.3X -make_timestamp(2019, 1, 2, 3, 4, 60.000000) 37 40 3 26.8 37.3 9.6X -make_timestamp(2019, 12, 31, 23, 59, 60.00) 37 48 10 27.3 36.6 9.8X -make_timestamp(*, *, *, 3, 4, 50.123456) 170 171 2 5.9 169.9 2.1X -make_timestamp(*, *, *, *, *, 0) 113 116 3 8.9 112.5 3.2X -make_timestamp(*, *, *, *, *, 60.0) 158 162 4 6.3 158.2 2.3X -make_timestamp(2019, 1, 2, *, *, *) 478 479 1 2.1 477.9 0.7X -make_timestamp(*, *, *, *, *, *) 491 495 6 2.0 491.5 0.7X +prepare make_timestamp() 346 351 4 2.9 346.3 1.0X +make_timestamp(2019, 1, 2, 3, 4, 50.123456) 39 43 4 25.8 38.8 8.9X +make_timestamp(2019, 1, 2, 3, 4, 60.000000) 42 48 8 23.8 42.0 8.2X +make_timestamp(2019, 12, 31, 23, 59, 60.00) 33 37 6 29.9 33.4 10.4X +make_timestamp(*, *, *, 3, 4, 50.123456) 160 162 1 6.2 160.2 2.2X +make_timestamp(*, *, *, *, *, 0) 103 109 6 9.7 102.6 3.4X +make_timestamp(*, *, *, *, *, 60.0) 144 148 5 6.9 144.2 2.4X +make_timestamp(2019, 1, 2, *, *, *) 422 424 3 2.4 422.0 0.8X +make_timestamp(*, *, *, *, *, *) 459 460 1 2.2 459.3 0.8X diff --git a/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt 
b/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt index 34855593dd93f..acfb4fa3ddbb1 100644 --- a/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt +++ b/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt @@ -1,22 +1,22 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor make_date(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare make_date() 2195 2443 263 45.5 22.0 1.0X -make_date(2019, 9, 16) 1806 1860 81 55.4 18.1 1.2X -make_date(*, *, *) 4107 4186 74 24.4 41.1 0.5X +prepare make_date() 2164 2170 6 46.2 21.6 1.0X +make_date(2019, 9, 16) 1823 1836 11 54.9 18.2 1.2X +make_date(*, *, *) 4074 4085 16 24.5 40.7 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor make_timestamp(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -prepare make_timestamp() 354 364 9 2.8 354.3 1.0X -make_timestamp(2019, 1, 2, 3, 4, 50.123456) 44 48 3 22.5 44.5 8.0X -make_timestamp(2019, 1, 2, 3, 4, 60.000000) 48 53 4 20.8 48.1 7.4X -make_timestamp(2019, 12, 31, 23, 59, 60.00) 34 37 4 29.3 34.1 10.4X -make_timestamp(*, *, *, 3, 4, 50.123456) 146 158 16 6.9 146.0 2.4X -make_timestamp(*, *, *, *, *, 0) 107 114 9 9.4 106.5 3.3X -make_timestamp(*, *, *, *, *, 60.0) 145 150 8 6.9 144.7 2.4X -make_timestamp(2019, 1, 2, *, *, *) 453 454 2 2.2 452.7 0.8X -make_timestamp(*, *, *, *, *, *) 475 480 6 2.1 475.2 0.7X +prepare make_timestamp() 336 350 13 3.0 336.0 1.0X +make_timestamp(2019, 1, 2, 3, 4, 50.123456) 44 50 8 22.6 44.2 7.6X +make_timestamp(2019, 1, 2, 3, 4, 
60.000000) 37 43 8 26.8 37.4 9.0X +make_timestamp(2019, 12, 31, 23, 59, 60.00) 37 38 0 26.8 37.3 9.0X +make_timestamp(*, *, *, 3, 4, 50.123456) 155 159 5 6.4 155.2 2.2X +make_timestamp(*, *, *, *, *, 0) 109 111 4 9.2 109.1 3.1X +make_timestamp(*, *, *, *, *, 60.0) 141 144 5 7.1 140.9 2.4X +make_timestamp(2019, 1, 2, *, *, *) 451 452 1 2.2 451.1 0.7X +make_timestamp(*, *, *, *, *, *) 480 483 2 2.1 479.8 0.7X diff --git a/sql/core/benchmarks/MetadataStructBenchmark-jdk21-results.txt b/sql/core/benchmarks/MetadataStructBenchmark-jdk21-results.txt index 8f1696638d097..1b9b8408c42ed 100644 --- a/sql/core/benchmarks/MetadataStructBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/MetadataStructBenchmark-jdk21-results.txt @@ -2,45 +2,45 @@ Metadata Struct Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Vectorized Parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 614 644 14 8.1 122.7 1.0X -_metadata.file_path 737 752 10 6.8 147.5 0.8X -_metadata.file_name 737 751 17 6.8 147.3 0.8X -_metadata.file_size 638 678 14 7.8 127.5 1.0X -_metadata.file_block_start 632 654 11 7.9 126.5 1.0X -_metadata.file_block_length 665 676 7 7.5 133.0 0.9X -_metadata.file_modification_time 636 655 13 7.9 127.1 1.0X -_metadata.row_index 714 728 8 7.0 142.9 0.9X -_metadata 966 993 15 5.2 193.2 0.6X +no metadata columns 615 646 20 8.1 122.9 1.0X +_metadata.file_path 731 748 9 6.8 146.2 0.8X +_metadata.file_name 720 749 12 6.9 144.0 0.9X +_metadata.file_size 668 682 10 7.5 133.7 0.9X +_metadata.file_block_start 670 679 11 7.5 134.0 0.9X +_metadata.file_block_length 668 679 8 7.5 133.6 0.9X 
+_metadata.file_modification_time 647 674 8 7.7 129.5 0.9X +_metadata.row_index 702 721 11 7.1 140.4 0.9X +_metadata 965 991 17 5.2 192.9 0.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet-mr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 2804 2862 34 1.8 560.9 1.0X -_metadata.file_path 3567 3624 33 1.4 713.3 0.8X -_metadata.file_name 3614 3648 27 1.4 722.8 0.8X -_metadata.file_size 3459 3485 21 1.4 691.8 0.8X -_metadata.file_block_start 3460 3498 25 1.4 692.0 0.8X -_metadata.file_block_length 3396 3432 32 1.5 679.3 0.8X -_metadata.file_modification_time 3385 3416 19 1.5 677.1 0.8X -_metadata.row_index 3734 3762 18 1.3 746.8 0.8X -_metadata 4804 4837 20 1.0 960.8 0.6X +no metadata columns 2555 2601 25 2.0 511.1 1.0X +_metadata.file_path 3338 3382 19 1.5 667.6 0.8X +_metadata.file_name 3325 3365 20 1.5 665.0 0.8X +_metadata.file_size 3141 3164 16 1.6 628.2 0.8X +_metadata.file_block_start 3123 3172 22 1.6 624.7 0.8X +_metadata.file_block_length 3138 3158 11 1.6 627.6 0.8X +_metadata.file_modification_time 3120 3152 31 1.6 624.1 0.8X +_metadata.row_index 3664 3700 23 1.4 732.9 0.7X +_metadata 4819 4856 26 1.0 963.8 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor JSON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 6626 6678 89 0.8 1325.2 1.0X -_metadata.file_path 7476 7498 14 0.7 1495.3 0.9X -_metadata.file_name 7468 7485 15 0.7 1493.7 0.9X -_metadata.file_size 7302 7326 18 0.7 1460.3 0.9X 
-_metadata.file_block_start 7303 7327 14 0.7 1460.5 0.9X -_metadata.file_block_length 7312 7337 14 0.7 1462.4 0.9X -_metadata.file_modification_time 7322 7340 11 0.7 1464.3 0.9X -_metadata 8135 8155 14 0.6 1627.0 0.8X +no metadata columns 7218 7289 102 0.7 1443.5 1.0X +_metadata.file_path 8149 8178 19 0.6 1629.8 0.9X +_metadata.file_name 8137 8164 22 0.6 1627.5 0.9X +_metadata.file_size 7942 7964 14 0.6 1588.5 0.9X +_metadata.file_block_start 7916 7959 20 0.6 1583.3 0.9X +_metadata.file_block_length 7931 7958 17 0.6 1586.2 0.9X +_metadata.file_modification_time 7934 7956 16 0.6 1586.7 0.9X +_metadata 8829 8857 27 0.6 1765.8 0.8X diff --git a/sql/core/benchmarks/MetadataStructBenchmark-results.txt b/sql/core/benchmarks/MetadataStructBenchmark-results.txt index 82429601dab29..bd14214994cd1 100644 --- a/sql/core/benchmarks/MetadataStructBenchmark-results.txt +++ b/sql/core/benchmarks/MetadataStructBenchmark-results.txt @@ -2,45 +2,45 @@ Metadata Struct Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Vectorized Parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 650 670 20 7.7 129.9 1.0X -_metadata.file_path 743 762 12 6.7 148.7 0.9X -_metadata.file_name 742 752 7 6.7 148.4 0.9X -_metadata.file_size 677 685 6 7.4 135.4 1.0X -_metadata.file_block_start 675 686 12 7.4 134.9 1.0X -_metadata.file_block_length 677 683 5 7.4 135.5 1.0X -_metadata.file_modification_time 673 682 7 7.4 134.7 1.0X -_metadata.row_index 718 728 8 7.0 143.6 0.9X -_metadata 1023 1033 6 4.9 204.6 0.6X +no metadata columns 639 660 20 7.8 127.8 1.0X +_metadata.file_path 745 760 13 6.7 148.9 0.9X +_metadata.file_name 
738 747 8 6.8 147.6 0.9X +_metadata.file_size 672 678 4 7.4 134.3 1.0X +_metadata.file_block_start 671 678 4 7.4 134.3 1.0X +_metadata.file_block_length 670 677 5 7.5 134.0 1.0X +_metadata.file_modification_time 669 678 9 7.5 133.8 1.0X +_metadata.row_index 726 731 4 6.9 145.1 0.9X +_metadata 1022 1031 6 4.9 204.5 0.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Parquet-mr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 2582 2621 41 1.9 516.4 1.0X -_metadata.file_path 3488 3505 12 1.4 697.5 0.7X -_metadata.file_name 3481 3502 10 1.4 696.1 0.7X -_metadata.file_size 3193 3223 18 1.6 638.6 0.8X -_metadata.file_block_start 3198 3217 15 1.6 639.7 0.8X -_metadata.file_block_length 3191 3216 19 1.6 638.2 0.8X -_metadata.file_modification_time 3188 3204 13 1.6 637.7 0.8X -_metadata.row_index 3714 3736 18 1.3 742.8 0.7X -_metadata 4935 4958 24 1.0 986.9 0.5X +no metadata columns 2658 2694 18 1.9 531.7 1.0X +_metadata.file_path 3480 3504 16 1.4 696.1 0.8X +_metadata.file_name 3465 3486 17 1.4 693.0 0.8X +_metadata.file_size 3244 3268 18 1.5 648.8 0.8X +_metadata.file_block_start 3268 3291 15 1.5 653.6 0.8X +_metadata.file_block_length 3269 3296 33 1.5 653.8 0.8X +_metadata.file_modification_time 3275 3301 19 1.5 655.1 0.8X +_metadata.row_index 3727 3742 13 1.3 745.3 0.7X +_metadata 4986 5019 24 1.0 997.2 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor JSON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 7127 7171 21 
0.7 1425.3 1.0X -_metadata.file_path 8122 8147 13 0.6 1624.4 0.9X -_metadata.file_name 8143 8166 24 0.6 1628.6 0.9X -_metadata.file_size 7914 7943 14 0.6 1582.8 0.9X -_metadata.file_block_start 7947 7978 16 0.6 1589.4 0.9X -_metadata.file_block_length 7964 7991 20 0.6 1592.8 0.9X -_metadata.file_modification_time 7950 7977 20 0.6 1590.1 0.9X -_metadata 8869 8888 15 0.6 1773.7 0.8X +no metadata columns 7065 7085 12 0.7 1413.0 1.0X +_metadata.file_path 8095 8116 11 0.6 1619.0 0.9X +_metadata.file_name 8133 8148 10 0.6 1626.6 0.9X +_metadata.file_size 7787 7810 10 0.6 1557.3 0.9X +_metadata.file_block_start 7787 7803 9 0.6 1557.4 0.9X +_metadata.file_block_length 7774 7802 13 0.6 1554.8 0.9X +_metadata.file_modification_time 7788 7803 11 0.6 1557.6 0.9X +_metadata 8705 8724 12 0.6 1741.0 0.8X diff --git a/sql/core/benchmarks/MetricsAggregationBenchmark-jdk21-results.txt b/sql/core/benchmarks/MetricsAggregationBenchmark-jdk21-results.txt index 220e9da2e088c..edc395387882b 100644 --- a/sql/core/benchmarks/MetricsAggregationBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/MetricsAggregationBenchmark-jdk21-results.txt @@ -1,12 +1,12 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor metrics aggregation (50 metrics, 100000 tasks per stage): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -1 stage(s) 565 623 52 0.0 564588687.0 1.0X -2 stage(s) 1318 1347 41 0.0 1318133868.0 0.4X -3 stage(s) 2044 2136 130 0.0 2043877303.0 0.3X +1 stage(s) 602 678 78 0.0 601969935.0 1.0X +2 stage(s) 1150 1277 180 0.0 1149781938.0 0.5X +3 stage(s) 1992 2072 113 0.0 1992188122.0 0.3X Stage Count Stage Proc. Time Aggreg. 
Time - 1 339 58 - 2 333 213 - 3 376 256 + 1 339 53 + 2 344 156 + 3 387 299 diff --git a/sql/core/benchmarks/MetricsAggregationBenchmark-results.txt b/sql/core/benchmarks/MetricsAggregationBenchmark-results.txt index 5ca7125aa3bc0..d3e2611541552 100644 --- a/sql/core/benchmarks/MetricsAggregationBenchmark-results.txt +++ b/sql/core/benchmarks/MetricsAggregationBenchmark-results.txt @@ -1,12 +1,12 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor metrics aggregation (50 metrics, 100000 tasks per stage): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -1 stage(s) 684 710 28 0.0 683720517.0 1.0X -2 stage(s) 1368 1407 55 0.0 1367925138.0 0.5X -3 stage(s) 1606 1850 346 0.0 1605768734.0 0.4X +1 stage(s) 667 689 20 0.0 666605773.0 1.0X +2 stage(s) 1366 1403 52 0.0 1366457850.0 0.5X +3 stage(s) 2087 2127 57 0.0 2086587364.0 0.3X Stage Count Stage Proc. Time Aggreg. 
Time - 1 306 92 - 2 437 150 - 3 368 219 + 1 388 98 + 2 346 232 + 3 384 244 diff --git a/sql/core/benchmarks/MiscBenchmark-jdk21-results.txt b/sql/core/benchmarks/MiscBenchmark-jdk21-results.txt index 7228d0a184011..0bccc882d5d0d 100644 --- a/sql/core/benchmarks/MiscBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/MiscBenchmark-jdk21-results.txt @@ -2,126 +2,126 @@ filter & aggregate without group ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor range/filter/sum: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -range/filter/sum wholestage off 36694 36725 44 57.2 17.5 1.0X -range/filter/sum wholestage on 3483 3597 69 602.2 1.7 10.5X +range/filter/sum wholestage off 35356 36325 1371 59.3 16.9 1.0X +range/filter/sum wholestage on 2827 3470 362 741.9 1.3 12.5X ================================================================================================ range/limit/sum ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor range/limit/sum: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -range/limit/sum wholestage off 70 95 35 7458.5 0.1 1.0X -range/limit/sum wholestage on 66 82 13 7909.4 0.1 1.1X +range/limit/sum wholestage off 59 60 1 8883.8 0.1 1.0X +range/limit/sum wholestage on 66 70 7 7984.2 0.1 0.9X 
================================================================================================ sample ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sample with replacement: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sample with replacement wholestage off 8132 8156 35 16.1 62.0 1.0X -sample with replacement wholestage on 5075 5185 154 25.8 38.7 1.6X +sample with replacement wholestage off 7811 7961 212 16.8 59.6 1.0X +sample with replacement wholestage on 5125 5152 40 25.6 39.1 1.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sample without replacement: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -sample without replacement wholestage off 1885 1893 12 69.5 14.4 1.0X -sample without replacement wholestage on 651 668 20 201.2 5.0 2.9X +sample without replacement wholestage off 1837 1839 3 71.4 14.0 1.0X +sample without replacement wholestage on 660 672 10 198.5 5.0 2.8X ================================================================================================ collect ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor collect: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -collect 1 million 159 232 53 6.6 151.6 1.0X -collect 2 millions 295 441 85 3.6 281.2 0.5X -collect 4 millions 818 832 12 1.3 780.2 0.2X +collect 1 million 159 228 64 6.6 151.8 1.0X +collect 2 millions 331 404 65 3.2 316.1 0.5X +collect 4 millions 743 912 148 1.4 708.4 0.2X ================================================================================================ collect limit ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor collect limit: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -collect limit 1 million 147 224 53 7.1 140.6 1.0X -collect limit 2 millions 301 404 86 3.5 287.3 0.5X +collect limit 1 million 161 240 54 6.5 153.6 1.0X +collect limit 2 millions 302 421 79 3.5 287.8 0.5X ================================================================================================ generate explode ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate explode array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate explode array wholestage off 12316 12347 45 1.4 734.1 1.0X -generate explode array wholestage on 2800 2856 69 6.0 166.9 4.4X +generate explode array wholestage off 12439 12453 20 1.3 741.4 1.0X +generate explode array 
wholestage on 2842 3047 135 5.9 169.4 4.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate explode map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate explode map wholestage off 23670 23767 138 0.7 1410.8 1.0X -generate explode map wholestage on 9745 9872 100 1.7 580.8 2.4X +generate explode map wholestage off 23498 23624 178 0.7 1400.6 1.0X +generate explode map wholestage on 9976 10151 128 1.7 594.6 2.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate posexplode array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate posexplode array wholestage off 12583 12709 178 1.3 750.0 1.0X -generate posexplode array wholestage on 2992 3053 67 5.6 178.3 4.2X +generate posexplode array wholestage off 12883 13108 318 1.3 767.9 1.0X +generate posexplode array wholestage on 2971 3070 66 5.6 177.1 4.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate inline array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate inline array wholestage off 6914 6972 81 2.4 412.1 1.0X -generate inline array wholestage on 2418 2524 90 6.9 144.1 2.9X +generate inline array wholestage off 7289 7311 31 2.3 434.5 1.0X +generate inline array wholestage on 2378 2456 100 7.1 141.7 3.1X -OpenJDK 
64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate big struct array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate big struct array wholestage off 200 216 22 0.3 3334.6 1.0X -generate big struct array wholestage on 164 180 13 0.4 2733.2 1.2X +generate big struct array wholestage off 191 208 25 0.3 3181.0 1.0X +generate big struct array wholestage on 155 179 24 0.4 2575.5 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate big nested struct array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -generate big nested struct array wholestage off 18684 18738 77 0.0 311401.3 1.0X -generate big nested struct array wholestage on 152 161 10 0.4 2533.3 122.9X +generate big nested struct array wholestage off 17003 17476 669 0.0 283383.8 1.0X +generate big nested struct array wholestage on 146 149 3 0.4 2436.9 116.3X ================================================================================================ generate regular generator ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate stack: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate stack wholestage off 14179 14202 33 1.2 845.1 
1.0X -generate stack wholestage on 3091 3114 26 5.4 184.2 4.6X +generate stack wholestage off 14560 14589 42 1.2 867.8 1.0X +generate stack wholestage on 3365 3428 53 5.0 200.6 4.3X diff --git a/sql/core/benchmarks/MiscBenchmark-results.txt b/sql/core/benchmarks/MiscBenchmark-results.txt index 8a3e9921dbe4b..8df4b3a8c4d0a 100644 --- a/sql/core/benchmarks/MiscBenchmark-results.txt +++ b/sql/core/benchmarks/MiscBenchmark-results.txt @@ -2,126 +2,126 @@ filter & aggregate without group ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor range/filter/sum: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -range/filter/sum wholestage off 33428 35668 3169 62.7 15.9 1.0X -range/filter/sum wholestage on 2842 3756 511 737.8 1.4 11.8X +range/filter/sum wholestage off 36850 37568 1016 56.9 17.6 1.0X +range/filter/sum wholestage on 3456 3738 158 606.8 1.6 10.7X ================================================================================================ range/limit/sum ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor range/limit/sum: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -range/limit/sum wholestage off 98 107 13 5332.3 0.2 1.0X -range/limit/sum wholestage on 67 77 11 7806.1 0.1 1.5X +range/limit/sum wholestage off 111 116 7 4736.2 0.2 1.0X +range/limit/sum wholestage on 91 
94 3 5739.7 0.2 1.2X ================================================================================================ sample ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sample with replacement: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sample with replacement wholestage off 8058 8215 221 16.3 61.5 1.0X -sample with replacement wholestage on 4994 5005 8 26.2 38.1 1.6X +sample with replacement wholestage off 7793 7904 158 16.8 59.5 1.0X +sample with replacement wholestage on 4935 5027 54 26.6 37.7 1.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor sample without replacement: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -sample without replacement wholestage off 1974 1983 13 66.4 15.1 1.0X -sample without replacement wholestage on 701 713 12 186.9 5.4 2.8X +sample without replacement wholestage off 1857 1874 24 70.6 14.2 1.0X +sample without replacement wholestage on 707 721 14 185.3 5.4 2.6X ================================================================================================ collect ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor collect: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -collect 1 million 170 227 81 6.2 161.8 1.0X -collect 2 millions 361 470 71 2.9 344.1 0.5X -collect 4 millions 727 753 33 1.4 693.1 0.2X +collect 1 million 161 231 79 6.5 153.9 1.0X +collect 2 millions 307 439 78 3.4 292.6 0.5X +collect 4 millions 692 771 82 1.5 660.2 0.2X ================================================================================================ collect limit ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor collect limit: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -collect limit 1 million 153 229 120 6.9 145.6 1.0X -collect limit 2 millions 283 420 139 3.7 269.6 0.5X +collect limit 1 million 156 223 80 6.7 148.9 1.0X +collect limit 2 millions 290 412 108 3.6 276.8 0.5X ================================================================================================ generate explode ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate explode array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate explode array wholestage off 13769 13835 94 1.2 820.7 1.0X -generate explode array wholestage on 2901 2973 67 5.8 172.9 4.7X +generate explode array wholestage off 12014 12148 189 1.4 716.1 1.0X +generate explode array 
wholestage on 2952 3034 65 5.7 175.9 4.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate explode map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate explode map wholestage off 25526 25775 352 0.7 1521.4 1.0X -generate explode map wholestage on 9201 9259 68 1.8 548.4 2.8X +generate explode map wholestage off 24447 24465 25 0.7 1457.1 1.0X +generate explode map wholestage on 9883 9992 65 1.7 589.1 2.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate posexplode array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate posexplode array wholestage off 14288 14334 65 1.2 851.6 1.0X -generate posexplode array wholestage on 2959 3006 49 5.7 176.3 4.8X +generate posexplode array wholestage off 12403 12597 274 1.4 739.3 1.0X +generate posexplode array wholestage on 2935 3002 62 5.7 174.9 4.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate inline array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate inline array wholestage off 7176 7181 8 2.3 427.7 1.0X -generate inline array wholestage on 2383 2471 61 7.0 142.0 3.0X +generate inline array wholestage off 6683 6740 81 2.5 398.4 1.0X +generate inline array wholestage on 2455 2517 45 6.8 146.4 2.7X -OpenJDK 
64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate big struct array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate big struct array wholestage off 209 296 123 0.3 3483.4 1.0X -generate big struct array wholestage on 178 188 11 0.3 2965.6 1.2X +generate big struct array wholestage off 236 250 20 0.3 3938.8 1.0X +generate big struct array wholestage on 185 217 27 0.3 3089.8 1.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate big nested struct array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -generate big nested struct array wholestage off 18690 20677 2809 0.0 311503.1 1.0X -generate big nested struct array wholestage on 172 186 15 0.3 2860.7 108.9X +generate big nested struct array wholestage off 18566 21321 3897 0.0 309426.4 1.0X +generate big nested struct array wholestage on 188 206 33 0.3 3132.8 98.8X ================================================================================================ generate regular generator ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor generate stack: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate stack wholestage off 15372 15414 60 
1.1 916.2 1.0X -generate stack wholestage on 3053 3069 15 5.5 182.0 5.0X +generate stack wholestage off 13373 13401 39 1.3 797.1 1.0X +generate stack wholestage on 3053 3068 13 5.5 182.0 4.4X diff --git a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-jdk21-results.txt b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-jdk21-results.txt index d54a37baa5770..922ec22d5e0af 100644 --- a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-jdk21-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For ORC v1 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 64 85 12 15.7 63.6 1.0X -Nested column 58 65 7 17.3 57.9 1.1X -Nested column in array 165 170 5 6.1 164.6 0.4X +Top-level column 57 69 8 17.6 56.7 1.0X +Nested column 55 64 6 18.1 55.3 1.0X +Nested column in array 165 174 6 6.1 165.3 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 236 264 18 4.2 235.7 1.0X -Nested column 241 259 11 4.2 240.7 1.0X -Nested column in array 518 537 11 1.9 518.3 0.5X +Top-level column 237 250 15 4.2 236.6 1.0X +Nested column 241 255 12 4.1 241.1 1.0X +Nested column in array 527 544 16 1.9 527.0 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 219 230 7 4.6 218.5 1.0X -Nested column 224 235 10 4.5 223.6 1.0X -Nested column in array 483 487 5 2.1 482.6 0.5X +Top-level column 207 219 8 4.8 206.6 1.0X +Nested column 218 229 7 4.6 218.1 0.9X +Nested column in array 476 480 4 2.1 476.0 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 218 226 5 4.6 218.5 1.0X -Nested column 240 251 7 4.2 240.2 0.9X -Nested column in array 511 515 4 2.0 510.7 0.4X +Top-level column 207 219 8 4.8 206.6 1.0X +Nested column 228 244 15 4.4 227.7 0.9X +Nested column in array 504 508 4 2.0 503.9 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 65 78 13 15.5 64.6 1.0X -Nested column 74 89 15 13.5 74.0 0.9X -Nested column in array 200 219 16 5.0 199.9 0.3X +Top-level column 65 91 21 15.3 65.2 1.0X +Nested column 75 86 6 13.4 74.6 0.9X +Nested column in array 207 234 16 4.8 206.6 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 302 318 22 3.3 301.5 1.0X -Nested column 353 368 17 2.8 352.9 0.9X -Nested column in array 720 755 35 1.4 720.5 0.4X +Top-level column 292 305 14 3.4 291.9 1.0X +Nested column 344 357 18 2.9 344.2 0.8X +Nested column in array 713 760 41 1.4 712.9 0.4X diff --git a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt index 77a9e92525691..262ce4adc26e0 100644 --- a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt +++ b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For ORC v1 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 67 83 11 14.8 67.4 1.0X -Nested column 58 65 7 17.3 57.6 1.2X -Nested column in array 158 163 5 6.3 158.2 0.4X +Top-level column 64 82 12 15.7 63.7 1.0X +Nested column 58 67 6 17.2 58.1 1.1X +Nested column in array 162 169 4 6.2 161.6 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 238 263 18 4.2 237.5 1.0X 
-Nested column 237 255 12 4.2 237.2 1.0X -Nested column in array 523 534 6 1.9 523.3 0.5X +Top-level column 231 253 18 4.3 230.7 1.0X +Nested column 233 252 13 4.3 233.0 1.0X +Nested column in array 501 520 19 2.0 500.6 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 212 221 8 4.7 212.3 1.0X -Nested column 219 230 9 4.6 219.4 1.0X -Nested column in array 470 477 5 2.1 470.4 0.5X +Top-level column 207 213 4 4.8 207.1 1.0X +Nested column 210 219 6 4.8 209.8 1.0X +Nested column in array 448 452 2 2.2 448.2 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 213 218 6 4.7 213.2 1.0X -Nested column 237 245 7 4.2 236.6 0.9X -Nested column in array 504 510 6 2.0 503.8 0.4X +Top-level column 202 211 8 4.9 202.3 1.0X +Nested column 227 233 5 4.4 227.4 0.9X +Nested column in array 480 487 7 2.1 480.2 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 66 78 8 15.1 66.4 1.0X -Nested column 73 84 12 13.7 73.0 0.9X -Nested column in array 202 234 31 4.9 202.3 0.3X 
+Top-level column 61 77 10 16.5 60.7 1.0X +Nested column 74 90 15 13.4 74.5 0.8X +Nested column in array 218 239 12 4.6 218.5 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 304 317 12 3.3 303.9 1.0X -Nested column 350 358 7 2.9 350.3 0.9X -Nested column in array 722 730 7 1.4 721.8 0.4X +Top-level column 289 300 9 3.5 289.2 1.0X +Nested column 340 348 4 2.9 340.4 0.8X +Nested column in array 685 699 11 1.5 685.2 0.4X diff --git a/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-jdk21-results.txt b/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-jdk21-results.txt index 5585eabfe717b..4676ae5a6bb3c 100644 --- a/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-jdk21-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For ORC v2 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 66 84 14 15.1 66.0 1.0X -Nested column 57 68 7 17.6 56.7 1.2X -Nested column in array 167 173 5 6.0 166.9 0.4X +Top-level column 59 73 9 17.1 58.5 1.0X +Nested column 58 67 7 17.4 57.5 1.0X +Nested column in array 171 177 4 5.9 170.9 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS 
on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 243 260 12 4.1 242.6 1.0X -Nested column 245 260 12 4.1 244.9 1.0X -Nested column in array 515 532 12 1.9 514.8 0.5X +Top-level column 233 256 18 4.3 232.9 1.0X +Nested column 229 241 8 4.4 229.5 1.0X +Nested column in array 520 551 30 1.9 520.3 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 218 230 7 4.6 218.1 1.0X -Nested column 223 234 14 4.5 223.4 1.0X -Nested column in array 472 484 7 2.1 472.1 0.5X +Top-level column 203 213 6 4.9 203.0 1.0X +Nested column 208 219 6 4.8 207.8 1.0X +Nested column in array 467 476 4 2.1 467.3 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 213 231 14 4.7 213.3 1.0X -Nested column 240 252 8 4.2 240.1 0.9X -Nested column in array 510 514 3 2.0 509.7 0.4X +Top-level column 201 206 5 5.0 201.2 1.0X +Nested column 238 244 4 4.2 238.0 0.8X +Nested column in array 504 522 30 2.0 503.6 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 65 77 10 15.3 65.3 1.0X -Nested column 74 89 15 13.6 73.7 0.9X -Nested column in array 206 242 28 4.8 206.2 0.3X +Top-level column 53 76 10 18.9 53.0 1.0X +Nested column 73 87 9 13.6 73.5 0.7X +Nested column in array 208 247 24 4.8 208.4 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 301 316 21 3.3 300.7 1.0X -Nested column 361 378 21 2.8 360.9 0.8X -Nested column in array 723 765 42 1.4 722.8 0.4X +Top-level column 289 297 12 3.5 289.3 1.0X +Nested column 334 344 14 3.0 333.8 0.9X +Nested column in array 702 767 41 1.4 701.8 0.4X diff --git a/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-results.txt b/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-results.txt index e2eba2b51fb49..c7825eb9f4a8f 100644 --- a/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-results.txt +++ b/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For ORC v2 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 66 83 11 15.2 66.0 1.0X -Nested column 60 68 7 16.6 60.4 1.1X -Nested 
column in array 165 170 3 6.1 164.6 0.4X +Top-level column 62 78 12 16.2 61.8 1.0X +Nested column 58 65 5 17.3 57.8 1.1X +Nested column in array 164 173 6 6.1 164.1 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 252 265 16 4.0 252.0 1.0X -Nested column 243 258 14 4.1 242.8 1.0X -Nested column in array 503 532 14 2.0 503.4 0.5X +Top-level column 232 252 15 4.3 231.9 1.0X +Nested column 230 243 8 4.3 229.9 1.0X +Nested column in array 498 525 22 2.0 497.6 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 216 221 5 4.6 216.1 1.0X -Nested column 221 228 5 4.5 220.8 1.0X -Nested column in array 471 476 3 2.1 470.9 0.5X +Top-level column 210 215 6 4.8 209.9 1.0X +Nested column 210 220 9 4.8 210.3 1.0X +Nested column in array 449 455 5 2.2 449.2 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 213 219 5 4.7 213.2 1.0X -Nested column 238 247 7 4.2 237.9 0.9X -Nested column in array 504 510 5 2.0 503.9 0.4X +Top-level column 204 215 23 4.9 203.7 1.0X +Nested 
column 227 232 4 4.4 226.9 0.9X +Nested column in array 483 491 5 2.1 483.4 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 60 76 7 16.5 60.4 1.0X -Nested column 71 89 15 14.2 70.5 0.9X -Nested column in array 202 242 47 4.9 202.5 0.3X +Top-level column 69 88 23 14.5 68.8 1.0X +Nested column 73 92 16 13.8 72.7 0.9X +Nested column in array 223 242 15 4.5 223.2 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 309 317 12 3.2 309.4 1.0X -Nested column 355 362 5 2.8 354.8 0.9X -Nested column in array 710 732 19 1.4 710.5 0.4X +Top-level column 290 300 7 3.4 290.0 1.0X +Nested column 342 348 5 2.9 341.9 0.8X +Nested column in array 707 714 7 1.4 706.7 0.4X diff --git a/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-jdk21-results.txt b/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-jdk21-results.txt index 743331fb4dae2..11fbaf8abd6cd 100644 --- a/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-jdk21-results.txt @@ -1,21 +1,21 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Can skip all row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 6375 6430 47 16.4 60.8 1.0X -With nested predicate Pushdown 50 65 14 2093.7 0.5 127.3X +Without nested predicate Pushdown 6543 6575 34 16.0 62.4 1.0X +With nested predicate Pushdown 53 69 9 1979.9 0.5 123.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Can skip some row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 6877 6916 20 15.2 65.6 1.0X -With nested predicate Pushdown 45 60 10 2345.3 0.4 153.8X +Without nested predicate Pushdown 7146 7174 30 14.7 68.1 1.0X +With nested predicate Pushdown 48 61 11 2176.9 0.5 148.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Can skip no row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 13281 13345 48 7.9 126.7 1.0X -With nested predicate Pushdown 13310 13352 34 7.9 126.9 1.0X +Without nested predicate Pushdown 13410 13505 61 7.8 127.9 1.0X +With nested predicate Pushdown 13459 13550 71 7.8 128.4 1.0X diff --git a/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-results.txt b/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-results.txt index f6a914114a017..aecea8e69fe1d 100644 --- a/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-results.txt +++ b/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-results.txt @@ -1,21 +1,21 @@ -OpenJDK 
64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Can skip all row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 7157 7297 99 14.7 68.3 1.0X -With nested predicate Pushdown 82 99 13 1279.1 0.8 87.3X +Without nested predicate Pushdown 7218 7274 50 14.5 68.8 1.0X +With nested predicate Pushdown 57 78 12 1844.1 0.5 126.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Can skip some row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 7499 7833 119 14.0 71.5 1.0X -With nested predicate Pushdown 61 74 6 1714.3 0.6 122.6X +Without nested predicate Pushdown 7511 7878 130 14.0 71.6 1.0X +With nested predicate Pushdown 68 79 10 1535.8 0.7 110.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Can skip no row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 14150 14216 75 7.4 134.9 1.0X -With nested predicate Pushdown 14150 14221 50 7.4 134.9 1.0X +Without nested predicate Pushdown 14244 14289 41 7.4 135.8 1.0X +With nested predicate Pushdown 14288 14318 24 7.3 136.3 1.0X diff --git a/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-jdk21-results.txt 
b/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-jdk21-results.txt index 9f48b560d615a..e2dde58903157 100644 --- a/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-jdk21-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For Parquet ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 66 82 12 15.2 66.0 1.0X -Nested column 67 77 6 15.0 66.6 1.0X -Nested column in array 230 239 8 4.4 229.7 0.3X +Top-level column 65 77 12 15.5 64.7 1.0X +Nested column 66 74 12 15.1 66.3 1.0X +Nested column in array 221 255 19 4.5 220.9 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 248 266 13 4.0 247.7 1.0X -Nested column 242 259 12 4.1 242.3 1.0X -Nested column in array 564 594 22 1.8 563.9 0.4X +Top-level column 242 257 11 4.1 241.8 1.0X +Nested column 240 258 17 4.2 239.7 1.0X +Nested column in array 560 588 25 1.8 560.5 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Top-level column 218 229 7 4.6 218.4 1.0X -Nested column 222 234 10 4.5 221.8 1.0X -Nested column in array 521 537 8 1.9 521.2 0.4X +Top-level column 218 227 14 4.6 218.1 1.0X +Nested column 220 230 14 4.5 219.9 1.0X +Nested column in array 509 525 14 2.0 509.5 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 216 224 7 4.6 216.4 1.0X -Nested column 237 251 10 4.2 236.9 0.9X -Nested column in array 559 568 7 1.8 558.7 0.4X +Top-level column 216 228 13 4.6 215.9 1.0X +Nested column 242 254 12 4.1 241.8 0.9X +Nested column in array 542 559 16 1.8 542.4 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 74 91 10 13.4 74.4 1.0X -Nested column 81 98 12 12.3 81.2 0.9X -Nested column in array 264 287 18 3.8 264.1 0.3X +Top-level column 73 85 13 13.8 72.6 1.0X +Nested column 75 96 17 13.3 75.2 1.0X +Nested column in array 274 310 31 3.6 274.3 0.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Top-level column 303 370 49 3.3 302.9 1.0X -Nested column 432 447 11 2.3 432.0 0.7X -Nested column in array 779 833 37 1.3 779.2 0.4X +Top-level column 298 314 22 3.4 298.2 1.0X +Nested column 339 353 17 2.9 339.3 0.9X +Nested column in array 757 781 30 1.3 757.3 0.4X diff --git a/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-results.txt b/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-results.txt index 094a254580f30..87e3441303e76 100644 --- a/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-results.txt +++ b/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For Parquet ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 68 84 11 14.6 68.4 1.0X -Nested column 69 76 5 14.5 69.2 1.0X -Nested column in array 224 229 5 4.5 224.0 0.3X +Top-level column 75 98 14 13.3 75.0 1.0X +Nested column 67 77 8 15.0 66.8 1.1X +Nested column in array 226 233 6 4.4 225.7 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 241 267 25 4.1 241.1 1.0X -Nested column 244 258 10 4.1 243.8 1.0X -Nested column in array 562 583 18 
1.8 562.2 0.4X +Top-level column 250 274 20 4.0 250.3 1.0X +Nested column 244 261 14 4.1 244.3 1.0X +Nested column in array 552 569 8 1.8 551.9 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 220 226 3 4.5 220.2 1.0X -Nested column 223 230 8 4.5 223.0 1.0X -Nested column in array 525 530 4 1.9 525.2 0.4X +Top-level column 217 223 5 4.6 217.3 1.0X +Nested column 223 231 6 4.5 222.8 1.0X +Nested column in array 513 521 10 1.9 513.1 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 216 221 5 4.6 216.0 1.0X -Nested column 242 252 6 4.1 242.0 0.9X -Nested column in array 561 565 4 1.8 561.1 0.4X +Top-level column 215 220 3 4.7 215.0 1.0X +Nested column 240 244 3 4.2 239.7 0.9X +Nested column in array 551 559 10 1.8 551.1 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 74 84 8 13.5 73.8 1.0X -Nested column 79 94 12 12.6 79.4 0.9X -Nested column in array 263 283 13 3.8 263.3 0.3X +Top-level column 78 91 12 12.8 78.3 1.0X +Nested column 84 96 9 11.9 84.0 0.9X 
+Nested column in array 285 301 11 3.5 284.6 0.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 309 323 11 3.2 309.0 1.0X -Nested column 358 367 7 2.8 358.3 0.9X -Nested column in array 771 793 23 1.3 770.7 0.4X +Top-level column 303 315 10 3.3 302.6 1.0X +Nested column 356 360 3 2.8 356.5 0.8X +Nested column in array 759 778 14 1.3 758.8 0.4X diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-jdk21-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-jdk21-results.txt index 3c57cee485c54..fec73347a3711 100644 --- a/sql/core/benchmarks/PrimitiveArrayBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ Write primitive arrays in dataset ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write an array in Dataset: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 211 241 25 39.8 25.2 1.0X -Double 287 303 12 29.3 34.2 0.7X +Int 165 203 26 50.9 19.7 1.0X +Double 226 252 21 37.1 27.0 0.7X diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt index 1f8ea79f262be..50acc6af2e6f9 100644 --- a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt +++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt @@ -2,11 +2,11 @@ Write primitive arrays in 
dataset ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write an array in Dataset: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 256 291 24 32.8 30.5 1.0X -Double 305 327 15 27.5 36.4 0.8X +Int 199 230 18 42.2 23.7 1.0X +Double 282 290 8 29.7 33.6 0.7X diff --git a/sql/core/benchmarks/RangeBenchmark-jdk21-results.txt b/sql/core/benchmarks/RangeBenchmark-jdk21-results.txt index 33fa0ff972d15..fe289c99daef8 100644 --- a/sql/core/benchmarks/RangeBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/RangeBenchmark-jdk21-results.txt @@ -2,14 +2,14 @@ range ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor range: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -full scan 10051 10241 242 52.2 19.2 1.0X -limit after range 36 48 10 14566.1 0.1 279.3X -filter after range 1003 1012 7 522.9 1.9 10.0X -count after range 344 371 42 1522.3 0.7 29.2X -count after limit after range 40 50 10 13166.5 0.1 252.4X +full scan 10142 10196 93 51.7 19.3 1.0X +limit after range 44 47 3 12012.9 0.1 232.4X +filter after range 1005 1019 15 521.5 1.9 10.1X +count after range 345 354 9 1519.4 0.7 29.4X +count after limit after range 48 52 4 10846.5 0.1 209.8X diff --git a/sql/core/benchmarks/RangeBenchmark-results.txt b/sql/core/benchmarks/RangeBenchmark-results.txt index 
faca550c9e2d5..fdae5cc68e8ce 100644 --- a/sql/core/benchmarks/RangeBenchmark-results.txt +++ b/sql/core/benchmarks/RangeBenchmark-results.txt @@ -2,14 +2,14 @@ range ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor range: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -full scan 9920 10204 190 52.9 18.9 1.0X -limit after range 44 51 7 11786.7 0.1 223.0X -filter after range 1011 1031 16 518.5 1.9 9.8X -count after range 370 372 2 1417.0 0.7 26.8X -count after limit after range 47 49 1 11082.6 0.1 209.7X +full scan 9433 9964 355 55.6 18.0 1.0X +limit after range 53 56 2 9946.9 0.1 179.0X +filter after range 1004 1039 48 522.3 1.9 9.4X +count after range 348 353 6 1508.6 0.7 27.1X +count after limit after range 65 72 10 8068.8 0.1 145.2X diff --git a/sql/core/benchmarks/SetOperationsBenchmark-jdk21-results.txt b/sql/core/benchmarks/SetOperationsBenchmark-jdk21-results.txt new file mode 100644 index 0000000000000..37a2d749eb195 --- /dev/null +++ b/sql/core/benchmarks/SetOperationsBenchmark-jdk21-results.txt @@ -0,0 +1,13 @@ +================================================================================================ +Set Operations Benchmark +================================================================================================ + +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor +Parsing + Analysis: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UNION ALL 319 415 124 0.0 21283.9 1.0X +EXCEPT ALL 259 266 7 0.1 17287.8 
1.2X +INTERSECT ALL 257 263 4 0.1 17101.2 1.2X + + diff --git a/sql/core/benchmarks/SetOperationsBenchmark-results.txt b/sql/core/benchmarks/SetOperationsBenchmark-results.txt new file mode 100644 index 0000000000000..cb944d1d61fd0 --- /dev/null +++ b/sql/core/benchmarks/SetOperationsBenchmark-results.txt @@ -0,0 +1,13 @@ +================================================================================================ +Set Operations Benchmark +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure +AMD EPYC 7763 64-Core Processor +Parsing + Analysis: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UNION ALL 325 445 92 0.0 21641.7 1.0X +EXCEPT ALL 272 277 5 0.1 18110.7 1.2X +INTERSECT ALL 269 276 6 0.1 17938.3 1.2X + + diff --git a/sql/core/benchmarks/SortBenchmark-jdk21-results.txt b/sql/core/benchmarks/SortBenchmark-jdk21-results.txt index 4125c72bce4ab..683aa1f400f5c 100644 --- a/sql/core/benchmarks/SortBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/SortBenchmark-jdk21-results.txt @@ -2,15 +2,15 @@ radix sort ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor radix sort 25000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -reference TimSort key prefix array 8456 8460 5 3.0 338.3 1.0X -reference Arrays.sort 2041 2067 37 12.2 81.6 4.1X -radix sort one byte 68 76 8 368.5 2.7 124.6X -radix sort two bytes 125 133 7 200.4 5.0 67.8X -radix sort eight bytes 479 494 
17 52.2 19.2 17.7X -radix sort key prefix array 564 584 33 44.3 22.6 15.0X +reference TimSort key prefix array 8456 9400 1334 3.0 338.3 1.0X +reference Arrays.sort 2181 2207 37 11.5 87.2 3.9X +radix sort one byte 67 72 4 372.6 2.7 126.0X +radix sort two bytes 123 129 6 202.7 4.9 68.6X +radix sort eight bytes 473 478 6 52.9 18.9 17.9X +radix sort key prefix array 575 585 10 43.4 23.0 14.7X diff --git a/sql/core/benchmarks/SortBenchmark-results.txt b/sql/core/benchmarks/SortBenchmark-results.txt index 533049edd2237..49205fbe84798 100644 --- a/sql/core/benchmarks/SortBenchmark-results.txt +++ b/sql/core/benchmarks/SortBenchmark-results.txt @@ -2,15 +2,15 @@ radix sort ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor radix sort 25000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -reference TimSort key prefix array 8140 8157 23 3.1 325.6 1.0X -reference Arrays.sort 2063 2087 35 12.1 82.5 3.9X -radix sort one byte 64 73 6 393.0 2.5 128.0X -radix sort two bytes 116 129 8 216.1 4.6 70.4X -radix sort eight bytes 454 475 16 55.1 18.2 17.9X -radix sort key prefix array 885 896 11 28.3 35.4 9.2X +reference TimSort key prefix array 8207 8241 48 3.0 328.3 1.0X +reference Arrays.sort 2069 2095 37 12.1 82.8 4.0X +radix sort one byte 62 71 5 400.4 2.5 131.4X +radix sort two bytes 127 137 6 196.3 5.1 64.4X +radix sort eight bytes 488 500 8 51.2 19.5 16.8X +radix sort key prefix array 929 930 2 26.9 37.1 8.8X diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk21-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk21-results.txt index 4ab5f6d0061cc..9ec0af2d17a71 100644 --- 
a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk21-results.txt @@ -2,143 +2,143 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 10 14 1 1.0 1006.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 43 45 2 0.2 4345.4 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 17 1 0.6 1547.6 0.7X +In-memory 10 12 1 1.0 1034.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 45 47 2 0.2 4504.4 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1617.3 0.6X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 10 12 1 1.0 1011.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 46 1 0.2 4441.2 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1521.7 0.7X +In-memory 9 11 1 1.1 903.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 46 47 1 0.2 4592.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1614.6 0.6X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (1000 rows to overwrite - rate 10): 
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 1 1.1 940.8 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 46 1 0.2 4425.1 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1515.2 0.6X +In-memory 9 11 1 1.1 941.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 46 47 1 0.2 4572.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1589.0 0.6X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 11 2 1.1 932.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 46 1 0.2 4400.3 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 17 1 0.7 1506.0 0.6X +In-memory 8 9 1 1.3 797.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 45 46 1 0.2 4468.9 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1575.2 0.5X ================================================================================================ merge rows ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -RocksDB (trackTotalNumberOfRows: true) 532 547 8 0.0 53154.1 
1.0X -RocksDB (trackTotalNumberOfRows: false) 174 180 3 0.1 17410.5 3.1X +RocksDB (trackTotalNumberOfRows: true) 566 584 6 0.0 56623.9 1.0X +RocksDB (trackTotalNumberOfRows: false) 175 185 3 0.1 17469.9 3.2X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -RocksDB (trackTotalNumberOfRows: true) 472 484 5 0.0 47228.8 1.0X -RocksDB (trackTotalNumberOfRows: false) 174 179 3 0.1 17433.5 2.7X +RocksDB (trackTotalNumberOfRows: true) 488 502 5 0.0 48798.8 1.0X +RocksDB (trackTotalNumberOfRows: false) 177 184 3 0.1 17675.0 2.8X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -RocksDB (trackTotalNumberOfRows: true) 422 434 5 0.0 42226.0 1.0X -RocksDB (trackTotalNumberOfRows: false) 172 179 3 0.1 17235.9 2.4X +RocksDB (trackTotalNumberOfRows: true) 424 437 6 0.0 42429.3 1.0X +RocksDB (trackTotalNumberOfRows: false) 176 182 3 0.1 17608.0 2.4X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
-------------------------------------------------------------------------------------------------------------------------------------------------------- -RocksDB (trackTotalNumberOfRows: true) 406 419 7 0.0 40646.7 1.0X -RocksDB (trackTotalNumberOfRows: false) 173 179 3 0.1 17265.8 2.4X +RocksDB (trackTotalNumberOfRows: true) 406 420 6 0.0 40630.6 1.0X +RocksDB (trackTotalNumberOfRows: false) 171 178 3 0.1 17137.0 2.4X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 1 0 27.0 37.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 46 1 0.2 4447.0 0.0X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1453.0 0.0X +In-memory 0 1 0 26.6 37.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 43 45 1 0.2 4303.7 0.0X +RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.6 1543.9 0.0X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.3 796.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4384.0 
0.2X -RocksDB (trackTotalNumberOfRows: false) 15 15 0 0.7 1463.5 0.5X +In-memory 7 7 1 1.5 650.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 45 46 1 0.2 4469.5 0.1X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1496.4 0.4X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 9 1 1.2 853.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 43 44 1 0.2 4278.0 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 15 1 0.7 1460.7 0.6X +In-memory 7 7 0 1.5 687.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4357.2 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1479.4 0.5X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 2 1.2 854.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 42 44 1 0.2 4183.1 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1457.0 0.6X +In-memory 7 7 0 1.4 697.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 43 45 1 0.2 4332.4 0.2X +RocksDB (trackTotalNumberOfRows: false) 14 15 0 0.7 1448.6 0.5X ================================================================================================ evict rows 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 0 1.2 837.4 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4146.8 0.2X -RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1623.1 0.5X +In-memory 7 7 0 1.5 683.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 43 44 1 0.2 4257.5 0.2X +RocksDB (trackTotalNumberOfRows: false) 17 17 1 0.6 1669.2 0.4X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 8 9 1 1.3 798.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 22 23 1 0.5 2201.4 0.4X -RocksDB (trackTotalNumberOfRows: false) 10 10 1 1.0 956.5 0.8X +In-memory 6 7 0 1.6 643.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 23 24 1 0.4 2349.6 0.3X +RocksDB (trackTotalNumberOfRows: false) 10 10 0 1.0 999.9 0.6X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 1 1.4 724.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 7 7 0 1.4 698.4 1.0X -RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.2 450.9 1.6X +In-memory 6 6 0 1.7 590.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 8 8 0 1.3 758.5 0.8X +RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.0 491.5 1.2X -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 0 0 24.0 41.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 3 3 1 3.2 317.3 0.1X -RocksDB (trackTotalNumberOfRows: false) 3 3 0 3.2 317.2 0.1X +In-memory 0 0 0 23.7 42.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 4 4 0 2.8 354.3 0.1X +RocksDB (trackTotalNumberOfRows: false) 4 4 0 2.8 354.8 0.1X diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt index 856985b5d071f..a8e4c83be80e1 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt @@ -2,143 +2,143 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------------------- -In-memory 10 10 1 1.0 953.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 43 44 2 0.2 4269.8 0.2X -RocksDB (trackTotalNumberOfRows: false) 16 16 1 0.6 1550.5 0.6X +In-memory 8 10 1 1.2 843.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 44 46 1 0.2 4423.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1616.4 0.5X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 0 1.1 930.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4387.9 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1521.4 0.6X +In-memory 8 9 1 1.2 830.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 45 47 1 0.2 4506.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1576.7 0.5X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 0 1.1 918.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4441.6 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1521.7 0.6X +In-memory 8 8 0 1.2 808.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 45 46 1 0.2 4489.0 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1588.0 0.5X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on 
Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 0 1.1 916.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4413.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1522.0 0.6X +In-memory 8 8 0 1.3 796.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 44 46 1 0.2 4437.2 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1573.0 0.5X ================================================================================================ merge rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -RocksDB (trackTotalNumberOfRows: true) 542 553 6 0.0 54222.4 1.0X -RocksDB (trackTotalNumberOfRows: false) 174 179 3 0.1 17391.9 3.1X +RocksDB (trackTotalNumberOfRows: true) 549 562 6 0.0 54902.6 1.0X +RocksDB (trackTotalNumberOfRows: false) 179 184 2 0.1 17887.1 3.1X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------------------------------ -RocksDB (trackTotalNumberOfRows: true) 479 490 5 0.0 47921.1 1.0X -RocksDB (trackTotalNumberOfRows: false) 174 179 3 0.1 17446.2 2.7X +RocksDB (trackTotalNumberOfRows: true) 486 496 6 0.0 48554.8 1.0X +RocksDB (trackTotalNumberOfRows: false) 180 185 3 0.1 17973.1 2.7X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -RocksDB (trackTotalNumberOfRows: true) 423 433 5 0.0 42311.4 1.0X -RocksDB (trackTotalNumberOfRows: false) 173 178 3 0.1 17309.1 2.4X +RocksDB (trackTotalNumberOfRows: true) 429 440 4 0.0 42859.9 1.0X +RocksDB (trackTotalNumberOfRows: false) 178 184 2 0.1 17776.5 2.4X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------- -RocksDB (trackTotalNumberOfRows: true) 408 419 5 0.0 40762.3 1.0X -RocksDB (trackTotalNumberOfRows: false) 174 183 3 0.1 17377.7 2.3X +RocksDB (trackTotalNumberOfRows: true) 409 423 4 0.0 40946.3 1.0X +RocksDB (trackTotalNumberOfRows: false) 178 183 2 0.1 17820.2 2.3X ================================================================================================ 
delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 0 0 26.1 38.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 46 1 0.2 4444.2 0.0X -RocksDB (trackTotalNumberOfRows: false) 15 15 0 0.7 1489.6 0.0X +In-memory 1 1 0 19.6 51.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4403.8 0.0X +RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1527.7 0.0X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 8 0 1.3 788.8 1.0X -RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4425.4 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1499.2 0.5X +In-memory 7 7 0 1.5 671.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 45 46 1 0.2 4484.7 0.1X +RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1516.5 0.4X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 0 1.2 841.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 43 44 1 0.2 4336.9 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1493.6 0.6X +In-memory 7 8 0 1.4 727.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4433.9 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1512.5 0.5X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 0 1.2 848.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 42 43 1 0.2 4216.8 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 15 0 0.7 1467.4 0.6X +In-memory 7 8 1 1.4 740.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4390.9 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1500.6 0.5X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 0 
1.2 836.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 42 43 2 0.2 4182.0 0.2X -RocksDB (trackTotalNumberOfRows: false) 16 17 0 0.6 1645.0 0.5X +In-memory 7 7 0 1.5 688.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 43 44 1 0.2 4337.8 0.2X +RocksDB (trackTotalNumberOfRows: false) 17 17 0 0.6 1678.8 0.4X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 8 8 0 1.3 785.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 23 23 1 0.4 2258.3 0.3X -RocksDB (trackTotalNumberOfRows: false) 10 10 0 1.0 999.7 0.8X +In-memory 6 7 0 1.5 645.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 24 24 1 0.4 2370.3 0.3X +RocksDB (trackTotalNumberOfRows: false) 11 11 0 0.9 1082.4 0.6X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.4 726.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 7 8 0 1.4 736.8 1.0X -RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.1 487.0 1.5X +In-memory 6 6 0 1.7 587.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 8 8 0 1.3 788.1 0.7X +RocksDB (trackTotalNumberOfRows: false) 6 6 0 1.8 554.1 1.1X -OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 0 0 22.8 43.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 4 4 0 2.8 354.8 0.1X -RocksDB (trackTotalNumberOfRows: false) 4 4 0 2.8 353.1 0.1X +In-memory 0 0 0 23.9 41.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 4 4 0 2.6 387.4 0.1X +RocksDB (trackTotalNumberOfRows: false) 4 4 0 2.6 389.4 0.1X diff --git a/sql/core/benchmarks/StringFunctionsBenchmark-jdk21-results.txt b/sql/core/benchmarks/StringFunctionsBenchmark-jdk21-results.txt index 04720fb50b41c..ac6a3ea26717f 100644 --- a/sql/core/benchmarks/StringFunctionsBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/StringFunctionsBenchmark-jdk21-results.txt @@ -2,10 +2,10 @@ SQL string functions ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor regexp_replace: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -regexp_replace('*-*', '(\\d+)', 'num') 503 527 32 2.0 503.0 1.0X +regexp_replace('*-*', '(\\d+)', 'num') 505 521 11 2.0 505.0 1.0X diff --git a/sql/core/benchmarks/StringFunctionsBenchmark-results.txt b/sql/core/benchmarks/StringFunctionsBenchmark-results.txt index c1b9bdb4ea3da..7ecccc7a826f4 100644 --- a/sql/core/benchmarks/StringFunctionsBenchmark-results.txt +++ b/sql/core/benchmarks/StringFunctionsBenchmark-results.txt @@ -2,10 +2,10 @@ SQL string functions ================================================================================================ 
-OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor regexp_replace: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -regexp_replace('*-*', '(\\d+)', 'num') 512 529 24 2.0 512.0 1.0X +regexp_replace('*-*', '(\\d+)', 'num') 509 533 15 2.0 509.1 1.0X diff --git a/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt b/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt index 8a79199cd92ed..8e75b2b3d4697 100644 --- a/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt @@ -1,810 +1,810 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q1 756 854 165 0.6 1639.1 1.0X +q1 672 845 214 0.7 1456.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q2 841 929 113 2.7 376.8 1.0X +q2 890 937 57 2.5 398.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q3 239 288 37 12.4 80.6 1.0X +q3 244 297 49 12.2 82.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q4 5033 5148 163 1.0 965.7 1.0X +q4 4653 5006 500 1.1 892.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q5 1125 1352 321 5.0 199.9 1.0X +q5 1406 1469 89 4.0 249.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q6 1095 1121 37 2.8 350.9 1.0X +q6 1122 1134 17 2.8 359.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q7 601 631 48 8.1 122.9 1.0X +q7 567 603 39 8.6 116.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q8 453 501 54 6.8 146.1 1.0X +q8 455 510 51 6.8 146.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q9 895 929 36 0.0 25559860.1 1.0X +q9 874 935 90 0.0 24957685.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10 1917 1986 98 1.1 925.6 1.0X +q10 1916 2021 148 1.1 925.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11 1897 2025 181 2.0 502.9 1.0X +q11 1781 2094 443 2.1 472.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12 164 205 47 4.9 203.0 1.0X +q12 165 209 61 4.9 
204.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q13 778 820 49 6.3 157.8 1.0X +q13 767 808 44 6.4 155.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14a 4952 5091 196 1.0 965.3 1.0X +q14a 4824 5200 532 1.1 940.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14b 3747 3786 56 1.4 730.4 1.0X +q14b 4151 4159 11 1.2 809.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q15 443 505 48 3.8 266.7 1.0X +q15 452 485 38 3.7 271.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q16 618 652 44 2.5 395.3 1.0X +q16 589 628 35 2.7 377.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q17 1531 1632 144 3.1 325.8 1.0X +q17 1392 1400 12 3.4 296.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18 1107 1243 193 3.3 307.3 1.0X +q18 1055 1107 73 3.4 293.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q19 299 335 42 10.4 95.9 1.0X +q19 302 326 28 10.3 96.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20 180 205 33 8.5 117.3 1.0X +q20 176 213 52 8.7 115.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q21 595 639 32 19.9 50.3 1.0X +q21 576 621 51 20.5 48.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22 3548 3579 43 3.3 299.8 1.0X +q22 3218 3259 58 3.7 271.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23a 6699 6712 20 0.8 1280.9 1.0X +q23a 5971 6291 453 0.9 1141.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23b 6338 6683 488 0.8 1211.9 1.0X +q23b 6194 6443 353 0.8 1184.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24a 118 248 53 28.3 35.4 1.0X +q24a 214 254 
46 15.6 64.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24b 214 264 41 15.6 64.1 1.0X +q24b 155 229 49 21.6 46.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q25 1381 1456 106 3.4 293.9 1.0X +q25 1379 1525 207 3.4 293.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q26 344 378 37 10.0 99.6 1.0X +q26 318 362 48 10.8 92.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q27 531 580 41 9.2 108.5 1.0X +q27 538 555 14 9.1 110.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q28 1202 1337 190 2.4 417.6 1.0X +q28 1187 1291 147 2.4 412.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q29 1540 1703 230 3.1 327.7 1.0X +q29 1566 1636 99 3.0 333.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q30 402 447 43 0.7 1364.7 1.0X +q30 381 444 53 0.8 1292.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q31 839 851 14 4.4 225.5 1.0X +q31 770 871 89 4.8 207.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q32 198 246 53 7.7 129.0 1.0X +q32 192 213 28 8.0 125.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q33 405 447 56 12.8 78.2 1.0X +q33 393 423 33 13.2 75.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34 353 384 32 8.7 115.3 1.0X +q34 350 393 35 8.7 114.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35 1296 1367 101 1.6 625.7 1.0X +q35 1281 1303 32 1.6 618.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36 544 583 43 5.5 183.2 1.0X +q36 516 544 35 5.8 173.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q37 765 807 45 17.4 57.6 1.0X +q37 780 797 21 17.0 58.7 1.0X 
-OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q38 696 740 39 7.5 133.5 1.0X +q38 639 690 49 8.2 122.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39a 1244 1409 233 9.5 105.1 1.0X +q39a 1442 1443 1 8.2 121.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39b 1269 1285 23 9.3 107.2 1.0X +q39b 1298 1305 9 9.1 109.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q40 398 446 51 4.2 237.8 1.0X +q40 319 386 46 5.2 190.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q41 140 170 37 0.1 7757.0 1.0X +q41 143 156 20 0.1 7952.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q42 155 169 24 19.2 52.2 1.0X +q42 147 165 34 20.2 49.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q43 308 335 30 9.6 104.4 1.0X +q43 304 337 39 9.7 103.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q44 338 403 45 8.6 116.6 1.0X +q44 325 371 39 8.9 112.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q45 196 226 31 4.9 204.6 1.0X +q45 197 227 27 4.9 204.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure 
AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q46 462 505 67 6.7 148.4 1.0X +q46 452 485 35 6.9 145.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47 1578 1797 310 1.9 531.1 1.0X +q47 1500 1649 211 2.0 504.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q48 924 945 25 5.3 187.7 1.0X +q48 825 848 38 6.0 167.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49 670 761 131 8.4 119.3 1.0X +q49 544 603 56 10.3 96.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q50 784 841 51 4.1 241.8 1.0X +q50 564 636 69 5.7 174.1 1.0X -OpenJDK 64-Bit 
Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51 2769 2809 56 1.3 754.1 1.0X +q51 2540 2716 250 1.4 691.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q52 153 186 33 19.5 51.3 1.0X +q52 140 157 21 21.2 47.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q53 277 340 52 10.7 93.1 1.0X +q53 265 290 42 11.2 89.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q54 1250 1262 17 4.2 236.7 1.0X +q54 1253 1262 13 4.2 237.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q55 154 168 19 19.3 51.7 1.0X +q55 143 155 17 20.8 48.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q56 408 473 67 12.7 78.8 1.0X +q56 415 460 29 12.5 80.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q57 778 837 65 2.0 508.0 1.0X +q57 758 807 45 2.0 494.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q58 412 543 195 12.4 80.4 1.0X +q58 438 561 203 11.7 85.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q59 669 747 118 4.4 226.6 1.0X +q59 635 700 58 4.7 215.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q60 427 473 27 12.1 82.4 1.0X +q60 416 452 46 12.5 80.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q61 556 586 30 5.6 178.2 1.0X +q61 537 574 54 5.8 172.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q62 183 204 25 4.3 230.9 1.0X +q62 177 201 32 4.5 223.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q63 281 301 18 10.6 94.7 1.0X +q63 276 306 31 10.8 93.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64 2377 2586 296 2.9 343.5 1.0X +q64 2141 2374 330 3.2 309.3 1.0X 
-OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q65 707 782 71 4.2 237.8 1.0X +q65 607 670 64 4.9 204.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q66 510 565 54 4.5 219.8 1.0X +q66 530 565 37 4.4 228.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67 5734 5829 134 0.5 1930.2 1.0X +q67 5561 5583 31 0.5 1871.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q68 539 559 20 5.8 173.2 1.0X +q68 454 485 22 6.8 146.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q69 1756 1826 99 1.2 848.0 1.0X +q69 1669 1751 115 1.2 806.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q70 587 620 35 5.0 198.7 1.0X +q70 547 581 36 5.4 185.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q71 352 394 43 14.8 67.5 1.0X +q71 344 368 38 15.2 65.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q72 136543 138257 2425 0.1 8896.5 1.0X +q72 119142 119748 857 0.1 7762.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q73 349 380 32 8.8 114.1 1.0X +q73 328 355 27 9.3 107.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on 
Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q74 1353 1607 359 2.8 358.8 1.0X +q74 1250 1720 665 3.0 331.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q75 1482 1683 285 3.8 263.0 1.0X +q75 1347 1539 272 4.2 239.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q76 298 335 28 17.2 58.1 1.0X +q76 291 319 33 17.6 56.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q77 610 791 184 9.2 108.7 1.0X +q77 544 719 162 10.3 96.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q78 2709 2767 82 2.1 482.4 1.0X +q78 2167 2456 
408 2.6 386.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q79 422 449 29 7.3 137.8 1.0X +q79 414 442 26 7.4 135.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q80 1318 1555 335 4.3 233.5 1.0X +q80 1401 1542 198 4.0 248.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q81 371 421 42 1.0 1012.0 1.0X +q81 336 416 66 1.1 916.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q82 1019 1064 64 14.4 69.2 1.0X +q82 1003 1036 46 14.7 68.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q83 230 286 27 2.6 387.4 1.0X +q83 235 267 31 2.5 394.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q84 685 705 25 3.5 289.6 1.0X +q84 659 707 82 3.6 278.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q85 1656 1798 200 1.7 584.3 1.0X +q85 1993 2062 97 1.4 703.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q86 184 204 30 4.4 227.5 1.0X +q86 189 209 23 4.3 233.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q87 660 727 65 7.9 126.7 1.0X +q87 693 729 31 7.5 133.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q88 1241 1402 229 2.4 417.3 1.0X +q88 1156 1373 307 2.6 388.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q89 309 357 53 9.6 103.9 1.0X +q89 293 334 36 10.1 98.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q90 132 159 19 6.1 162.8 1.0X +q90 113 136 20 7.2 139.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q91 327 365 38 7.0 142.4 1.0X +q91 330 355 20 7.0 143.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q92 135 160 19 6.0 166.3 1.0X +q92 133 173 70 6.1 164.0 1.0X 
-OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q93 529 549 38 6.0 166.9 1.0X +q93 423 452 41 7.5 133.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q94 307 364 42 2.7 364.6 1.0X +q94 303 329 24 2.8 359.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q95 5173 5213 57 0.2 6143.9 1.0X +q95 5193 5248 78 0.2 6167.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q96 163 179 17 18.3 54.7 1.0X +q96 160 174 17 18.6 53.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q97 1257 1323 93 3.5 286.2 1.0X +q97 1153 1170 24 3.8 262.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q98 268 304 31 11.1 90.4 1.0X +q98 267 305 45 11.1 89.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q99 264 295 38 5.7 174.4 1.0X +q99 262 285 22 5.8 172.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q5a-v2.7 1294 1378 120 4.3 229.9 1.0X +q5a-v2.7 1134 1159 35 5.0 201.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q6-v2.7 952 977 22 3.3 305.1 1.0X +q6-v2.7 916 932 26 3.4 293.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS 
on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10a-v2.7 1806 1867 88 1.1 871.9 1.0X +q10a-v2.7 1757 1846 126 1.2 848.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11-v2.7 1867 2114 350 2.0 494.9 1.0X +q11-v2.7 1725 2043 449 2.2 457.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12-v2.7 128 143 18 6.4 157.4 1.0X +q12-v2.7 125 139 19 6.5 154.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14-v2.7 3604 3857 358 1.4 702.5 1.0X +q14-v2.7 3771 3829 82 1.4 735.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-q14a-v2.7 6933 7124 270 0.7 1351.6 1.0X +q14a-v2.7 6402 6605 287 0.8 1248.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18a-v2.7 1805 2075 382 2.0 501.2 1.0X +q18a-v2.7 1831 2129 421 2.0 508.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20-v2.7 157 175 20 9.7 102.6 1.0X +q20-v2.7 153 174 18 10.0 100.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22-v2.7 13585 13655 98 0.9 1147.8 1.0X +q22-v2.7 12999 13185 264 0.9 1098.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22a-v2.7 2043 2046 4 5.8 172.6 1.0X +q22a-v2.7 1909 1970 87 6.2 161.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24-v2.7 207 238 34 16.1 61.9 1.0X +q24-v2.7 190 235 32 17.5 57.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q27a-v2.7 1452 1553 143 3.4 296.8 1.0X +q27a-v2.7 1710 1727 23 2.9 349.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34-v2.7 363 390 39 8.4 118.7 1.0X +q34-v2.7 336 369 35 9.1 109.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35-v2.7 1310 1320 14 1.6 632.5 1.0X +q35-v2.7 1195 1215 28 1.7 577.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35a-v2.7 1283 1290 10 1.6 619.7 1.0X +q35a-v2.7 1174 1214 56 1.8 
567.0 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36a-v2.7 492 538 44 6.0 165.5 1.0X +q36a-v2.7 481 510 50 6.2 161.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47-v2.7 1573 1759 262 1.9 529.6 1.0X +q47-v2.7 1567 1672 148 1.9 527.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49-v2.7 560 618 39 10.0 99.8 1.0X +q49-v2.7 534 636 127 10.5 95.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51a-v2.7 15223 15578 502 0.2 4146.0 1.0X +q51a-v2.7 14944 15519 814 0.2 4069.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q57-v2.7 756 801 50 2.0 494.0 1.0X +q57-v2.7 686 789 120 2.2 448.1 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64-v2.7 2553 2715 230 2.7 368.8 1.0X +q64-v2.7 1878 2187 436 3.7 271.4 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67a-v2.7 7363 7679 446 0.4 2478.4 1.0X +q67a-v2.7 6928 7238 439 0.4 2331.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q70a-v2.7 630 674 35 4.7 213.5 1.0X +q70a-v2.7 618 674 52 4.8 209.3 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q72-v2.7 137936 138063 180 0.1 8987.2 1.0X +q72-v2.7 125912 126950 1468 0.1 8203.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on 
Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q74-v2.7 1287 1678 553 2.9 341.3 1.0X +q74-v2.7 1078 1567 691 3.5 285.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q75-v2.7 1379 1615 333 4.1 244.9 1.0X +q75-v2.7 1297 1566 380 4.3 230.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q77a-v2.7 898 1007 155 6.3 159.8 1.0X +q77a-v2.7 885 1077 187 6.3 157.6 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q78-v2.7 2389 2652 372 2.4 425.5 1.0X +q78-v2.7 2429 2558 183 2.3 432.5 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q80a-v2.7 1616 2053 618 3.5 286.2 1.0X +q80a-v2.7 1613 1917 430 3.5 285.7 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q86a-v2.7 237 278 36 3.4 292.6 1.0X +q86a-v2.7 240 265 36 3.4 295.8 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q98-v2.7 259 296 65 11.5 87.0 1.0X +q98-v2.7 260 287 22 11.4 87.7 1.0X diff --git a/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt b/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt index 4831dffceecd1..7458fd93a4f38 100644 --- a/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt +++ b/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt @@ -1,810 +1,810 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q1 600 728 155 0.8 1300.2 1.0X +q1 661 864 259 0.7 1432.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q2 823 845 23 2.7 368.5 1.0X +q2 817 864 59 2.7 366.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q3 225 259 25 13.2 75.6 1.0X +q3 234 277 31 12.7 78.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q4 4365 4759 557 1.2 837.4 1.0X +q4 4856 5073 308 1.1 931.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q5 1027 1178 214 5.5 182.5 1.0X +q5 1118 1254 191 5.0 198.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q6 1062 1102 56 2.9 340.4 1.0X +q6 1228 1362 191 2.5 393.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit 
Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q7 583 611 30 8.4 119.1 1.0X +q7 756 776 17 6.5 154.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q8 462 483 23 6.7 149.0 1.0X +q8 567 619 61 5.5 182.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q9 878 890 11 0.0 25071759.3 1.0X +q9 907 945 55 0.0 25911119.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10 1901 2002 143 1.1 917.8 1.0X +q10 2016 2062 65 1.0 973.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11 1901 2269 521 
2.0 504.1 1.0X +q11 1845 2147 427 2.0 489.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12 228 263 28 3.5 281.8 1.0X +q12 202 240 29 4.0 250.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q13 856 885 27 5.8 173.6 1.0X +q13 852 889 33 5.8 172.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14a 4584 5058 670 1.1 893.5 1.0X +q14a 4637 4894 363 1.1 904.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14b 3771 3852 115 1.4 735.1 1.0X +q14b 3528 3702 246 1.5 687.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q15 383 418 39 4.3 230.4 1.0X +q15 402 413 12 4.1 241.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q16 645 781 127 2.4 412.7 1.0X +q16 750 791 37 2.1 480.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q17 1451 1466 21 3.2 308.8 1.0X +q17 1691 1730 55 2.8 359.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18 1305 1446 200 2.8 362.3 1.0X +q18 1414 1569 219 2.5 392.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q19 359 415 50 8.7 114.9 1.0X +q19 376 403 43 8.3 120.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on 
Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20 185 203 26 8.3 121.1 1.0X +q20 190 204 14 8.1 123.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q21 699 727 27 16.9 59.0 1.0X +q21 672 712 54 17.6 56.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22 3295 3404 154 3.6 278.4 1.0X +q22 3351 3407 80 3.5 283.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23a 5808 5881 103 0.9 1110.6 1.0X +q23a 5597 5928 467 0.9 1070.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23b 5902 5980 111 0.9 1128.5 1.0X +q23b 
5925 6131 291 0.9 1133.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24a 146 308 88 22.8 43.8 1.0X +q24a 231 263 30 14.4 69.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24b 210 253 43 15.9 62.9 1.0X +q24b 217 257 49 15.4 65.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q25 1249 1294 63 3.8 265.8 1.0X +q25 1468 1480 17 3.2 312.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q26 391 436 39 8.8 113.3 1.0X +q26 426 443 14 8.1 123.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q27 515 575 51 9.5 105.2 1.0X +q27 603 618 16 8.1 123.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q28 1188 1378 268 2.4 412.6 1.0X +q28 1402 1654 356 2.1 486.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q29 1239 1246 9 3.8 263.8 1.0X +q29 1496 1569 104 3.1 318.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q30 473 511 36 0.6 1606.0 1.0X +q30 453 511 72 0.7 1537.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q31 1043 1236 273 3.6 280.2 1.0X +q31 992 1280 407 3.8 266.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS 
on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q32 254 310 63 6.0 165.6 1.0X +q32 247 291 37 6.2 161.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q33 484 537 38 10.7 93.5 1.0X +q33 407 441 28 12.7 78.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34 362 393 55 8.4 118.3 1.0X +q34 409 425 20 7.5 133.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35 1405 1427 31 1.5 678.3 1.0X +q35 1442 1460 26 1.4 696.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36 559 580 18 5.3 188.1 1.0X +q36 570 582 20 
5.2 191.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q37 931 945 23 14.3 70.1 1.0X +q37 896 901 6 14.8 67.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q38 759 830 85 6.9 145.7 1.0X +q38 888 1151 371 5.9 170.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39a 1611 1833 314 7.3 136.1 1.0X +q39a 1533 1756 315 7.7 129.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39b 1526 1610 118 7.8 129.0 1.0X +q39b 1514 1748 331 7.8 127.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q40 331 358 19 5.1 197.7 1.0X +q40 355 377 20 4.7 212.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q41 164 180 13 0.1 9114.0 1.0X +q41 166 184 10 0.1 9202.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q42 169 196 20 17.6 57.0 1.0X +q42 151 163 13 19.6 51.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q43 330 344 15 9.0 111.7 1.0X +q43 306 328 16 9.6 103.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q44 401 431 38 7.2 138.4 1.0X +q44 338 347 9 8.6 116.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q45 207 249 24 4.6 215.6 1.0X +q45 187 214 28 5.1 194.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q46 523 534 14 5.9 168.2 1.0X +q46 471 491 21 6.6 151.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47 1714 1776 88 1.7 576.8 1.0X +q47 1780 1899 168 1.7 599.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q48 887 897 13 5.6 180.1 1.0X +q48 937 945 7 5.3 190.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49 681 732 49 8.2 121.3 1.0X +q49 728 753 41 7.7 129.6 
1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q50 672 694 37 4.8 207.3 1.0X +q50 754 786 28 4.3 232.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51 2761 2791 41 1.3 752.1 1.0X +q51 2470 2795 459 1.5 672.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q52 159 174 17 18.6 53.7 1.0X +q52 151 158 6 19.7 50.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q53 279 320 31 10.7 93.9 1.0X +q53 299 311 23 9.9 100.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q54 1304 1308 5 4.0 247.0 1.0X +q54 1292 1341 70 4.1 244.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q55 161 186 13 18.5 54.1 1.0X +q55 169 186 14 17.6 56.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q56 525 550 22 9.9 101.4 1.0X +q56 491 507 11 10.5 94.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q57 761 854 105 2.0 496.8 1.0X +q57 1032 1203 241 1.5 674.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q58 483 529 39 10.6 94.1 1.0X +q58 509 580 99 10.1 99.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q59 680 692 14 4.3 230.3 1.0X +q59 753 754 2 3.9 255.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q60 500 565 49 10.4 96.5 1.0X +q60 581 692 167 8.9 112.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q61 579 630 56 5.4 185.4 1.0X +q61 578 605 52 5.4 185.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q62 190 215 18 4.2 239.6 1.0X +q62 176 186 4 4.5 221.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q63 268 282 14 11.1 90.3 1.0X +q63 320 351 28 9.3 107.7 1.0X 
-OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64 2422 2684 371 2.9 350.0 1.0X +q64 2602 2865 372 2.7 376.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q65 767 815 45 3.9 258.3 1.0X +q65 603 668 53 4.9 202.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q66 681 707 26 3.4 293.7 1.0X +q66 538 600 82 4.3 232.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67 5411 5483 101 0.5 1821.5 1.0X +q67 5377 5515 195 0.6 1809.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q68 511 535 24 6.1 164.3 1.0X +q68 495 550 38 6.3 159.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q69 1532 1706 247 1.4 739.6 1.0X +q69 1742 1842 140 1.2 841.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q70 548 560 9 5.4 185.5 1.0X +q70 651 667 14 4.5 220.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q71 408 420 15 12.8 78.3 1.0X +q71 435 455 16 12.0 83.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q72 93843 95348 2129 0.2 6114.3 1.0X +q72 123701 123925 317 0.1 8059.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q73 389 404 11 7.9 127.1 1.0X +q73 336 373 31 9.1 109.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q74 1330 1616 405 2.8 352.6 1.0X +q74 1419 1584 233 2.7 376.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q75 1481 1759 394 3.8 262.9 1.0X +q75 1665 1951 404 3.4 295.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q76 325 364 30 15.8 63.4 1.0X +q76 358 369 11 14.3 69.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q77 573 780 185 9.8 102.0 
1.0X +q77 711 859 128 7.9 126.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q78 2164 2460 420 2.6 385.3 1.0X +q78 2705 2843 195 2.1 481.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q79 450 464 11 6.8 147.0 1.0X +q79 484 492 8 6.3 158.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q80 1596 1722 178 3.5 282.8 1.0X +q80 1533 1855 455 3.7 271.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q81 408 458 53 0.9 1113.9 1.0X +q81 364 447 82 1.0 991.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q82 1177 1192 22 12.5 80.0 1.0X +q82 1176 1196 27 12.5 79.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q83 284 324 30 2.1 477.7 1.0X +q83 323 348 20 1.8 542.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q84 776 778 2 3.0 328.0 1.0X +q84 742 767 28 3.2 313.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q85 1721 2185 656 1.6 607.0 1.0X +q85 1817 2120 428 1.6 641.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q86 207 227 14 3.9 255.4 1.0X +q86 210 226 9 3.8 259.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 
6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q87 731 791 74 7.1 140.3 1.0X +q87 919 1068 211 5.7 176.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q88 1414 1665 355 2.1 475.6 1.0X +q88 1471 1642 243 2.0 494.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q89 346 391 43 8.6 116.6 1.0X +q89 313 362 62 9.5 105.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q90 146 171 21 5.5 180.3 1.0X +q90 145 165 20 5.6 178.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q91 365 393 26 6.3 159.1 1.0X +q91 388 429 29 5.9 
169.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q92 153 183 22 5.3 189.5 1.0X +q92 152 173 17 5.3 188.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q93 430 445 9 7.4 135.8 1.0X +q93 501 515 10 6.3 158.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q94 356 377 23 2.4 422.7 1.0X +q94 367 387 12 2.3 436.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q95 5268 5437 240 0.2 6256.5 1.0X +q95 5374 5470 137 0.2 6382.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q96 188 207 20 15.8 63.3 1.0X +q96 186 208 19 16.0 62.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q97 1214 1299 120 3.6 276.4 1.0X +q97 1318 1354 51 3.3 300.1 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q98 314 351 41 9.5 105.8 1.0X +q98 304 334 33 9.8 102.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q99 312 321 11 4.8 206.2 1.0X +q99 310 330 19 4.9 205.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q5a-v2.7 1495 1520 36 3.8 265.7 1.0X +q5a-v2.7 1498 1674 250 3.8 266.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q6-v2.7 1002 1015 18 3.1 321.0 1.0X +q6-v2.7 980 1001 18 3.2 314.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10a-v2.7 1792 1914 172 1.2 865.6 1.0X +q10a-v2.7 1863 2003 199 1.1 899.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11-v2.7 1809 2158 493 2.1 479.7 1.0X +q11-v2.7 1811 2162 497 2.1 480.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12-v2.7 146 168 18 5.5 180.8 1.0X +q12-v2.7 140 162 24 5.8 173.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q14-v2.7 4035 4204 239 1.3 786.6 1.0X +q14-v2.7 3748 3965 307 1.4 730.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14a-v2.7 7068 7371 429 0.7 1377.8 1.0X +q14a-v2.7 7129 7256 179 0.7 1389.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18a-v2.7 2182 2292 156 1.7 605.7 1.0X +q18a-v2.7 2101 2292 270 1.7 583.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20-v2.7 179 193 11 8.5 117.1 1.0X +q20-v2.7 162 182 16 9.5 105.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22-v2.7 13689 13818 183 0.9 1156.5 1.0X +q22-v2.7 14202 14286 118 0.8 1199.9 1.0X -OpenJDK 64-Bit Server VM 
17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22a-v2.7 1981 2114 189 6.0 167.3 1.0X +q22a-v2.7 2164 2288 176 5.5 182.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24-v2.7 228 270 21 14.6 68.3 1.0X +q24-v2.7 254 278 26 13.1 76.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q27a-v2.7 1361 1495 189 3.6 278.3 1.0X +q27a-v2.7 1449 1664 304 3.4 296.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34-v2.7 403 409 8 7.6 131.6 1.0X +q34-v2.7 404 415 11 7.6 131.9 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q35-v2.7 1326 1387 87 1.6 640.1 1.0X +q35-v2.7 1433 1462 41 1.4 691.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35a-v2.7 1296 1311 20 1.6 626.0 1.0X +q35a-v2.7 1346 1394 68 1.5 650.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36a-v2.7 533 556 16 5.6 179.5 1.0X +q36a-v2.7 575 607 44 5.2 193.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47-v2.7 1618 1744 178 1.8 544.6 1.0X +q47-v2.7 1841 2000 226 1.6 619.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49-v2.7 617 669 35 9.1 109.8 1.0X +q49-v2.7 620 682 63 9.1 110.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 
6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51a-v2.7 13989 14478 692 0.3 3809.9 1.0X +q51a-v2.7 14407 14835 605 0.3 3923.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q57-v2.7 799 867 82 1.9 521.6 1.0X +q57-v2.7 966 1157 271 1.6 630.8 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64-v2.7 2391 2749 506 2.9 345.5 1.0X +q64-v2.7 2494 2897 570 2.8 360.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67a-v2.7 7040 7300 367 0.4 2369.8 1.0X +q67a-v2.7 7449 7556 152 0.4 2507.3 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q70a-v2.7 702 723 33 4.2 237.8 1.0X +q70a-v2.7 713 751 47 4.1 241.6 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q72-v2.7 92914 94378 2071 0.2 6053.8 1.0X +q72-v2.7 121071 121592 736 0.1 7888.4 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q74-v2.7 1301 1522 314 2.9 344.9 1.0X +q74-v2.7 1213 1347 190 3.1 321.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q75-v2.7 1729 1814 121 3.3 306.9 1.0X +q75-v2.7 1379 1739 509 4.1 244.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q77a-v2.7 809 874 92 6.9 144.1 1.0X +q77a-v2.7 1231 1282 72 4.6 219.2 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS 
on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q78-v2.7 2053 2458 573 2.7 365.5 1.0X +q78-v2.7 2218 2579 511 2.5 395.0 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q80a-v2.7 1612 1662 72 3.5 285.5 1.0X +q80a-v2.7 1873 1912 56 3.0 331.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q86a-v2.7 244 278 29 3.3 300.6 1.0X +q86a-v2.7 232 262 24 3.5 286.7 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q98-v2.7 289 308 19 10.3 97.2 1.0X +q98-v2.7 285 300 12 10.4 95.8 1.0X diff --git a/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-jdk21-results.txt b/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-jdk21-results.txt index c746cde05060a..4888365b839e0 100644 --- a/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-jdk21-results.txt +++ 
b/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ TakeOrderedAndProject ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TakeOrderedAndProject with SMJ: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -TakeOrderedAndProject with SMJ for doExecute 214 243 27 0.0 21428.5 1.0X -TakeOrderedAndProject with SMJ for executeCollect 97 102 4 0.1 9748.1 2.2X +TakeOrderedAndProject with SMJ for doExecute 160 200 39 0.1 15968.9 1.0X +TakeOrderedAndProject with SMJ for executeCollect 96 101 5 0.1 9562.9 1.7X diff --git a/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-results.txt b/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-results.txt index 1fa4496d6aea0..0cbc1823e7d29 100644 --- a/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-results.txt +++ b/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-results.txt @@ -2,11 +2,11 @@ TakeOrderedAndProject ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor TakeOrderedAndProject with SMJ: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -TakeOrderedAndProject with SMJ for doExecute 262 286 31 0.0 26165.4 1.0X -TakeOrderedAndProject with SMJ for executeCollect 107 113 7 0.1 10681.8 2.4X +TakeOrderedAndProject with SMJ for doExecute 194 275 70 0.1 19414.1 1.0X 
+TakeOrderedAndProject with SMJ for executeCollect 118 119 2 0.1 11785.9 1.6X diff --git a/sql/core/benchmarks/TopKBenchmark-jdk21-results.txt b/sql/core/benchmarks/TopKBenchmark-jdk21-results.txt index 269fdd7c815a2..7c2ab96ac4ec2 100644 --- a/sql/core/benchmarks/TopKBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/TopKBenchmark-jdk21-results.txt @@ -2,21 +2,21 @@ Top-K Computation ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark Top-K: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------- -ROW_NUMBER (PARTITION: , WindowGroupLimit: false) 9148 9493 278 2.3 436.2 1.0X -ROW_NUMBER (PARTITION: , WindowGroupLimit: true) 1698 1731 48 12.4 81.0 5.4X -ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12103 12157 56 1.7 577.1 0.8X -ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: true) 5049 5211 98 4.2 240.8 1.8X -RANK (PARTITION: , WindowGroupLimit: false) 9596 9842 149 2.2 457.6 1.0X -RANK (PARTITION: , WindowGroupLimit: true) 1896 2059 112 11.1 90.4 4.8X -RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12338 12642 150 1.7 588.3 0.7X -RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4985 5179 95 4.2 237.7 1.8X -DENSE_RANK (PARTITION: , WindowGroupLimit: false) 9389 9628 171 2.2 447.7 1.0X -DENSE_RANK (PARTITION: , WindowGroupLimit: true) 1849 1900 71 11.3 88.2 4.9X -DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12027 12393 186 1.7 573.5 0.8X -DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 5018 5083 47 4.2 239.3 1.8X +ROW_NUMBER (PARTITION: , WindowGroupLimit: false) 8651 8928 175 2.4 412.5 1.0X +ROW_NUMBER (PARTITION: , 
WindowGroupLimit: true) 1629 1647 13 12.9 77.7 5.3X +ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11038 11214 143 1.9 526.3 0.8X +ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: true) 5061 5109 33 4.1 241.3 1.7X +RANK (PARTITION: , WindowGroupLimit: false) 9458 9640 159 2.2 451.0 0.9X +RANK (PARTITION: , WindowGroupLimit: true) 1728 1749 16 12.1 82.4 5.0X +RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11732 11986 175 1.8 559.4 0.7X +RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4823 4980 69 4.3 230.0 1.8X +DENSE_RANK (PARTITION: , WindowGroupLimit: false) 9262 9491 104 2.3 441.6 0.9X +DENSE_RANK (PARTITION: , WindowGroupLimit: true) 1961 1978 19 10.7 93.5 4.4X +DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11727 11982 139 1.8 559.2 0.7X +DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4876 4978 120 4.3 232.5 1.8X diff --git a/sql/core/benchmarks/TopKBenchmark-results.txt b/sql/core/benchmarks/TopKBenchmark-results.txt index 76efbf1397b08..4b335ce3e2d98 100644 --- a/sql/core/benchmarks/TopKBenchmark-results.txt +++ b/sql/core/benchmarks/TopKBenchmark-results.txt @@ -2,21 +2,21 @@ Top-K Computation ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Benchmark Top-K: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------- -ROW_NUMBER (PARTITION: , WindowGroupLimit: false) 9462 9625 131 2.2 451.2 1.0X -ROW_NUMBER (PARTITION: , WindowGroupLimit: true) 1653 1694 28 12.7 78.8 5.7X -ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11977 12058 82 1.8 571.1 0.8X -ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: 
true) 5021 5081 35 4.2 239.4 1.9X -RANK (PARTITION: , WindowGroupLimit: false) 10017 10132 79 2.1 477.6 0.9X -RANK (PARTITION: , WindowGroupLimit: true) 1948 1984 22 10.8 92.9 4.9X -RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12477 12533 73 1.7 594.9 0.8X -RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 5033 5090 36 4.2 240.0 1.9X -DENSE_RANK (PARTITION: , WindowGroupLimit: false) 9757 9841 63 2.1 465.3 1.0X -DENSE_RANK (PARTITION: , WindowGroupLimit: true) 1968 1996 30 10.7 93.8 4.8X -DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12419 12483 47 1.7 592.2 0.8X -DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 5060 5128 53 4.1 241.3 1.9X +ROW_NUMBER (PARTITION: , WindowGroupLimit: false) 9179 9279 81 2.3 437.7 1.0X +ROW_NUMBER (PARTITION: , WindowGroupLimit: true) 1609 1637 16 13.0 76.7 5.7X +ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11629 11673 34 1.8 554.5 0.8X +ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: true) 5008 5038 23 4.2 238.8 1.8X +RANK (PARTITION: , WindowGroupLimit: false) 9720 9859 339 2.2 463.5 0.9X +RANK (PARTITION: , WindowGroupLimit: true) 1955 1990 15 10.7 93.2 4.7X +RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12101 12208 135 1.7 577.0 0.8X +RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 5019 5127 167 4.2 239.3 1.8X +DENSE_RANK (PARTITION: , WindowGroupLimit: false) 9673 9748 62 2.2 461.3 0.9X +DENSE_RANK (PARTITION: , WindowGroupLimit: true) 1972 2007 31 10.6 94.0 4.7X +DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12099 12148 42 1.7 576.9 0.8X +DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4982 5028 28 4.2 237.5 1.8X diff --git a/sql/core/benchmarks/UDFBenchmark-jdk21-results.txt b/sql/core/benchmarks/UDFBenchmark-jdk21-results.txt index 81efa0b9b3a72..7c2f56761d1cd 100644 --- a/sql/core/benchmarks/UDFBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/UDFBenchmark-jdk21-results.txt @@ -2,58 +2,58 
@@ UDF with mixed input types ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to string: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -long/nullable int/string to string wholestage off 129 165 50 0.8 1291.3 1.0X -long/nullable int/string to string wholestage on 64 74 6 1.6 638.6 2.0X +long/nullable int/string to string wholestage off 137 144 9 0.7 1370.6 1.0X +long/nullable int/string to string wholestage on 72 82 11 1.4 719.9 1.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to option: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -long/nullable int/string to option wholestage off 47 66 28 2.2 465.1 1.0X -long/nullable int/string to option wholestage on 34 39 6 2.9 343.2 1.4X +long/nullable int/string to option wholestage off 43 49 9 2.3 427.1 1.0X +long/nullable int/string to option wholestage on 37 42 6 2.7 374.6 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to primitive: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -long/nullable int/string to primitive wholestage off 30 31 1 3.3 299.6 1.0X 
-long/nullable int/string to primitive wholestage on 28 29 2 3.6 280.4 1.1X +long/nullable int/string to primitive wholestage off 34 39 7 2.9 340.1 1.0X +long/nullable int/string to primitive wholestage on 32 35 4 3.2 315.5 1.1X ================================================================================================ UDF with primitive types ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int to string: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -long/nullable int to string wholestage off 30 31 2 3.4 297.4 1.0X -long/nullable int to string wholestage on 28 35 5 3.5 283.9 1.0X +long/nullable int to string wholestage off 30 30 0 3.3 301.5 1.0X +long/nullable int to string wholestage on 31 33 1 3.3 306.2 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int to option: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -long/nullable int to option wholestage off 22 26 6 4.6 219.3 1.0X -long/nullable int to option wholestage on 21 23 1 4.7 214.0 1.0X +long/nullable int to option wholestage off 22 25 4 4.5 224.4 1.0X +long/nullable int to option wholestage on 23 28 6 4.4 228.9 1.0X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int to primitive: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ----------------------------------------------------------------------------------------------------------------------------- -long/nullable int to primitive wholestage off 18 19 1 5.6 179.0 1.0X -long/nullable int to primitive wholestage on 18 19 1 5.6 179.7 1.0X +long/nullable int to primitive wholestage off 24 25 2 4.2 235.5 1.0X +long/nullable int to primitive wholestage on 19 20 1 5.3 189.2 1.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor UDF identity overhead: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Baseline 13 19 8 7.9 125.8 1.0X -With identity UDF 16 18 2 6.2 160.4 0.8X +Baseline 13 15 2 7.9 126.9 1.0X +With identity UDF 18 20 3 5.7 176.4 0.7X diff --git a/sql/core/benchmarks/UDFBenchmark-results.txt b/sql/core/benchmarks/UDFBenchmark-results.txt index 818b51532da74..4e6c9dfdc74e6 100644 --- a/sql/core/benchmarks/UDFBenchmark-results.txt +++ b/sql/core/benchmarks/UDFBenchmark-results.txt @@ -2,58 +2,58 @@ UDF with mixed input types ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to string: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -long/nullable int/string to string wholestage off 165 166 1 0.6 1648.2 1.0X -long/nullable int/string to string wholestage on 87 114 18 1.1 869.9 1.9X +long/nullable int/string to string wholestage off 130 141 15 0.8 1299.8 1.0X +long/nullable int/string to string 
wholestage on 92 97 6 1.1 922.8 1.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to option: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -long/nullable int/string to option wholestage off 66 68 3 1.5 659.8 1.0X -long/nullable int/string to option wholestage on 53 63 6 1.9 525.8 1.3X +long/nullable int/string to option wholestage off 52 57 7 1.9 523.5 1.0X +long/nullable int/string to option wholestage on 42 49 5 2.4 420.5 1.2X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to primitive: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -long/nullable int/string to primitive wholestage off 34 40 8 3.0 338.5 1.0X -long/nullable int/string to primitive wholestage on 28 30 1 3.6 280.4 1.2X +long/nullable int/string to primitive wholestage off 30 34 5 3.3 301.7 1.0X +long/nullable int/string to primitive wholestage on 31 33 2 3.2 312.3 1.0X ================================================================================================ UDF with primitive types ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int to string: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
-------------------------------------------------------------------------------------------------------------------------- -long/nullable int to string wholestage off 28 30 2 3.5 284.0 1.0X -long/nullable int to string wholestage on 29 33 5 3.4 293.0 1.0X +long/nullable int to string wholestage off 31 32 1 3.2 312.6 1.0X +long/nullable int to string wholestage on 31 32 1 3.2 309.5 1.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int to option: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -long/nullable int to option wholestage off 22 23 1 4.6 218.1 1.0X -long/nullable int to option wholestage on 22 23 1 4.5 224.7 1.0X +long/nullable int to option wholestage off 23 23 0 4.4 226.9 1.0X +long/nullable int to option wholestage on 24 25 2 4.2 240.8 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor long/nullable int to primitive: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -long/nullable int to primitive wholestage off 16 16 0 6.4 157.3 1.0X -long/nullable int to primitive wholestage on 18 21 4 5.7 175.0 0.9X +long/nullable int to primitive wholestage off 18 20 3 5.5 180.6 1.0X +long/nullable int to primitive wholestage on 19 21 3 5.2 193.0 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor UDF identity overhead: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Baseline 14 16 1 7.1 141.4 1.0X -With identity UDF 14 16 3 6.9 144.3 1.0X +Baseline 13 16 2 7.5 133.4 1.0X +With identity UDF 17 18 1 6.1 165.2 0.8X diff --git a/sql/core/benchmarks/UnsafeArrayDataBenchmark-jdk21-results.txt b/sql/core/benchmarks/UnsafeArrayDataBenchmark-jdk21-results.txt index d11fd0406e1b4..8148196e6b688 100644 --- a/sql/core/benchmarks/UnsafeArrayDataBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/UnsafeArrayDataBenchmark-jdk21-results.txt @@ -2,32 +2,32 @@ Benchmark UnsafeArrayData ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 74 74 1 2281.5 0.4 1.0X -Double 158 158 0 1064.2 0.9 0.5X +Int 76 77 1 2202.3 0.5 1.0X +Double 159 159 0 1055.9 0.9 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 12 13 1 1709.6 0.6 1.0X -Double 28 33 4 737.4 1.4 0.4X +Int 12 13 1 1797.9 0.6 1.0X +Double 28 33 4 748.6 1.3 0.4X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Get primitive array from UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------ -Int 19 21 1 3322.4 0.3 1.0X -Double 39 42 2 1600.5 0.6 0.5X +Int 20 21 1 3164.3 0.3 1.0X +Double 40 42 1 1561.8 0.6 0.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Create UnsafeArrayData from primitive array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Int 20 24 2 3069.8 0.3 1.0X -Double 44 49 3 1444.2 0.7 0.5X +Int 20 22 1 3198.7 0.3 1.0X +Double 42 45 2 1508.3 0.7 0.5X diff --git a/sql/core/benchmarks/UnsafeArrayDataBenchmark-results.txt b/sql/core/benchmarks/UnsafeArrayDataBenchmark-results.txt index 79032e13c0de3..1c9c5f4f5b4c3 100644 --- a/sql/core/benchmarks/UnsafeArrayDataBenchmark-results.txt +++ b/sql/core/benchmarks/UnsafeArrayDataBenchmark-results.txt @@ -2,32 +2,32 @@ Benchmark UnsafeArrayData ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Read UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 76 76 0 2215.1 0.5 1.0X -Double 158 158 0 1062.9 0.9 0.5X +Int 76 77 3 2206.0 0.5 1.0X +Double 158 159 1 1060.4 0.9 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Write UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Int 12 14 1 1690.2 0.6 1.0X -Double 31 33 1 687.1 1.5 0.4X +Int 13 15 1 1671.6 0.6 1.0X +Double 28 34 3 738.8 1.4 0.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Get primitive array from UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 21 23 2 3025.8 0.3 1.0X -Double 45 48 1 1410.8 0.7 0.5X +Int 19 22 2 3250.9 0.3 1.0X +Double 40 43 2 1572.4 0.6 0.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Create UnsafeArrayData from primitive array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Int 22 24 1 2902.4 0.3 1.0X -Double 46 49 1 1374.3 0.7 0.5X +Int 20 23 2 3201.2 0.3 1.0X +Double 43 46 1 1460.5 0.7 0.5X diff --git a/sql/core/benchmarks/UpdateFieldsBenchmark-jdk21-results.txt b/sql/core/benchmarks/UpdateFieldsBenchmark-jdk21-results.txt index c5756342d99d5..eac137fad9594 100644 --- a/sql/core/benchmarks/UpdateFieldsBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/UpdateFieldsBenchmark-jdk21-results.txt @@ -2,25 +2,25 @@ Add 2 columns and drop 2 columns at 3 different depths of nesting ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Add 2 columns and drop 2 columns at 3 different depths of 
nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -To non-nullable StructTypes using performant method 3 4 1 0.0 Infinity 1.0X -To nullable StructTypes using performant method 1 2 0 0.0 Infinity 1.9X -To non-nullable StructTypes using non-performant method 18 21 2 0.0 Infinity 0.1X -To nullable StructTypes using non-performant method 792 836 39 0.0 Infinity 0.0X +To non-nullable StructTypes using performant method 1 2 1 0.0 Infinity 1.0X +To nullable StructTypes using performant method 1 1 0 0.0 Infinity 1.4X +To non-nullable StructTypes using non-performant method 15 16 1 0.0 Infinity 0.1X +To nullable StructTypes using non-performant method 542 561 13 0.0 Infinity 0.0X ================================================================================================ Add 50 columns and drop 50 columns at 100 different depths of nesting ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Add 50 columns and drop 50 columns at 100 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -To non-nullable StructTypes using performant method 1088 1112 34 0.0 Infinity 1.0X -To nullable StructTypes using performant method 1150 1151 1 0.0 Infinity 0.9X +To non-nullable StructTypes using performant method 168 172 4 0.0 Infinity 1.0X +To nullable StructTypes using performant method 196 200 5 0.0 Infinity 0.9X diff --git a/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt 
b/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt index 7bc440e192516..6e137bec68e30 100644 --- a/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt +++ b/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt @@ -2,25 +2,25 @@ Add 2 columns and drop 2 columns at 3 different depths of nesting ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Add 2 columns and drop 2 columns at 3 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -To non-nullable StructTypes using performant method 2 3 1 0.0 Infinity 1.0X +To non-nullable StructTypes using performant method 2 2 1 0.0 Infinity 1.0X To nullable StructTypes using performant method 1 1 0 0.0 Infinity 1.4X -To non-nullable StructTypes using non-performant method 18 19 2 0.0 Infinity 0.1X -To nullable StructTypes using non-performant method 846 885 45 0.0 Infinity 0.0X +To non-nullable StructTypes using non-performant method 16 17 1 0.0 Infinity 0.1X +To nullable StructTypes using non-performant method 565 597 25 0.0 Infinity 0.0X ================================================================================================ Add 50 columns and drop 50 columns at 100 different depths of nesting ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Add 50 columns and drop 50 columns at 100 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
----------------------------------------------------------------------------------------------------------------------------------------------------- -To non-nullable StructTypes using performant method 1087 1109 31 0.0 Infinity 1.0X -To nullable StructTypes using performant method 1123 1190 95 0.0 Infinity 1.0X +To non-nullable StructTypes using performant method 178 188 8 0.0 Infinity 1.0X +To nullable StructTypes using performant method 207 212 5 0.0 Infinity 0.9X diff --git a/sql/core/benchmarks/V2FunctionBenchmark-jdk21-results.txt b/sql/core/benchmarks/V2FunctionBenchmark-jdk21-results.txt index 49cf58086a51c..143c6c57232e1 100644 --- a/sql/core/benchmarks/V2FunctionBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/V2FunctionBenchmark-jdk21-results.txt @@ -1,44 +1,44 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = true codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -native_long_add 9542 9554 11 52.4 19.1 1.0X -java_long_add_default 22433 22756 485 22.3 44.9 0.4X -java_long_add_magic 11747 11782 44 42.6 23.5 0.8X -java_long_add_static_magic 11539 11594 48 43.3 23.1 0.8X -scala_long_add_default 23789 25196 2336 21.0 47.6 0.4X -scala_long_add_magic 11714 11758 38 42.7 23.4 0.8X +native_long_add 9807 10151 549 51.0 19.6 1.0X +java_long_add_default 22932 22997 56 21.8 45.9 0.4X +java_long_add_magic 11408 11651 246 43.8 22.8 0.9X +java_long_add_static_magic 11451 11487 52 43.7 22.9 0.9X +scala_long_add_default 23554 23574 22 21.2 47.1 0.4X +scala_long_add_magic 11686 11710 33 42.8 23.4 0.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on 
Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = false codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 10296 10347 45 48.6 20.6 1.0X -java_long_add_default 22464 23279 1403 22.3 44.9 0.5X -java_long_add_magic 11775 11807 33 42.5 23.5 0.9X -java_long_add_static_magic 10049 10065 16 49.8 20.1 1.0X -scala_long_add_default 22436 24439 3455 22.3 44.9 0.5X -scala_long_add_magic 11815 11895 108 42.3 23.6 0.9X +native_long_add 10397 10439 41 48.1 20.8 1.0X +java_long_add_default 22679 22712 33 22.0 45.4 0.5X +java_long_add_magic 11595 11695 100 43.1 23.2 0.9X +java_long_add_static_magic 10111 10146 38 49.5 20.2 1.0X +scala_long_add_default 22592 22624 27 22.1 45.2 0.5X +scala_long_add_magic 11593 11648 52 43.1 23.2 0.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = true codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 22445 22924 448 22.3 44.9 1.0X -java_long_add_default 26468 26478 10 18.9 52.9 0.8X -java_long_add_magic 32917 32937 26 15.2 65.8 0.7X -java_long_add_static_magic 31424 31496 108 15.9 62.8 0.7X -scala_long_add_default 26265 26358 100 19.0 52.5 0.9X -scala_long_add_magic 33764 34033 423 14.8 67.5 0.7X +native_long_add 22769 22882 160 22.0 45.5 1.0X +java_long_add_default 27959 28164 261 17.9 55.9 0.8X +java_long_add_magic 32664 32703 36 15.3 65.3 0.7X +java_long_add_static_magic 31044 31293 407 16.1 62.1 0.7X 
+scala_long_add_default 26593 26614 18 18.8 53.2 0.9X +scala_long_add_magic 32909 33049 170 15.2 65.8 0.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = false codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 21582 22336 1304 23.2 43.2 1.0X -java_long_add_default 25194 25472 475 19.8 50.4 0.9X -java_long_add_magic 32678 32727 43 15.3 65.4 0.7X -java_long_add_static_magic 30357 30481 214 16.5 60.7 0.7X -scala_long_add_default 25166 25413 392 19.9 50.3 0.9X -scala_long_add_magic 32759 32773 12 15.3 65.5 0.7X +native_long_add 22966 22981 26 21.8 45.9 1.0X +java_long_add_default 26581 26697 133 18.8 53.2 0.9X +java_long_add_magic 32925 33042 131 15.2 65.9 0.7X +java_long_add_static_magic 31046 32306 2072 16.1 62.1 0.7X +scala_long_add_default 26648 26670 19 18.8 53.3 0.9X +scala_long_add_magic 32969 33052 129 15.2 65.9 0.7X diff --git a/sql/core/benchmarks/V2FunctionBenchmark-results.txt b/sql/core/benchmarks/V2FunctionBenchmark-results.txt index dca57e380c1a2..8dcacf05fa0eb 100644 --- a/sql/core/benchmarks/V2FunctionBenchmark-results.txt +++ b/sql/core/benchmarks/V2FunctionBenchmark-results.txt @@ -1,44 +1,44 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = true codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -native_long_add 
9323 9391 60 53.6 18.6 1.0X -java_long_add_default 22346 22797 425 22.4 44.7 0.4X -java_long_add_magic 10786 10800 13 46.4 21.6 0.9X -java_long_add_static_magic 10625 10748 169 47.1 21.2 0.9X -scala_long_add_default 22788 22840 47 21.9 45.6 0.4X -scala_long_add_magic 10709 10767 51 46.7 21.4 0.9X +native_long_add 9192 9271 105 54.4 18.4 1.0X +java_long_add_default 22377 22680 265 22.3 44.8 0.4X +java_long_add_magic 10753 10776 28 46.5 21.5 0.9X +java_long_add_static_magic 10564 11517 825 47.3 21.1 0.9X +scala_long_add_default 23011 23250 273 21.7 46.0 0.4X +scala_long_add_magic 10654 10734 97 46.9 21.3 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = false codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 9743 9901 137 51.3 19.5 1.0X -java_long_add_default 22268 22278 10 22.5 44.5 0.4X -java_long_add_magic 10735 10785 44 46.6 21.5 0.9X -java_long_add_static_magic 9964 10028 94 50.2 19.9 1.0X -scala_long_add_default 21995 22058 63 22.7 44.0 0.4X -scala_long_add_magic 10726 10757 42 46.6 21.5 0.9X +native_long_add 10044 10057 12 49.8 20.1 1.0X +java_long_add_default 22261 22343 81 22.5 44.5 0.5X +java_long_add_magic 10632 10644 17 47.0 21.3 0.9X +java_long_add_static_magic 9940 9974 59 50.3 19.9 1.0X +scala_long_add_default 22279 22349 68 22.4 44.6 0.5X +scala_long_add_magic 10616 10639 21 47.1 21.2 0.9X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = true codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 22837 22861 31 21.9 45.7 1.0X -java_long_add_default 28062 28099 41 17.8 56.1 0.8X -java_long_add_magic 32026 33081 1131 15.6 64.1 0.7X -java_long_add_static_magic 32031 32038 8 15.6 64.1 0.7X -scala_long_add_default 26219 26263 63 19.1 52.4 0.9X -scala_long_add_magic 32113 32182 65 15.6 64.2 0.7X +native_long_add 22642 23179 680 22.1 45.3 1.0X +java_long_add_default 27400 27497 102 18.2 54.8 0.8X +java_long_add_magic 31896 31958 66 15.7 63.8 0.7X +java_long_add_static_magic 30630 31059 389 16.3 61.3 0.7X +scala_long_add_default 26240 26339 156 19.1 52.5 0.9X +scala_long_add_magic 32268 32325 90 15.5 64.5 0.7X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = false codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 22056 22271 294 22.7 44.1 1.0X -java_long_add_default 25840 25884 40 19.3 51.7 0.9X -java_long_add_magic 31928 31992 55 15.7 63.9 0.7X -java_long_add_static_magic 31464 31507 46 15.9 62.9 0.7X -scala_long_add_default 25851 25932 107 19.3 51.7 0.9X -scala_long_add_magic 32315 32881 629 15.5 64.6 0.7X +native_long_add 21853 22201 538 22.9 43.7 1.0X +java_long_add_default 25860 25886 34 19.3 51.7 0.8X +java_long_add_magic 32191 32350 218 15.5 64.4 0.7X +java_long_add_static_magic 30755 30812 52 16.3 61.5 0.7X +scala_long_add_default 25872 25923 69 19.3 51.7 0.8X +scala_long_add_magic 31910 31922 14 15.7 63.8 0.7X diff --git 
a/sql/core/benchmarks/WideSchemaBenchmark-jdk21-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-jdk21-results.txt index c4b6ef29d7074..91e51c6833aa5 100644 --- a/sql/core/benchmarks/WideSchemaBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/WideSchemaBenchmark-jdk21-results.txt @@ -2,157 +2,157 @@ parsing large select expressions ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor parsing large select: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 select expressions 1 2 1 0.0 1196151.0 1.0X -100 select expressions 2 3 1 0.0 2095800.0 0.6X -2500 select expressions 36 39 4 0.0 35701821.0 0.0X +1 select expressions 1 1 0 0.0 669297.0 1.0X +100 select expressions 3 3 1 0.0 2920356.0 0.2X +2500 select expressions 63 65 1 0.0 63383411.0 0.0X ================================================================================================ optimize large select expressions ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor optimize large select: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -100 columns 4 5 1 0.0 4384067.0 1.0X -1000 columns 28 29 1 0.0 27845199.0 0.2X -10000 columns 287 294 7 0.0 286788665.0 0.0X +100 columns 6 7 1 0.0 6257029.0 1.0X +1000 columns 48 49 1 0.0 47583298.0 0.1X +10000 columns 488 504 11 0.0 487843016.0 0.0X 
================================================================================================ many column field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor many column field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 cols x 100000 rows (read in-mem) 16 22 5 6.4 157.3 1.0X -1 cols x 100000 rows (exec in-mem) 18 23 5 5.6 179.9 0.9X -1 cols x 100000 rows (read parquet) 30 37 7 3.3 302.7 0.5X -1 cols x 100000 rows (write parquet) 98 106 8 1.0 978.0 0.2X -100 cols x 1000 rows (read in-mem) 12 17 4 8.1 123.9 1.3X -100 cols x 1000 rows (exec in-mem) 15 19 5 6.5 153.2 1.0X -100 cols x 1000 rows (read parquet) 24 30 7 4.1 244.2 0.6X -100 cols x 1000 rows (write parquet) 93 103 9 1.1 932.3 0.2X -2500 cols x 40 rows (read in-mem) 55 58 4 1.8 545.9 0.3X -2500 cols x 40 rows (exec in-mem) 100 107 6 1.0 995.7 0.2X -2500 cols x 40 rows (read parquet) 306 308 3 0.3 3060.5 0.1X -2500 cols x 40 rows (write parquet) 135 144 10 0.7 1349.9 0.1X +1 cols x 100000 rows (read in-mem) 15 22 5 6.8 147.1 1.0X +1 cols x 100000 rows (exec in-mem) 16 23 5 6.3 158.4 0.9X +1 cols x 100000 rows (read parquet) 28 35 7 3.6 281.5 0.5X +1 cols x 100000 rows (write parquet) 90 102 8 1.1 897.3 0.2X +100 cols x 1000 rows (read in-mem) 12 16 4 8.3 120.4 1.2X +100 cols x 1000 rows (exec in-mem) 15 18 4 6.8 146.1 1.0X +100 cols x 1000 rows (read parquet) 24 28 7 4.2 237.6 0.6X +100 cols x 1000 rows (write parquet) 85 92 6 1.2 847.2 0.2X +2500 cols x 40 rows (read in-mem) 43 45 5 2.3 426.0 0.3X +2500 cols x 40 rows (exec in-mem) 71 74 4 1.4 708.9 0.2X +2500 cols x 40 rows (read parquet) 295 301 4 0.3 2945.7 0.0X +2500 cols x 40 
rows (write parquet) 115 119 5 0.9 1153.7 0.1X ================================================================================================ wide shallowly nested struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor wide shallowly nested struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 20 26 6 5.0 201.6 1.0X -1 wide x 100000 rows (exec in-mem) 22 28 7 4.5 223.4 0.9X -1 wide x 100000 rows (read parquet) 25 31 8 4.0 249.3 0.8X -1 wide x 100000 rows (write parquet) 99 109 9 1.0 992.7 0.2X -100 wide x 1000 rows (read in-mem) 15 19 5 6.6 151.9 1.3X -100 wide x 1000 rows (exec in-mem) 23 28 6 4.4 229.1 0.9X -100 wide x 1000 rows (read parquet) 22 29 7 4.5 223.1 0.9X -100 wide x 1000 rows (write parquet) 95 103 7 1.1 947.8 0.2X -2500 wide x 40 rows (read in-mem) 23 27 5 4.3 231.0 0.9X -2500 wide x 40 rows (exec in-mem) 192 201 7 0.5 1920.9 0.1X -2500 wide x 40 rows (read parquet) 68 73 6 1.5 681.4 0.3X -2500 wide x 40 rows (write parquet) 102 107 7 1.0 1019.1 0.2X +1 wide x 100000 rows (read in-mem) 20 25 6 4.9 203.7 1.0X +1 wide x 100000 rows (exec in-mem) 21 26 7 4.7 211.7 1.0X +1 wide x 100000 rows (read parquet) 22 30 8 4.5 221.6 0.9X +1 wide x 100000 rows (write parquet) 94 103 9 1.1 939.9 0.2X +100 wide x 1000 rows (read in-mem) 15 18 5 6.8 147.8 1.4X +100 wide x 1000 rows (exec in-mem) 16 19 6 6.3 159.1 1.3X +100 wide x 1000 rows (read parquet) 22 27 7 4.6 216.4 0.9X +100 wide x 1000 rows (write parquet) 88 97 10 1.1 880.7 0.2X +2500 wide x 40 rows (read in-mem) 20 26 7 4.9 204.7 1.0X +2500 wide x 40 rows (exec in-mem) 22 27 7 4.6 216.8 0.9X 
+2500 wide x 40 rows (read parquet) 66 68 5 1.5 658.4 0.3X +2500 wide x 40 rows (write parquet) 94 103 10 1.1 941.6 0.2X ================================================================================================ deeply nested struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor deeply nested struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 deep x 100000 rows (read in-mem) 16 20 6 6.3 158.7 1.0X -1 deep x 100000 rows (exec in-mem) 18 20 4 5.6 177.3 0.9X -1 deep x 100000 rows (read parquet) 19 23 6 5.4 185.3 0.9X -1 deep x 100000 rows (write parquet) 95 104 8 1.1 951.5 0.2X -100 deep x 1000 rows (read in-mem) 44 47 3 2.3 444.0 0.4X -100 deep x 1000 rows (exec in-mem) 452 466 13 0.2 4520.5 0.0X -100 deep x 1000 rows (read parquet) 433 442 13 0.2 4329.1 0.0X -100 deep x 1000 rows (write parquet) 122 129 6 0.8 1224.3 0.1X -250 deep x 400 rows (read in-mem) 191 195 3 0.5 1909.7 0.1X -250 deep x 400 rows (exec in-mem) 2893 2909 23 0.0 28927.4 0.0X -250 deep x 400 rows (read parquet) 2595 2598 4 0.0 25951.8 0.0X -250 deep x 400 rows (write parquet) 268 273 4 0.4 2675.1 0.1X +1 deep x 100000 rows (read in-mem) 15 19 6 6.5 154.5 1.0X +1 deep x 100000 rows (exec in-mem) 18 21 6 5.7 175.5 0.9X +1 deep x 100000 rows (read parquet) 18 23 7 5.6 178.4 0.9X +1 deep x 100000 rows (write parquet) 89 96 8 1.1 887.6 0.2X +100 deep x 1000 rows (read in-mem) 43 46 5 2.3 432.1 0.4X +100 deep x 1000 rows (exec in-mem) 445 453 8 0.2 4448.2 0.0X +100 deep x 1000 rows (read parquet) 445 453 6 0.2 4449.9 0.0X +100 deep x 1000 rows (write parquet) 117 129 9 0.9 1168.8 0.1X +250 deep x 400 rows (read 
in-mem) 192 195 3 0.5 1915.7 0.1X +250 deep x 400 rows (exec in-mem) 2694 2700 9 0.0 26937.2 0.0X +250 deep x 400 rows (read parquet) 2683 2688 6 0.0 26833.1 0.0X +250 deep x 400 rows (write parquet) 267 273 4 0.4 2667.1 0.1X ================================================================================================ bushy struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor bushy struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -1 x 1 deep x 100000 rows (read in-mem) 13 16 4 7.4 134.4 1.0X -1 x 1 deep x 100000 rows (exec in-mem) 15 19 5 6.6 150.9 0.9X -1 x 1 deep x 100000 rows (read parquet) 18 22 5 5.6 177.6 0.8X -1 x 1 deep x 100000 rows (write parquet) 92 98 7 1.1 921.7 0.1X -128 x 8 deep x 1000 rows (read in-mem) 13 16 4 7.4 134.3 1.0X -128 x 8 deep x 1000 rows (exec in-mem) 25 28 5 4.0 248.9 0.5X -128 x 8 deep x 1000 rows (read parquet) 21 26 6 4.7 213.3 0.6X -128 x 8 deep x 1000 rows (write parquet) 91 98 8 1.1 911.6 0.1X -1024 x 11 deep x 100 rows (read in-mem) 19 22 4 5.3 187.6 0.7X -1024 x 11 deep x 100 rows (exec in-mem) 129 133 6 0.8 1286.3 0.1X -1024 x 11 deep x 100 rows (read parquet) 36 40 4 2.8 363.4 0.4X -1024 x 11 deep x 100 rows (write parquet) 96 102 10 1.0 962.5 0.1X +1 x 1 deep x 100000 rows (read in-mem) 13 15 4 7.7 130.2 1.0X +1 x 1 deep x 100000 rows (exec in-mem) 15 18 4 6.6 150.8 0.9X +1 x 1 deep x 100000 rows (read parquet) 18 21 5 5.6 177.4 0.7X +1 x 1 deep x 100000 rows (write parquet) 88 94 6 1.1 875.7 0.1X +128 x 8 deep x 1000 rows (read in-mem) 13 16 5 7.6 131.7 1.0X +128 x 8 deep x 1000 rows (exec in-mem) 15 18 5 6.7 148.7 0.9X +128 x 8 deep 
x 1000 rows (read parquet) 22 26 6 4.6 215.6 0.6X +128 x 8 deep x 1000 rows (write parquet) 86 93 6 1.2 864.9 0.2X +1024 x 11 deep x 100 rows (read in-mem) 18 21 6 5.7 176.2 0.7X +1024 x 11 deep x 100 rows (exec in-mem) 19 23 6 5.4 185.8 0.7X +1024 x 11 deep x 100 rows (read parquet) 35 37 4 2.9 349.8 0.4X +1024 x 11 deep x 100 rows (write parquet) 91 94 5 1.1 912.5 0.1X ================================================================================================ wide array field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor wide array field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 15 18 4 6.6 151.1 1.0X -1 wide x 100000 rows (exec in-mem) 17 19 4 5.8 171.5 0.9X -1 wide x 100000 rows (read parquet) 17 21 5 5.8 172.7 0.9X -1 wide x 100000 rows (write parquet) 93 103 8 1.1 931.5 0.2X -100 wide x 1000 rows (read in-mem) 11 13 4 8.9 112.1 1.3X -100 wide x 1000 rows (exec in-mem) 13 15 4 7.8 128.9 1.2X -100 wide x 1000 rows (read parquet) 17 21 5 5.9 170.7 0.9X -100 wide x 1000 rows (write parquet) 90 98 9 1.1 900.2 0.2X -2500 wide x 40 rows (read in-mem) 11 13 3 9.0 111.5 1.4X -2500 wide x 40 rows (exec in-mem) 13 16 4 7.7 129.6 1.2X -2500 wide x 40 rows (read parquet) 17 19 4 5.9 168.5 0.9X -2500 wide x 40 rows (write parquet) 91 98 7 1.1 906.0 0.2X +1 wide x 100000 rows (read in-mem) 15 17 5 6.7 148.8 1.0X +1 wide x 100000 rows (exec in-mem) 17 19 4 5.9 170.7 0.9X +1 wide x 100000 rows (read parquet) 17 21 6 5.8 172.8 0.9X +1 wide x 100000 rows (write parquet) 88 93 9 1.1 878.8 0.2X +100 wide x 1000 rows (read in-mem) 11 14 5 9.1 110.1 1.4X +100 wide x 
1000 rows (exec in-mem) 13 16 5 7.9 127.1 1.2X +100 wide x 1000 rows (read parquet) 17 21 6 5.9 168.2 0.9X +100 wide x 1000 rows (write parquet) 83 90 8 1.2 832.1 0.2X +2500 wide x 40 rows (read in-mem) 11 15 6 9.1 109.8 1.4X +2500 wide x 40 rows (exec in-mem) 12 17 6 8.0 125.0 1.2X +2500 wide x 40 rows (read parquet) 17 23 7 6.0 167.5 0.9X +2500 wide x 40 rows (write parquet) 84 94 8 1.2 841.2 0.2X ================================================================================================ wide map field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor wide map field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 12 14 3 8.3 121.0 1.0X -1 wide x 100000 rows (exec in-mem) 16 18 3 6.3 159.0 0.8X -1 wide x 100000 rows (read parquet) 21 24 5 4.7 213.2 0.6X -1 wide x 100000 rows (write parquet) 91 96 6 1.1 905.2 0.1X -100 wide x 1000 rows (read in-mem) 8 9 3 13.3 75.2 1.6X -100 wide x 1000 rows (exec in-mem) 10 12 3 10.4 96.1 1.3X -100 wide x 1000 rows (read parquet) 19 21 4 5.3 187.5 0.6X -100 wide x 1000 rows (write parquet) 86 90 5 1.2 858.4 0.1X -2500 wide x 40 rows (read in-mem) 9 11 2 10.8 92.8 1.3X -2500 wide x 40 rows (exec in-mem) 11 13 3 9.0 111.5 1.1X -2500 wide x 40 rows (read parquet) 19 22 4 5.2 191.8 0.6X -2500 wide x 40 rows (write parquet) 90 94 5 1.1 899.2 0.1X +1 wide x 100000 rows (read in-mem) 12 15 4 8.3 121.2 1.0X +1 wide x 100000 rows (exec in-mem) 16 21 5 6.3 159.3 0.8X +1 wide x 100000 rows (read parquet) 21 24 5 4.8 208.1 0.6X +1 wide x 100000 rows (write parquet) 84 89 5 1.2 842.0 0.1X +100 wide x 1000 rows (read in-mem) 8 9 2 13.1 76.5 
1.6X +100 wide x 1000 rows (exec in-mem) 10 11 3 10.5 95.3 1.3X +100 wide x 1000 rows (read parquet) 18 20 6 5.6 178.2 0.7X +100 wide x 1000 rows (write parquet) 80 85 6 1.2 801.0 0.2X +2500 wide x 40 rows (read in-mem) 9 10 3 10.7 93.4 1.3X +2500 wide x 40 rows (exec in-mem) 11 12 2 8.9 111.7 1.1X +2500 wide x 40 rows (read parquet) 18 21 6 5.5 183.2 0.7X +2500 wide x 40 rows (write parquet) 82 89 8 1.2 822.1 0.1X diff --git a/sql/core/benchmarks/WideSchemaBenchmark-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-results.txt index e61b27a7c727f..4931872223cfe 100644 --- a/sql/core/benchmarks/WideSchemaBenchmark-results.txt +++ b/sql/core/benchmarks/WideSchemaBenchmark-results.txt @@ -2,157 +2,157 @@ parsing large select expressions ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor parsing large select: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 select expressions 1 2 1 0.0 1177503.0 1.0X -100 select expressions 2 3 1 0.0 2179549.0 0.5X -2500 select expressions 40 43 4 0.0 39575214.0 0.0X +1 select expressions 1 1 0 0.0 671442.0 1.0X +100 select expressions 3 3 0 0.0 3181250.0 0.2X +2500 select expressions 69 72 2 0.0 69457245.0 0.0X ================================================================================================ optimize large select expressions ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor optimize large select: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------ -100 columns 5 5 1 0.0 4710103.0 1.0X -1000 columns 31 32 1 0.0 30879997.0 0.2X -10000 columns 309 327 10 0.0 309351929.0 0.0X +100 columns 7 8 1 0.0 6678458.0 1.0X +1000 columns 53 55 2 0.0 53079798.0 0.1X +10000 columns 530 538 7 0.0 529951923.0 0.0X ================================================================================================ many column field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor many column field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 cols x 100000 rows (read in-mem) 16 22 3 6.1 163.3 1.0X -1 cols x 100000 rows (exec in-mem) 16 22 3 6.1 162.8 1.0X -1 cols x 100000 rows (read parquet) 29 36 6 3.4 290.4 0.6X -1 cols x 100000 rows (write parquet) 96 105 8 1.0 961.4 0.2X -100 cols x 1000 rows (read in-mem) 13 16 3 7.8 128.8 1.3X -100 cols x 1000 rows (exec in-mem) 16 19 3 6.3 159.4 1.0X -100 cols x 1000 rows (read parquet) 24 28 4 4.2 240.6 0.7X -100 cols x 1000 rows (write parquet) 93 98 5 1.1 931.7 0.2X -2500 cols x 40 rows (read in-mem) 57 61 4 1.8 566.2 0.3X -2500 cols x 40 rows (exec in-mem) 105 108 5 0.9 1054.4 0.2X -2500 cols x 40 rows (read parquet) 285 287 2 0.4 2852.5 0.1X -2500 cols x 40 rows (write parquet) 136 143 7 0.7 1358.0 0.1X +1 cols x 100000 rows (read in-mem) 18 23 4 5.6 177.9 1.0X +1 cols x 100000 rows (exec in-mem) 17 23 4 5.8 171.6 1.0X +1 cols x 100000 rows (read parquet) 30 36 6 3.3 302.9 0.6X +1 cols x 100000 rows (write parquet) 91 101 11 1.1 909.2 0.2X +100 cols x 1000 rows (read in-mem) 12 16 3 8.0 
124.3 1.4X +100 cols x 1000 rows (exec in-mem) 15 19 3 6.5 154.0 1.2X +100 cols x 1000 rows (read parquet) 25 29 4 4.0 248.6 0.7X +100 cols x 1000 rows (write parquet) 87 96 8 1.1 871.9 0.2X +2500 cols x 40 rows (read in-mem) 46 50 4 2.2 461.8 0.4X +2500 cols x 40 rows (exec in-mem) 77 81 4 1.3 766.0 0.2X +2500 cols x 40 rows (read parquet) 285 290 3 0.4 2849.8 0.1X +2500 cols x 40 rows (write parquet) 127 134 6 0.8 1265.8 0.1X ================================================================================================ wide shallowly nested struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor wide shallowly nested struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 21 25 3 4.8 208.5 1.0X -1 wide x 100000 rows (exec in-mem) 23 26 4 4.4 228.5 0.9X -1 wide x 100000 rows (read parquet) 23 28 4 4.3 231.2 0.9X -1 wide x 100000 rows (write parquet) 100 109 6 1.0 1002.6 0.2X -100 wide x 1000 rows (read in-mem) 15 18 4 6.7 148.9 1.4X -100 wide x 1000 rows (exec in-mem) 21 25 4 4.7 214.8 1.0X -100 wide x 1000 rows (read parquet) 22 26 4 4.6 218.0 1.0X -100 wide x 1000 rows (write parquet) 98 102 5 1.0 975.5 0.2X -2500 wide x 40 rows (read in-mem) 23 27 3 4.4 227.3 0.9X -2500 wide x 40 rows (exec in-mem) 195 199 4 0.5 1951.3 0.1X -2500 wide x 40 rows (read parquet) 71 75 5 1.4 707.3 0.3X -2500 wide x 40 rows (write parquet) 107 110 4 0.9 1065.6 0.2X +1 wide x 100000 rows (read in-mem) 24 30 6 4.3 235.0 1.0X +1 wide x 100000 rows (exec in-mem) 25 29 4 4.0 252.3 0.9X +1 wide x 100000 rows (read parquet) 25 29 4 3.9 254.4 0.9X +1 wide x 100000 rows (write 
parquet) 101 110 7 1.0 1010.2 0.2X +100 wide x 1000 rows (read in-mem) 20 23 4 5.1 195.0 1.2X +100 wide x 1000 rows (exec in-mem) 19 22 3 5.2 192.1 1.2X +100 wide x 1000 rows (read parquet) 25 28 4 4.0 249.4 0.9X +100 wide x 1000 rows (write parquet) 96 102 7 1.0 957.9 0.2X +2500 wide x 40 rows (read in-mem) 24 27 4 4.2 240.1 1.0X +2500 wide x 40 rows (exec in-mem) 25 29 5 3.9 253.4 0.9X +2500 wide x 40 rows (read parquet) 73 77 4 1.4 727.0 0.3X +2500 wide x 40 rows (write parquet) 106 111 4 0.9 1055.1 0.2X ================================================================================================ deeply nested struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor deeply nested struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 deep x 100000 rows (read in-mem) 17 20 3 5.8 171.8 1.0X -1 deep x 100000 rows (exec in-mem) 18 21 3 5.5 183.1 0.9X -1 deep x 100000 rows (read parquet) 19 22 4 5.4 186.1 0.9X -1 deep x 100000 rows (write parquet) 96 102 6 1.0 962.2 0.2X -100 deep x 1000 rows (read in-mem) 31 33 3 3.2 314.7 0.5X -100 deep x 1000 rows (exec in-mem) 462 469 4 0.2 4622.7 0.0X -100 deep x 1000 rows (read parquet) 458 465 8 0.2 4576.2 0.0X -100 deep x 1000 rows (write parquet) 110 116 4 0.9 1100.9 0.2X -250 deep x 400 rows (read in-mem) 123 127 4 0.8 1230.2 0.1X -250 deep x 400 rows (exec in-mem) 2940 2943 4 0.0 29395.9 0.0X -250 deep x 400 rows (read parquet) 2723 2741 25 0.0 27229.1 0.0X -250 deep x 400 rows (write parquet) 206 219 11 0.5 2055.2 0.1X +1 deep x 100000 rows (read in-mem) 17 20 4 5.8 172.3 1.0X +1 deep x 100000 rows (exec in-mem) 20 23 4 5.1 195.7 0.9X +1 
deep x 100000 rows (read parquet) 21 24 4 4.7 211.9 0.8X +1 deep x 100000 rows (write parquet) 93 100 7 1.1 931.1 0.2X +100 deep x 1000 rows (read in-mem) 39 41 3 2.6 389.5 0.4X +100 deep x 1000 rows (exec in-mem) 430 434 6 0.2 4300.2 0.0X +100 deep x 1000 rows (read parquet) 439 450 6 0.2 4388.3 0.0X +100 deep x 1000 rows (write parquet) 114 118 4 0.9 1141.3 0.2X +250 deep x 400 rows (read in-mem) 155 160 5 0.6 1552.9 0.1X +250 deep x 400 rows (exec in-mem) 2583 2589 9 0.0 25828.0 0.0X +250 deep x 400 rows (read parquet) 2598 2615 24 0.0 25976.7 0.0X +250 deep x 400 rows (write parquet) 233 249 13 0.4 2334.4 0.1X ================================================================================================ bushy struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor bushy struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -1 x 1 deep x 100000 rows (read in-mem) 13 15 3 7.6 132.0 1.0X -1 x 1 deep x 100000 rows (exec in-mem) 15 18 4 6.8 147.3 0.9X -1 x 1 deep x 100000 rows (read parquet) 17 19 3 5.8 172.4 0.8X -1 x 1 deep x 100000 rows (write parquet) 97 100 4 1.0 965.0 0.1X -128 x 8 deep x 1000 rows (read in-mem) 15 17 3 6.9 145.3 0.9X -128 x 8 deep x 1000 rows (exec in-mem) 26 28 3 3.9 257.4 0.5X -128 x 8 deep x 1000 rows (read parquet) 22 24 3 4.5 221.1 0.6X -128 x 8 deep x 1000 rows (write parquet) 92 95 5 1.1 916.0 0.1X -1024 x 11 deep x 100 rows (read in-mem) 19 22 3 5.3 188.5 0.7X -1024 x 11 deep x 100 rows (exec in-mem) 126 128 2 0.8 1257.4 0.1X -1024 x 11 deep x 100 rows (read parquet) 37 39 3 2.7 368.9 0.4X -1024 x 11 deep x 100 rows (write parquet) 97 102 5 1.0 971.1 
0.1X +1 x 1 deep x 100000 rows (read in-mem) 15 18 3 6.5 153.9 1.0X +1 x 1 deep x 100000 rows (exec in-mem) 17 19 3 5.9 168.3 0.9X +1 x 1 deep x 100000 rows (read parquet) 20 23 4 5.0 200.6 0.8X +1 x 1 deep x 100000 rows (write parquet) 92 96 4 1.1 919.2 0.2X +128 x 8 deep x 1000 rows (read in-mem) 16 19 4 6.1 164.8 0.9X +128 x 8 deep x 1000 rows (exec in-mem) 16 19 4 6.2 161.9 1.0X +128 x 8 deep x 1000 rows (read parquet) 22 26 4 4.5 223.7 0.7X +128 x 8 deep x 1000 rows (write parquet) 90 95 7 1.1 900.9 0.2X +1024 x 11 deep x 100 rows (read in-mem) 19 21 3 5.4 186.8 0.8X +1024 x 11 deep x 100 rows (exec in-mem) 21 23 3 4.8 206.9 0.7X +1024 x 11 deep x 100 rows (read parquet) 37 40 4 2.7 373.4 0.4X +1024 x 11 deep x 100 rows (write parquet) 96 105 11 1.0 965.0 0.2X ================================================================================================ wide array field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor wide array field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 15 17 3 6.8 147.6 1.0X -1 wide x 100000 rows (exec in-mem) 17 19 3 6.0 167.8 0.9X -1 wide x 100000 rows (read parquet) 17 20 3 5.9 170.6 0.9X -1 wide x 100000 rows (write parquet) 93 96 3 1.1 926.3 0.2X -100 wide x 1000 rows (read in-mem) 11 12 3 9.1 109.4 1.3X -100 wide x 1000 rows (exec in-mem) 12 14 3 8.0 125.0 1.2X -100 wide x 1000 rows (read parquet) 17 19 3 6.0 165.8 0.9X -100 wide x 1000 rows (write parquet) 89 94 4 1.1 885.3 0.2X -2500 wide x 40 rows (read in-mem) 11 12 3 9.4 106.5 1.4X -2500 wide x 40 rows (exec in-mem) 12 14 3 8.2 121.9 1.2X -2500 wide x 40 rows (read 
parquet) 16 18 3 6.2 162.4 0.9X -2500 wide x 40 rows (write parquet) 89 94 5 1.1 885.5 0.2X +1 wide x 100000 rows (read in-mem) 17 20 4 5.7 174.7 1.0X +1 wide x 100000 rows (exec in-mem) 19 23 4 5.1 194.6 0.9X +1 wide x 100000 rows (read parquet) 20 26 10 5.1 196.4 0.9X +1 wide x 100000 rows (write parquet) 92 98 5 1.1 920.3 0.2X +100 wide x 1000 rows (read in-mem) 12 15 4 8.1 123.2 1.4X +100 wide x 1000 rows (exec in-mem) 15 19 4 6.9 145.0 1.2X +100 wide x 1000 rows (read parquet) 19 23 4 5.3 190.3 0.9X +100 wide x 1000 rows (write parquet) 89 95 4 1.1 894.3 0.2X +2500 wide x 40 rows (read in-mem) 13 16 4 7.8 128.9 1.4X +2500 wide x 40 rows (exec in-mem) 15 17 3 6.7 149.5 1.2X +2500 wide x 40 rows (read parquet) 19 21 4 5.4 185.9 0.9X +2500 wide x 40 rows (write parquet) 88 93 7 1.1 877.3 0.2X ================================================================================================ wide map field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor wide map field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 12 13 2 8.3 120.7 1.0X -1 wide x 100000 rows (exec in-mem) 15 16 2 6.8 147.6 0.8X -1 wide x 100000 rows (read parquet) 20 26 4 5.0 201.5 0.6X -1 wide x 100000 rows (write parquet) 89 93 4 1.1 894.7 0.1X -100 wide x 1000 rows (read in-mem) 7 8 1 13.5 74.1 1.6X -100 wide x 1000 rows (exec in-mem) 9 10 2 10.9 91.6 1.3X -100 wide x 1000 rows (read parquet) 18 20 3 5.6 177.1 0.7X -100 wide x 1000 rows (write parquet) 84 87 4 1.2 843.3 0.1X -2500 wide x 40 rows (read in-mem) 9 10 1 11.0 91.3 1.3X -2500 wide x 40 rows (exec in-mem) 11 12 2 9.2 108.4 1.1X -2500 
wide x 40 rows (read parquet) 18 20 3 5.6 180.1 0.7X -2500 wide x 40 rows (write parquet) 88 92 4 1.1 881.3 0.1X +1 wide x 100000 rows (read in-mem) 14 15 2 7.3 136.4 1.0X +1 wide x 100000 rows (exec in-mem) 17 18 2 6.1 165.2 0.8X +1 wide x 100000 rows (read parquet) 22 25 5 4.6 217.3 0.6X +1 wide x 100000 rows (write parquet) 87 91 6 1.2 866.7 0.2X +100 wide x 1000 rows (read in-mem) 8 10 3 12.5 80.3 1.7X +100 wide x 1000 rows (exec in-mem) 10 12 2 9.7 103.2 1.3X +100 wide x 1000 rows (read parquet) 21 24 4 4.9 205.3 0.7X +100 wide x 1000 rows (write parquet) 82 87 6 1.2 821.1 0.2X +2500 wide x 40 rows (read in-mem) 10 12 3 9.7 103.1 1.3X +2500 wide x 40 rows (exec in-mem) 12 14 3 8.2 121.4 1.1X +2500 wide x 40 rows (read parquet) 20 22 4 5.0 199.0 0.7X +2500 wide x 40 rows (write parquet) 84 89 7 1.2 842.7 0.2X diff --git a/sql/core/benchmarks/WideTableBenchmark-jdk21-results.txt b/sql/core/benchmarks/WideTableBenchmark-jdk21-results.txt index 04f1737afb586..62aea5f496f92 100644 --- a/sql/core/benchmarks/WideTableBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/WideTableBenchmark-jdk21-results.txt @@ -2,16 +2,16 @@ projection on wide table ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor projection on wide table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -split threshold 10 2606 2701 94 0.4 2485.4 1.0X -split threshold 100 2174 2193 24 0.5 2073.8 1.2X -split threshold 1024 1652 1662 9 0.6 1575.6 1.6X -split threshold 2048 1618 1625 6 0.6 1543.3 1.6X -split threshold 4096 1713 1734 12 0.6 1633.6 1.5X -split threshold 8192 2321 2336 25 0.5 2213.4 1.1X -split threshold 65536 20726 20950 265 0.1 19765.7 0.1X 
+split threshold 10 2606 2655 71 0.4 2485.4 1.0X +split threshold 100 2142 2160 14 0.5 2043.1 1.2X +split threshold 1024 1632 1711 124 0.6 1556.0 1.6X +split threshold 2048 1608 1623 14 0.7 1533.1 1.6X +split threshold 4096 1725 1741 21 0.6 1644.7 1.5X +split threshold 8192 2456 2464 7 0.4 2342.5 1.1X +split threshold 65536 21150 21518 353 0.0 20170.3 0.1X diff --git a/sql/core/benchmarks/WideTableBenchmark-results.txt b/sql/core/benchmarks/WideTableBenchmark-results.txt index 1dda0fdd03fb9..e3f5c9bebeee6 100644 --- a/sql/core/benchmarks/WideTableBenchmark-results.txt +++ b/sql/core/benchmarks/WideTableBenchmark-results.txt @@ -2,16 +2,16 @@ projection on wide table ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor projection on wide table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -split threshold 10 2543 2625 73 0.4 2425.1 1.0X -split threshold 100 2035 2074 30 0.5 1940.5 1.2X -split threshold 1024 1641 1658 12 0.6 1565.4 1.5X -split threshold 2048 1609 1625 12 0.7 1534.9 1.6X -split threshold 4096 1668 1681 15 0.6 1590.4 1.5X -split threshold 8192 2119 2153 50 0.5 2021.2 1.2X -split threshold 65536 21512 21816 366 0.0 20515.1 0.1X +split threshold 10 2549 2628 72 0.4 2431.4 1.0X +split threshold 100 2035 2068 32 0.5 1940.7 1.3X +split threshold 1024 1674 1703 27 0.6 1596.6 1.5X +split threshold 2048 1612 1618 6 0.7 1537.6 1.6X +split threshold 4096 1663 1686 17 0.6 1585.9 1.5X +split threshold 8192 2151 2162 13 0.5 2051.2 1.2X +split threshold 65536 21995 22268 233 0.0 20976.1 0.1X diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java 
b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java index 7f5b69a09e90c..7fb8be7caf286 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java @@ -34,6 +34,7 @@ import org.apache.spark.sql.types.MapType; import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.types.VariantType; +import org.apache.spark.types.variant.VariantSchema; /** * Contains necessary information representing a Parquet column, either of primitive or nested type. @@ -43,6 +44,14 @@ final class ParquetColumnVector { private final List children; private final WritableColumnVector vector; + // Describes the file schema of the Parquet variant column. When it is not null, `children` + // contains only one child that reads the underlying file content. This `ParquetColumnVector` + // should assemble Spark variant values from the file content. + private VariantSchema variantSchema; + // Only meaningful if `variantSchema` is not null. See `SparkShreddingUtils.getFieldsToExtract` + // for its meaning. + private FieldToExtract[] fieldsToExtract; + /** * Repetition & Definition levels * These are allocated only for leaf columns; for non-leaf columns, they simply maintain @@ -101,7 +110,19 @@ final class ParquetColumnVector { } } - if (isPrimitive) { + if (column.variantFileType().isDefined()) { + ParquetColumn fileContentCol = column.variantFileType().get(); + WritableColumnVector fileContent = memoryMode == MemoryMode.OFF_HEAP + ? 
new OffHeapColumnVector(capacity, fileContentCol.sparkType()) + : new OnHeapColumnVector(capacity, fileContentCol.sparkType()); + ParquetColumnVector contentVector = new ParquetColumnVector(fileContentCol, + fileContent, capacity, memoryMode, missingColumns, false, null); + children.add(contentVector); + variantSchema = SparkShreddingUtils.buildVariantSchema(fileContentCol.sparkType()); + fieldsToExtract = SparkShreddingUtils.getFieldsToExtract(column.sparkType(), variantSchema); + repetitionLevels = contentVector.repetitionLevels; + definitionLevels = contentVector.definitionLevels; + } else if (isPrimitive) { if (column.repetitionLevel() > 0) { repetitionLevels = allocateLevelsVector(capacity, memoryMode); } @@ -167,6 +188,17 @@ private static void getLeavesHelper(ParquetColumnVector vector, List Encoder[T]): Dataset[T] = { + val dataset = new Dataset(sparkSession, logicalPlan, encoderGenerator) + // Eagerly bind the encoder so we verify that the encoder matches the underlying + // schema. The user will get an error if this is not the case. + // optimization: it is guaranteed that [[InternalRow]] can be converted to [[Row]] so + // do not do this check in that case. this check can be expensive since it requires running + // the whole [[Analyzer]] to resolve the deserializer + if (!dataset.queryExecution.isLazyAnalysis + && dataset.encoder.clsTag.runtimeClass != classOf[Row]) { + dataset.resolvedEnc + } + dataset + } + def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan): DataFrame = sparkSession.withActive { val qe = sparkSession.sessionState.executePlan(logicalPlan) @@ -225,7 +242,7 @@ class Dataset[T] private[sql]( queryExecution.sparkSession } - import sparkSession.RichColumn + import sparkSession.toRichColumn // A globally unique id of this Dataset. 
private[sql] val id = Dataset.curId.getAndIncrement() @@ -241,8 +258,13 @@ class Dataset[T] private[sql]( this(queryExecution, () => encoder) } + def this( + sparkSession: SparkSession, logicalPlan: LogicalPlan, encoderGenerator: () => Encoder[T]) = { + this(sparkSession.sessionState.executePlan(logicalPlan), encoderGenerator) + } + def this(sparkSession: SparkSession, logicalPlan: LogicalPlan, encoder: Encoder[T]) = { - this(sparkSession.sessionState.executePlan(logicalPlan), encoder) + this(sparkSession, logicalPlan, () => encoder) } def this(sqlContext: SQLContext, logicalPlan: LogicalPlan, encoder: Encoder[T]) = { @@ -280,9 +302,9 @@ class Dataset[T] private[sql]( // The resolved `ExpressionEncoder` which can be used to turn rows to objects of type T, after // collecting rows to the driver side. - private lazy val resolvedEnc = { - exprEnc.resolveAndBind(logicalPlan.output, sparkSession.sessionState.analyzer) - } + private lazy val resolvedEnc = exprEnc.resolveAndBind( + queryExecution.commandExecuted.output, sparkSession.sessionState.analyzer) + private implicit def classTag: ClassTag[T] = encoder.clsTag @@ -508,16 +530,8 @@ class Dataset[T] private[sql]( /** @inheritdoc */ @scala.annotation.varargs - def toDF(colNames: String*): DataFrame = { - require(schema.size == colNames.size, - "The number of columns doesn't match.\n" + - s"Old column names (${schema.size}): " + schema.fields.map(_.name).mkString(", ") + "\n" + - s"New column names (${colNames.size}): " + colNames.mkString(", ")) - - val newCols = logicalPlan.output.zip(colNames).map { case (oldAttribute, newName) => - Column(oldAttribute).as(newName) - } - select(newCols : _*) + def toDF(colNames: String*): DataFrame = withPlan { + UnresolvedSubqueryColumnAliases(colNames, logicalPlan) } /** @inheritdoc */ @@ -709,6 +723,38 @@ class Dataset[T] private[sql]( new Dataset(sparkSession, joinWith, joinEncoder) } + private[sql] def lateralJoin( + right: DS[_], joinExprs: Option[Column], joinType: JoinType): 
DataFrame = { + withPlan { + LateralJoin( + logicalPlan, + LateralSubquery(right.logicalPlan), + joinType, + joinExprs.map(_.expr) + ) + } + } + + /** @inheritdoc */ + def lateralJoin(right: DS[_]): DataFrame = { + lateralJoin(right, None, Inner) + } + + /** @inheritdoc */ + def lateralJoin(right: DS[_], joinExprs: Column): DataFrame = { + lateralJoin(right, Some(joinExprs), Inner) + } + + /** @inheritdoc */ + def lateralJoin(right: DS[_], joinType: String): DataFrame = { + lateralJoin(right, None, LateralJoinType(joinType)) + } + + /** @inheritdoc */ + def lateralJoin(right: DS[_], joinExprs: Column, joinType: String): DataFrame = { + lateralJoin(right, Some(joinExprs), LateralJoinType(joinType)) + } + // TODO(SPARK-22947): Fix the DataFrame API. private[sql] def joinAsOf( other: Dataset[_], @@ -822,7 +868,7 @@ class Dataset[T] private[sql]( } /** @inheritdoc */ - def as(alias: String): Dataset[T] = withTypedPlan { + def as(alias: String): Dataset[T] = withSameTypedPlan { SubqueryAlias(alias, logicalPlan) } @@ -877,7 +923,7 @@ class Dataset[T] private[sql]( } /** @inheritdoc */ - def filter(condition: Column): Dataset[T] = withTypedPlan { + def filter(condition: Column): Dataset[T] = withSameTypedPlan { Filter(condition.expr, logicalPlan) } @@ -981,7 +1027,7 @@ class Dataset[T] private[sql]( /** @inheritdoc */ def transpose(indexColumn: Column): DataFrame = withPlan { UnresolvedTranspose( - Seq(indexColumn.named), + Seq(indexColumn.expr), logicalPlan ) } @@ -994,23 +1040,42 @@ class Dataset[T] private[sql]( ) } + /** + * Converts the DataFrame into a `TableArg` object, which can be used as a table argument + * in a user-defined table function (UDTF). + * + * After obtaining a `TableArg` from a DataFrame using this method, you can specify + * partitioning and ordering for the table argument by calling methods such as `partitionBy`, + * `orderBy`, and `withSinglePartition` on the `TableArg` instance. 
+ * - partitionBy(*cols): Partitions the data based on the specified columns. + * This method cannot be called after withSinglePartition() has been called. + * - orderBy(*cols): Orders the data within partitions based on the specified columns. + * - withSinglePartition(): Indicates that the data should be treated as a single partition. + * This method cannot be called after partitionBy() has been called. + * + * @group untypedrel + * @since 4.0.0 + */ + def asTable(): TableArg = { + new TableArg( + FunctionTableSubqueryArgumentExpression(plan = logicalPlan), + sparkSession + ) + } + /** @inheritdoc */ def scalar(): Column = { - Column(ExpressionColumnNode( - ScalarSubqueryExpr(SubExprUtils.removeLazyOuterReferences(logicalPlan), - hasExplicitOuterRefs = true))) + Column(ExpressionColumnNode(ScalarSubqueryExpr(logicalPlan))) } /** @inheritdoc */ def exists(): Column = { - Column(ExpressionColumnNode( - Exists(SubExprUtils.removeLazyOuterReferences(logicalPlan), - hasExplicitOuterRefs = true))) + Column(ExpressionColumnNode(Exists(logicalPlan))) } /** @inheritdoc */ @scala.annotation.varargs - def observe(name: String, expr: Column, exprs: Column*): Dataset[T] = withTypedPlan { + def observe(name: String, expr: Column, exprs: Column*): Dataset[T] = withSameTypedPlan { CollectMetrics(name, (expr +: exprs).map(_.named), logicalPlan, id) } @@ -1022,12 +1087,12 @@ class Dataset[T] private[sql]( } /** @inheritdoc */ - def limit(n: Int): Dataset[T] = withTypedPlan { + def limit(n: Int): Dataset[T] = withSameTypedPlan { Limit(Literal(n), logicalPlan) } /** @inheritdoc */ - def offset(n: Int): Dataset[T] = withTypedPlan { + def offset(n: Int): Dataset[T] = withSameTypedPlan { Offset(Literal(n), logicalPlan) } @@ -1114,7 +1179,7 @@ class Dataset[T] private[sql]( /** @inheritdoc */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): Dataset[T] = { - withTypedPlan { + withSameTypedPlan { Sample(0.0, fraction, withReplacement, seed, logicalPlan) } } @@ -1210,29 
+1275,14 @@ class Dataset[T] private[sql]( require(colNames.size == cols.size, s"The size of column names: ${colNames.size} isn't equal to " + s"the size of columns: ${cols.size}") - SchemaUtils.checkColumnNameDuplication( - colNames, - sparkSession.sessionState.conf.caseSensitiveAnalysis) - - val resolver = sparkSession.sessionState.analyzer.resolver - val output = queryExecution.analyzed.output - - val columnSeq = colNames.zip(cols) - - val replacedAndExistingColumns = output.map { field => - columnSeq.find { case (colName, _) => - resolver(field.name, colName) - } match { - case Some((colName: String, col: Column)) => col.as(colName) - case _ => Column(field) - } + withPlan { + Project( + Seq( + UnresolvedStarWithColumns( + colNames = colNames, + exprs = cols.map(_.expr))), + logicalPlan) } - - val newColumns = columnSeq.filter { case (colName, col) => - !output.exists(f => resolver(f.name, colName)) - }.map { case (colName, col) => col.as(colName) } - - select(replacedAndExistingColumns ++ newColumns : _*) } /** @inheritdoc */ @@ -1259,26 +1309,13 @@ class Dataset[T] private[sql]( require(colNames.size == newColNames.size, s"The size of existing column names: ${colNames.size} isn't equal to " + s"the size of new column names: ${newColNames.size}") - - val resolver = sparkSession.sessionState.analyzer.resolver - val output: Seq[NamedExpression] = queryExecution.analyzed.output - var shouldRename = false - - val projectList = colNames.zip(newColNames).foldLeft(output) { - case (attrs, (existingName, newName)) => - attrs.map(attr => - if (resolver(attr.name, existingName)) { - shouldRename = true - Alias(attr, newName)() - } else { - attr - } - ) - } - if (shouldRename) { - withPlan(Project(projectList, logicalPlan)) - } else { - toDF() + withPlan { + Project( + Seq( + UnresolvedStarWithColumnsRenames( + existingNames = colNames, + newNames = newColNames)), + logicalPlan) } } @@ -1312,7 +1349,7 @@ class Dataset[T] private[sql]( def dropDuplicates(): Dataset[T] = 
dropDuplicates(this.columns) /** @inheritdoc */ - def dropDuplicates(colNames: Seq[String]): Dataset[T] = withTypedPlan { + def dropDuplicates(colNames: Seq[String]): Dataset[T] = withSameTypedPlan { val groupCols = groupColsFromDropDuplicates(colNames) Deduplicate(groupCols, logicalPlan) } @@ -1323,7 +1360,7 @@ class Dataset[T] private[sql]( } /** @inheritdoc */ - def dropDuplicatesWithinWatermark(colNames: Seq[String]): Dataset[T] = withTypedPlan { + def dropDuplicatesWithinWatermark(colNames: Seq[String]): Dataset[T] = withSameTypedPlan { val groupCols = groupColsFromDropDuplicates(colNames) // UnsupportedOperationChecker will fail the query if this is called with batch Dataset. DeduplicateWithinWatermark(groupCols, logicalPlan) @@ -1483,7 +1520,7 @@ class Dataset[T] private[sql]( } /** @inheritdoc */ - def repartition(numPartitions: Int): Dataset[T] = withTypedPlan { + def repartition(numPartitions: Int): Dataset[T] = withSameTypedPlan { Repartition(numPartitions, shuffle = true, logicalPlan) } @@ -1498,7 +1535,7 @@ class Dataset[T] private[sql]( s"""Invalid partitionExprs specified: $sortOrders |For range partitioning use repartitionByRange(...) instead. 
""".stripMargin) - withTypedPlan { + withSameTypedPlan { RepartitionByExpression(partitionExprs.map(_.expr), logicalPlan, numPartitions) } } @@ -1511,13 +1548,13 @@ class Dataset[T] private[sql]( case expr: SortOrder => expr case expr: Expression => SortOrder(expr, Ascending) }) - withTypedPlan { + withSameTypedPlan { RepartitionByExpression(sortOrder, logicalPlan, numPartitions) } } /** @inheritdoc */ - def coalesce(numPartitions: Int): Dataset[T] = withTypedPlan { + def coalesce(numPartitions: Int): Dataset[T] = withSameTypedPlan { Repartition(numPartitions, shuffle = false, logicalPlan) } @@ -1590,6 +1627,7 @@ class Dataset[T] private[sql]( name = TableIdentifier(identifier.last), userSpecifiedColumns = Nil, comment = None, + collation = None, properties = Map.empty, originalText = None, plan = logicalPlan, @@ -2211,7 +2249,7 @@ class Dataset[T] private[sql]( SortOrder(expr, Ascending) } } - withTypedPlan { + withSameTypedPlan { Sort(sortOrder, global = global, logicalPlan) } } @@ -2226,6 +2264,11 @@ class Dataset[T] private[sql]( Dataset(sparkSession, logicalPlan) } + /** A convenient function to wrap a logical plan and produce a Dataset. */ + @inline private def withSameTypedPlan(logicalPlan: LogicalPlan): Dataset[T] = { + Dataset(sparkSession, logicalPlan, encoderGenerator) + } + /** A convenient function to wrap a set based logical plan and produce a Dataset. 
*/ @inline private def withSetOperator[U : Encoder](logicalPlan: LogicalPlan): Dataset[U] = { if (isUnTyped) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index 392c3edab9895..6dcf01d3a9db2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import org.apache.spark.api.java.function._ -import org.apache.spark.sql.catalyst.analysis.{EliminateEventTimeWatermark, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{agnosticEncoderFor, ProductEncoder} import org.apache.spark.sql.catalyst.encoders.encoderFor import org.apache.spark.sql.catalyst.expressions.Attribute @@ -289,11 +289,11 @@ class KeyValueGroupedDataset[K, V] private[sql]( transformWithState ) - Dataset[U](sparkSession, EliminateEventTimeWatermark( + Dataset[U](sparkSession, UpdateEventTimeWatermarkColumn( UnresolvedAttribute(eventTimeColumnName), None, - transformWithStateDataset.logicalPlan))) + transformWithStateDataset.logicalPlan)) } /** @inheritdoc */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 0974df55a6d84..b8c4b03fc13d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -21,7 +21,7 @@ import org.apache.spark.SparkRuntimeException import org.apache.spark.annotation.Stable import org.apache.spark.api.python.PythonEvalType import org.apache.spark.broadcast.Broadcast -import org.apache.spark.sql.catalyst.analysis.UnresolvedAlias +import 
org.apache.spark.sql.catalyst.analysis.{UnresolvedAlias, UnresolvedAttribute} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.internal.ExpressionUtils.{column, generateAlias} +import org.apache.spark.sql.internal.ExpressionUtils.generateAlias import org.apache.spark.sql.internal.TypedAggUtils.withInputType import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{NumericType, StructType} @@ -114,7 +114,7 @@ class RelationalGroupedDataset protected[sql]( namedExpr } } - columnExprs.map(column) + columnExprs.map(Column(_)) } /** @inheritdoc */ @@ -238,7 +238,7 @@ class RelationalGroupedDataset protected[sql]( broadcastVars: Array[Broadcast[Object]], outputSchema: StructType): DataFrame = { val groupingNamedExpressions = groupingExprs.map(alias) - val groupingCols = groupingNamedExpressions.map(column) + val groupingCols = groupingNamedExpressions.map(Column(_)) val groupingDataFrame = df.select(groupingCols : _*) val groupingAttributes = groupingNamedExpressions.map(_.toAttribute) Dataset.ofRows( @@ -475,7 +475,8 @@ class RelationalGroupedDataset protected[sql]( outputStructType: StructType, outputModeStr: String, timeModeStr: String, - initialState: RelationalGroupedDataset): DataFrame = { + initialState: RelationalGroupedDataset, + eventTimeColumnName: String): DataFrame = { def exprToAttr(expr: Seq[Expression]): Seq[Attribute] = { expr.map { case ne: NamedExpression => ne @@ -529,7 +530,30 @@ class RelationalGroupedDataset protected[sql]( initialStateSchema = initialState.df.schema ) } - Dataset.ofRows(df.sparkSession, plan) + if 
(eventTimeColumnName.isEmpty) { + Dataset.ofRows(df.sparkSession, plan) + } else { + updateEventTimeColumnAfterTransformWithState(plan, eventTimeColumnName) + } + } + + /** + * Creates a new dataset with updated eventTimeColumn after the transformWithState + * logical node. + */ + private def updateEventTimeColumnAfterTransformWithState( + transformWithStateInPandas: LogicalPlan, + eventTimeColumnName: String): DataFrame = { + val transformWithStateDataset = Dataset.ofRows( + df.sparkSession, + transformWithStateInPandas + ) + + Dataset.ofRows(df.sparkSession, + UpdateEventTimeWatermarkColumn( + UnresolvedAttribute(eventTimeColumnName), + None, + transformWithStateDataset.logicalPlan)) } override def toString: String = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 636899a7acb06..1318563f8c93b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -17,21 +17,18 @@ package org.apache.spark.sql -import java.util.Properties +import java.util.{List => JList, Map => JMap, Properties} -import scala.collection.immutable import scala.reflect.runtime.universe.TypeTag import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} -import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.analysis.{CurrentNamespace, UnresolvedNamespace} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.ShowTables +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf} import 
org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.streaming.{DataStreamReader, StreamingQueryManager} @@ -41,8 +38,8 @@ import org.apache.spark.sql.util.ExecutionListenerManager /** * The entry point for working with structured data (rows and columns) in Spark 1.x. * - * As of Spark 2.0, this is replaced by [[SparkSession]]. However, we are keeping the class - * here for backward compatibility. + * As of Spark 2.0, this is replaced by [[SparkSession]]. However, we are keeping the class here + * for backward compatibility. * * @groupname basic Basic Operations * @groupname ddl_ops Persistent Catalog DDL @@ -56,8 +53,8 @@ import org.apache.spark.sql.util.ExecutionListenerManager * @since 1.0.0 */ @Stable -class SQLContext private[sql](val sparkSession: SparkSession) - extends Logging with Serializable { +class SQLContext private[sql] (override val sparkSession: SparkSession) + extends api.SQLContext(sparkSession) { self => @@ -77,980 +74,325 @@ class SQLContext private[sql](val sparkSession: SparkSession) // TODO: move this logic into SparkSession private[sql] def sessionState: SessionState = sparkSession.sessionState + private[sql] def sharedState: SharedState = sparkSession.sharedState + @deprecated("Use SparkSession.sessionState.conf instead", "4.0.0") private[sql] def conf: SQLConf = sessionState.conf - def sparkContext: SparkContext = sparkSession.sparkContext - - /** - * Returns a [[SQLContext]] as new session, with separated SQL configurations, temporary - * tables, registered functions, but sharing the same `SparkContext`, cached data and - * other things. - * - * @since 1.6.0 - */ - def newSession(): SQLContext = sparkSession.newSession().sqlContext - - /** - * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s - * that listen for execution metrics. - */ + /** @inheritdoc */ def listenerManager: ExecutionListenerManager = sparkSession.listenerManager - /** - * Set Spark SQL configuration properties. 
- * - * @group config - * @since 1.0.0 - */ + /** @inheritdoc */ def setConf(props: Properties): Unit = { sessionState.conf.setConf(props) } - /** - * Set the given Spark SQL configuration property. - */ private[sql] def setConf[T](entry: ConfigEntry[T], value: T): Unit = { sessionState.conf.setConf(entry, value) } - /** - * Set the given Spark SQL configuration property. - * - * @group config - * @since 1.0.0 - */ - def setConf(key: String, value: String): Unit = { - sparkSession.conf.set(key, value) - } - - /** - * Return the value of Spark SQL configuration property for the given key. - * - * @group config - * @since 1.0.0 - */ - def getConf(key: String): String = { - sparkSession.conf.get(key) - } - - /** - * Return the value of Spark SQL configuration property for the given key. If the key is not set - * yet, return `defaultValue`. - * - * @group config - * @since 1.0.0 - */ - def getConf(key: String, defaultValue: String): String = { - sparkSession.conf.get(key, defaultValue) - } - - /** - * Return all the configuration properties that have been set (i.e. not the default). - * This creates a new copy of the config properties in the form of a Map. - * - * @group config - * @since 1.0.0 - */ - def getAllConfs: immutable.Map[String, String] = { - sparkSession.conf.getAll - } - - /** - * :: Experimental :: - * A collection of methods that are considered experimental, but can be used to hook into - * the query planner for advanced functionality. - * - * @group basic - * @since 1.3.0 - */ + /** @inheritdoc */ @Experimental @transient @Unstable def experimental: ExperimentalMethods = sparkSession.experimental - /** - * Returns a `DataFrame` with no rows or columns. - * - * @group basic - * @since 1.3.0 - */ - def emptyDataFrame: DataFrame = sparkSession.emptyDataFrame - - /** - * A collection of methods for registering user-defined functions (UDF). 
- * - * The following example registers a Scala closure as UDF: - * {{{ - * sqlContext.udf.register("myUDF", (arg1: Int, arg2: String) => arg2 + arg1) - * }}} - * - * The following example registers a UDF in Java: - * {{{ - * sqlContext.udf().register("myUDF", - * (Integer arg1, String arg2) -> arg2 + arg1, - * DataTypes.StringType); - * }}} - * - * @note The user-defined functions must be deterministic. Due to optimization, - * duplicate invocations may be eliminated or the function may even be invoked more times than - * it is present in the query. - * - * @group basic - * @since 1.3.0 - */ + /** @inheritdoc */ def udf: UDFRegistration = sparkSession.udf - /** - * Returns true if the table is currently cached in-memory. - * @group cachemgmt - * @since 1.3.0 - */ - def isCached(tableName: String): Boolean = { - sparkSession.catalog.isCached(tableName) - } - - /** - * Caches the specified table in-memory. - * @group cachemgmt - * @since 1.3.0 - */ - def cacheTable(tableName: String): Unit = { - sparkSession.catalog.cacheTable(tableName) - } - - /** - * Removes the specified table from the in-memory cache. - * @group cachemgmt - * @since 1.3.0 - */ - def uncacheTable(tableName: String): Unit = { - sparkSession.catalog.uncacheTable(tableName) - } - - /** - * Removes all cached tables from the in-memory cache. - * @since 1.3.0 - */ - def clearCache(): Unit = { - sparkSession.catalog.clearCache() - } - // scalastyle:off // Disable style checker so "implicits" object can start with lowercase i - /** - * (Scala-specific) Implicit methods available in Scala for converting - * common Scala objects into `DataFrame`s. - * - * {{{ - * val sqlContext = new SQLContext(sc) - * import sqlContext.implicits._ - * }}} - * - * @group basic - * @since 1.3.0 - */ + + /** @inheritdoc */ object implicits extends SQLImplicits { + /** @inheritdoc */ override protected def session: SparkSession = sparkSession } + // scalastyle:on /** - * Creates a DataFrame from an RDD of Product (e.g. 
case classes, tuples). - * - * @group dataframes - * @since 1.3.0 + * Creates a DataFrame from an RDD[Row]. User can specify whether the input rows should be + * converted to Catalyst rows. */ - def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { - sparkSession.createDataFrame(rdd) + private[sql] def internalCreateDataFrame( + catalystRows: RDD[InternalRow], + schema: StructType, + isStreaming: Boolean = false): DataFrame = { + sparkSession.internalCreateDataFrame(catalystRows, schema, isStreaming) } - /** - * Creates a DataFrame from a local Seq of Product. - * - * @group dataframes - * @since 1.3.0 - */ - def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = { - sparkSession.createDataFrame(data) - } + /** @inheritdoc */ + def read: DataFrameReader = sparkSession.read - /** - * Convert a `BaseRelation` created for external data sources into a `DataFrame`. - * - * @group dataframes - * @since 1.3.0 - */ - def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = { - sparkSession.baseRelationToDataFrame(baseRelation) - } + /** @inheritdoc */ + def readStream: DataStreamReader = sparkSession.readStream /** - * :: DeveloperApi :: - * Creates a `DataFrame` from an `RDD` containing [[Row]]s using the given schema. - * It is important to make sure that the structure of every [[Row]] of the provided RDD matches - * the provided schema. Otherwise, there will be runtime exception. 
- * Example: - * {{{ - * import org.apache.spark.sql._ - * import org.apache.spark.sql.types._ - * val sqlContext = new org.apache.spark.sql.SQLContext(sc) - * - * val schema = - * StructType( - * StructField("name", StringType, false) :: - * StructField("age", IntegerType, true) :: Nil) - * - * val people = - * sc.textFile("examples/src/main/resources/people.txt").map( - * _.split(",")).map(p => Row(p(0), p(1).trim.toInt)) - * val dataFrame = sqlContext.createDataFrame(people, schema) - * dataFrame.printSchema - * // root - * // |-- name: string (nullable = false) - * // |-- age: integer (nullable = true) - * - * dataFrame.createOrReplaceTempView("people") - * sqlContext.sql("select name from people").collect.foreach(println) - * }}} - * - * @group dataframes - * @since 1.3.0 + * Registers the given `DataFrame` as a temporary table in the catalog. Temporary tables exist + * only during the lifetime of this instance of SQLContext. */ - @DeveloperApi - def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = { - sparkSession.createDataFrame(rowRDD, schema) + private[sql] def registerDataFrameAsTable(df: DataFrame, tableName: String): Unit = { + df.createOrReplaceTempView(tableName) } /** - * Creates a [[Dataset]] from a local Seq of data of a given type. This method requires an - * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) - * that is generally created automatically through implicits from a `SparkSession`, or can be - * created explicitly by calling static methods on [[Encoders]]. 
- * - * == Example == - * - * {{{ - * - * import spark.implicits._ - * case class Person(name: String, age: Long) - * val data = Seq(Person("Michael", 29), Person("Andy", 30), Person("Justin", 19)) - * val ds = spark.createDataset(data) - * - * ds.show() - * // +-------+---+ - * // | name|age| - * // +-------+---+ - * // |Michael| 29| - * // | Andy| 30| - * // | Justin| 19| - * // +-------+---+ - * }}} + * Returns a `StreamingQueryManager` that allows managing all the + * [[org.apache.spark.sql.streaming.StreamingQuery StreamingQueries]] active on `this` context. * * @since 2.0.0 - * @group dataset */ - def createDataset[T : Encoder](data: Seq[T]): Dataset[T] = { - sparkSession.createDataset(data) - } + def streams: StreamingQueryManager = sparkSession.streams - /** - * Creates a [[Dataset]] from an RDD of a given type. This method requires an - * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) - * that is generally created automatically through implicits from a `SparkSession`, or can be - * created explicitly by calling static methods on [[Encoders]]. - * - * @since 2.0.0 - * @group dataset - */ - def createDataset[T : Encoder](data: RDD[T]): Dataset[T] = { - sparkSession.createDataset(data) - } + /** @inheritdoc */ + override def sparkContext: SparkContext = super.sparkContext - /** - * Creates a [[Dataset]] from a `java.util.List` of a given type. This method requires an - * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) - * that is generally created automatically through implicits from a `SparkSession`, or can be - * created explicitly by calling static methods on [[Encoders]]. 
- * - * == Java Example == - * - * {{{ - * List data = Arrays.asList("hello", "world"); - * Dataset ds = spark.createDataset(data, Encoders.STRING()); - * }}} - * - * @since 2.0.0 - * @group dataset - */ - def createDataset[T : Encoder](data: java.util.List[T]): Dataset[T] = { - sparkSession.createDataset(data) - } + /** @inheritdoc */ + override def newSession(): SQLContext = sparkSession.newSession().sqlContext - /** - * Creates a DataFrame from an RDD[Row]. User can specify whether the input rows should be - * converted to Catalyst rows. - */ - private[sql] - def internalCreateDataFrame( - catalystRows: RDD[InternalRow], - schema: StructType, - isStreaming: Boolean = false) = { - sparkSession.internalCreateDataFrame(catalystRows, schema, isStreaming) - } + /** @inheritdoc */ + override def emptyDataFrame: Dataset[Row] = super.emptyDataFrame - /** - * :: DeveloperApi :: - * Creates a `DataFrame` from a `JavaRDD` containing [[Row]]s using the given schema. - * It is important to make sure that the structure of every [[Row]] of the provided RDD matches - * the provided schema. Otherwise, there will be runtime exception. - * - * @group dataframes - * @since 1.3.0 - */ - @DeveloperApi - def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { - sparkSession.createDataFrame(rowRDD, schema) - } + /** @inheritdoc */ + override def createDataFrame[A <: Product: TypeTag](rdd: RDD[A]): Dataset[Row] = + super.createDataFrame(rdd) - /** - * :: DeveloperApi :: - * Creates a `DataFrame` from a `java.util.List` containing [[Row]]s using the given schema. - * It is important to make sure that the structure of every [[Row]] of the provided List matches - * the provided schema. Otherwise, there will be runtime exception. 
- * - * @group dataframes - * @since 1.6.0 - */ + /** @inheritdoc */ + override def createDataFrame[A <: Product: TypeTag](data: Seq[A]): Dataset[Row] = + super.createDataFrame(data) + + /** @inheritdoc */ + override def baseRelationToDataFrame(baseRelation: BaseRelation): Dataset[Row] = + super.baseRelationToDataFrame(baseRelation) + + /** @inheritdoc */ @DeveloperApi - def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = { - sparkSession.createDataFrame(rows, schema) - } + override def createDataFrame(rowRDD: RDD[Row], schema: StructType): Dataset[Row] = + super.createDataFrame(rowRDD, schema) - /** - * Applies a schema to an RDD of Java Beans. - * - * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, - * SELECT * queries will return the columns in an undefined order. - * @group dataframes - * @since 1.3.0 - */ - def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = { - sparkSession.createDataFrame(rdd, beanClass) - } + /** @inheritdoc */ + override def createDataset[T: Encoder](data: Seq[T]): Dataset[T] = super.createDataset(data) - /** - * Applies a schema to an RDD of Java Beans. - * - * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, - * SELECT * queries will return the columns in an undefined order. - * @group dataframes - * @since 1.3.0 - */ - def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { - sparkSession.createDataFrame(rdd, beanClass) - } + /** @inheritdoc */ + override def createDataset[T: Encoder](data: RDD[T]): Dataset[T] = super.createDataset(data) - /** - * Applies a schema to a List of Java Beans. - * - * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, - * SELECT * queries will return the columns in an undefined order. 
- * @group dataframes - * @since 1.6.0 - */ - def createDataFrame(data: java.util.List[_], beanClass: Class[_]): DataFrame = { - sparkSession.createDataFrame(data, beanClass) - } + /** @inheritdoc */ + override def createDataset[T: Encoder](data: JList[T]): Dataset[T] = + super.createDataset(data) - /** - * Returns a [[DataFrameReader]] that can be used to read non-streaming data in as a - * `DataFrame`. - * {{{ - * sqlContext.read.parquet("/path/to/file.parquet") - * sqlContext.read.schema(schema).json("/path/to/file.json") - * }}} - * - * @group genericdata - * @since 1.4.0 - */ - def read: DataFrameReader = sparkSession.read + /** @inheritdoc */ + @DeveloperApi + override def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): Dataset[Row] = + super.createDataFrame(rowRDD, schema) + /** @inheritdoc */ + @DeveloperApi + override def createDataFrame(rows: JList[Row], schema: StructType): Dataset[Row] = + super.createDataFrame(rows, schema) - /** - * Returns a `DataStreamReader` that can be used to read streaming data in as a `DataFrame`. - * {{{ - * sparkSession.readStream.parquet("/path/to/directory/of/parquet/files") - * sparkSession.readStream.schema(schema).json("/path/to/directory/of/json/files") - * }}} - * - * @since 2.0.0 - */ - def readStream: DataStreamReader = sparkSession.readStream + /** @inheritdoc */ + override def createDataFrame(rdd: RDD[_], beanClass: Class[_]): Dataset[Row] = + super.createDataFrame(rdd, beanClass) + /** @inheritdoc */ + override def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): Dataset[Row] = + super.createDataFrame(rdd, beanClass) - /** - * Creates an external table from the given path and returns the corresponding DataFrame. - * It will use the default data source configured by spark.sql.sources.default. 
- * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable(tableName: String, path: String): DataFrame = { - sparkSession.catalog.createTable(tableName, path) - } + /** @inheritdoc */ + override def createDataFrame(data: JList[_], beanClass: Class[_]): Dataset[Row] = + super.createDataFrame(data, beanClass) - /** - * Creates an external table from the given path based on a data source - * and returns the corresponding DataFrame. - * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( + /** @inheritdoc */ + override def createExternalTable(tableName: String, path: String): Dataset[Row] = + super.createExternalTable(tableName, path) + + /** @inheritdoc */ + override def createExternalTable( tableName: String, path: String, - source: String): DataFrame = { - sparkSession.catalog.createTable(tableName, path, source) + source: String): Dataset[Row] = { + super.createExternalTable(tableName, path, source) } - /** - * Creates an external table from the given path based on a data source and a set of options. - * Then, returns the corresponding DataFrame. - * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( + /** @inheritdoc */ + override def createExternalTable( tableName: String, source: String, - options: java.util.Map[String, String]): DataFrame = { - sparkSession.catalog.createTable(tableName, source, options) + options: JMap[String, String]): Dataset[Row] = { + super.createExternalTable(tableName, source, options) } - /** - * (Scala-specific) - * Creates an external table from the given path based on a data source and a set of options. - * Then, returns the corresponding DataFrame. 
- * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( + /** @inheritdoc */ + override def createExternalTable( tableName: String, source: String, - options: Map[String, String]): DataFrame = { - sparkSession.catalog.createTable(tableName, source, options) + options: Map[String, String]): Dataset[Row] = { + super.createExternalTable(tableName, source, options) } - /** - * Create an external table from the given path based on a data source, a schema and - * a set of options. Then, returns the corresponding DataFrame. - * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( + /** @inheritdoc */ + override def createExternalTable( tableName: String, source: String, schema: StructType, - options: java.util.Map[String, String]): DataFrame = { - sparkSession.catalog.createTable(tableName, source, schema, options) + options: JMap[String, String]): Dataset[Row] = { + super.createExternalTable(tableName, source, schema, options) } - /** - * (Scala-specific) - * Create an external table from the given path based on a data source, a schema and - * a set of options. Then, returns the corresponding DataFrame. - * - * @group ddl_ops - * @since 1.3.0 - */ - @deprecated("use sparkSession.catalog.createTable instead.", "2.2.0") - def createExternalTable( + /** @inheritdoc */ + override def createExternalTable( tableName: String, source: String, schema: StructType, - options: Map[String, String]): DataFrame = { - sparkSession.catalog.createTable(tableName, source, schema, options) - } - - /** - * Registers the given `DataFrame` as a temporary table in the catalog. Temporary tables exist - * only during the lifetime of this instance of SQLContext. 
- */ - private[sql] def registerDataFrameAsTable(df: DataFrame, tableName: String): Unit = { - df.createOrReplaceTempView(tableName) + options: Map[String, String]): Dataset[Row] = { + super.createExternalTable(tableName, source, schema, options) } - /** - * Drops the temporary table with the given table name in the catalog. If the table has been - * cached/persisted before, it's also unpersisted. - * - * @param tableName the name of the table to be unregistered. - * @group basic - * @since 1.3.0 - */ - def dropTempTable(tableName: String): Unit = { - sparkSession.catalog.dropTempView(tableName) - } + /** @inheritdoc */ + override def range(end: Long): Dataset[Row] = super.range(end) - /** - * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements - * in a range from 0 to `end` (exclusive) with step value 1. - * - * @since 1.4.1 - * @group dataframe - */ - def range(end: Long): DataFrame = sparkSession.range(end).toDF() - - /** - * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements - * in a range from `start` to `end` (exclusive) with step value 1. - * - * @since 1.4.0 - * @group dataframe - */ - def range(start: Long, end: Long): DataFrame = sparkSession.range(start, end).toDF() - - /** - * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements - * in a range from `start` to `end` (exclusive) with a step value. - * - * @since 2.0.0 - * @group dataframe - */ - def range(start: Long, end: Long, step: Long): DataFrame = { - sparkSession.range(start, end, step).toDF() - } - - /** - * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements - * in an range from `start` to `end` (exclusive) with an step value, with partition number - * specified. 
- * - * @since 1.4.0 - * @group dataframe - */ - def range(start: Long, end: Long, step: Long, numPartitions: Int): DataFrame = { - sparkSession.range(start, end, step, numPartitions).toDF() - } - - /** - * Executes a SQL query using Spark, returning the result as a `DataFrame`. - * This API eagerly runs DDL/DML commands, but not for SELECT queries. - * - * @group basic - * @since 1.3.0 - */ - def sql(sqlText: String): DataFrame = sparkSession.sql(sqlText) - - /** - * Returns the specified table as a `DataFrame`. - * - * @group ddl_ops - * @since 1.3.0 - */ - def table(tableName: String): DataFrame = { - sparkSession.table(tableName) - } + /** @inheritdoc */ + override def range(start: Long, end: Long): Dataset[Row] = super.range(start, end) - /** - * Returns a `DataFrame` containing names of existing tables in the current database. - * The returned DataFrame has three columns, database, tableName and isTemporary (a Boolean - * indicating if a table is a temporary one or not). - * - * @group ddl_ops - * @since 1.3.0 - */ - def tables(): DataFrame = { - Dataset.ofRows(sparkSession, ShowTables(CurrentNamespace, None)) - } + /** @inheritdoc */ + override def range(start: Long, end: Long, step: Long): Dataset[Row] = + super.range(start, end, step) - /** - * Returns a `DataFrame` containing names of existing tables in the given database. - * The returned DataFrame has three columns, database, tableName and isTemporary (a Boolean - * indicating if a table is a temporary one or not). 
- * - * @group ddl_ops - * @since 1.3.0 - */ - def tables(databaseName: String): DataFrame = { - Dataset.ofRows(sparkSession, ShowTables(UnresolvedNamespace(Seq(databaseName)), None)) - } + /** @inheritdoc */ + override def range(start: Long, end: Long, step: Long, numPartitions: Int): Dataset[Row] = + super.range(start, end, step, numPartitions) - /** - * Returns a `StreamingQueryManager` that allows managing all the - * [[org.apache.spark.sql.streaming.StreamingQuery StreamingQueries]] active on `this` context. - * - * @since 2.0.0 - */ - def streams: StreamingQueryManager = sparkSession.streams + /** @inheritdoc */ + override def sql(sqlText: String): Dataset[Row] = super.sql(sqlText) - /** - * Returns the names of tables in the current database as an array. - * - * @group ddl_ops - * @since 1.3.0 - */ - def tableNames(): Array[String] = { - tableNames(sparkSession.catalog.currentDatabase) - } + /** @inheritdoc */ + override def table(tableName: String): Dataset[Row] = super.table(tableName) - /** - * Returns the names of tables in the given database as an array. - * - * @group ddl_ops - * @since 1.3.0 - */ - def tableNames(databaseName: String): Array[String] = { - sessionState.catalog.listTables(databaseName).map(_.table).toArray - } + /** @inheritdoc */ + override def tables(): DataFrame = super.tables() - //////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////// - // Deprecated methods - //////////////////////////////////////////////////////////////////////////// - //////////////////////////////////////////////////////////////////////////// + /** @inheritdoc */ + override def tables(databaseName: String): DataFrame = super.tables(databaseName) - /** - * @deprecated As of 1.3.0, replaced by `createDataFrame()`. 
- */ - @deprecated("Use createDataFrame instead.", "1.3.0") - def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = { - createDataFrame(rowRDD, schema) - } + /** @inheritdoc */ + override def applySchema(rowRDD: RDD[Row], schema: StructType): Dataset[Row] = + super.applySchema(rowRDD, schema) - /** - * @deprecated As of 1.3.0, replaced by `createDataFrame()`. - */ - @deprecated("Use createDataFrame instead.", "1.3.0") - def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { - createDataFrame(rowRDD, schema) - } + /** @inheritdoc */ + override def applySchema(rowRDD: JavaRDD[Row], schema: StructType): Dataset[Row] = + super.applySchema(rowRDD, schema) - /** - * @deprecated As of 1.3.0, replaced by `createDataFrame()`. - */ - @deprecated("Use createDataFrame instead.", "1.3.0") - def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = { - createDataFrame(rdd, beanClass) - } + /** @inheritdoc */ + override def applySchema(rdd: RDD[_], beanClass: Class[_]): Dataset[Row] = + super.applySchema(rdd, beanClass) - /** - * @deprecated As of 1.3.0, replaced by `createDataFrame()`. - */ - @deprecated("Use createDataFrame instead.", "1.3.0") - def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { - createDataFrame(rdd, beanClass) - } + /** @inheritdoc */ + override def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): Dataset[Row] = + super.applySchema(rdd, beanClass) - /** - * Loads a Parquet file, returning the result as a `DataFrame`. This function returns an empty - * `DataFrame` if no paths are passed in. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().parquet()`. 
- */ - @deprecated("Use read.parquet() instead.", "1.4.0") + /** @inheritdoc */ @scala.annotation.varargs - def parquetFile(paths: String*): DataFrame = { - if (paths.isEmpty) { - emptyDataFrame - } else { - read.parquet(paths : _*) - } - } + override def parquetFile(paths: String*): Dataset[Row] = super.parquetFile(paths: _*) - /** - * Loads a JSON file (one object per line), returning the result as a `DataFrame`. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. - */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonFile(path: String): DataFrame = { - read.json(path) - } + /** @inheritdoc */ + override def jsonFile(path: String): Dataset[Row] = super.jsonFile(path) - /** - * Loads a JSON file (one object per line) and applies the given schema, - * returning the result as a `DataFrame`. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. - */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonFile(path: String, schema: StructType): DataFrame = { - read.schema(schema).json(path) - } + /** @inheritdoc */ + override def jsonFile(path: String, schema: StructType): Dataset[Row] = + super.jsonFile(path, schema) - /** - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. - */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonFile(path: String, samplingRatio: Double): DataFrame = { - read.option("samplingRatio", samplingRatio.toString).json(path) - } + /** @inheritdoc */ + override def jsonFile(path: String, samplingRatio: Double): Dataset[Row] = + super.jsonFile(path, samplingRatio) - /** - * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a - * `DataFrame`. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. 
- */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonRDD(json: RDD[String]): DataFrame = read.json(json) + /** @inheritdoc */ + override def jsonRDD(json: RDD[String]): Dataset[Row] = read.json(json) - /** - * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a - * `DataFrame`. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. - */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json) + /** @inheritdoc */ + override def jsonRDD(json: JavaRDD[String]): Dataset[Row] = read.json(json) - /** - * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, - * returning the result as a `DataFrame`. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. - */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { - read.schema(schema).json(json) - } + /** @inheritdoc */ + override def jsonRDD(json: RDD[String], schema: StructType): Dataset[Row] = + super.jsonRDD(json, schema) - /** - * Loads an JavaRDD[String] storing JSON objects (one object per record) and applies the given - * schema, returning the result as a `DataFrame`. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. - */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { - read.schema(schema).json(json) - } + /** @inheritdoc */ + override def jsonRDD(json: JavaRDD[String], schema: StructType): Dataset[Row] = + super.jsonRDD(json, schema) - /** - * Loads an RDD[String] storing JSON objects (one object per record) inferring the - * schema, returning the result as a `DataFrame`. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. 
- */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { - read.option("samplingRatio", samplingRatio.toString).json(json) - } + /** @inheritdoc */ + override def jsonRDD(json: RDD[String], samplingRatio: Double): Dataset[Row] = + super.jsonRDD(json, samplingRatio) - /** - * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the - * schema, returning the result as a `DataFrame`. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().json()`. - */ - @deprecated("Use read.json() instead.", "1.4.0") - def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { - read.option("samplingRatio", samplingRatio.toString).json(json) - } + /** @inheritdoc */ + override def jsonRDD(json: JavaRDD[String], samplingRatio: Double): Dataset[Row] = + super.jsonRDD(json, samplingRatio) - /** - * Returns the dataset stored at path as a DataFrame, - * using the default data source configured by spark.sql.sources.default. - * - * @group genericdata - * @deprecated As of 1.4.0, replaced by `read().load(path)`. - */ - @deprecated("Use read.load(path) instead.", "1.4.0") - def load(path: String): DataFrame = { - read.load(path) - } + /** @inheritdoc */ + override def load(path: String): Dataset[Row] = super.load(path) - /** - * Returns the dataset stored at path as a DataFrame, using the given data source. - * - * @group genericdata - * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`. - */ - @deprecated("Use read.format(source).load(path) instead.", "1.4.0") - def load(path: String, source: String): DataFrame = { - read.format(source).load(path) - } + /** @inheritdoc */ + override def load(path: String, source: String): Dataset[Row] = super.load(path, source) - /** - * (Java-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame. 
- * - * @group genericdata - * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`. - */ - @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0") - def load(source: String, options: java.util.Map[String, String]): DataFrame = { - read.options(options).format(source).load() - } + /** @inheritdoc */ + override def load(source: String, options: JMap[String, String]): Dataset[Row] = + super.load(source, options) - /** - * (Scala-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame. - * - * @group genericdata - * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`. - */ - @deprecated("Use read.format(source).options(options).load() instead.", "1.4.0") - def load(source: String, options: Map[String, String]): DataFrame = { - read.options(options).format(source).load() - } + /** @inheritdoc */ + override def load(source: String, options: Map[String, String]): Dataset[Row] = + super.load(source, options) - /** - * (Java-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. - * - * @group genericdata - * @deprecated As of 1.4.0, replaced by - * `read().format(source).schema(schema).options(options).load()`. - */ - @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0") - def load( + /** @inheritdoc */ + override def load( source: String, schema: StructType, - options: java.util.Map[String, String]): DataFrame = { - read.format(source).schema(schema).options(options).load() + options: JMap[String, String]): Dataset[Row] = { + super.load(source, schema, options) } - /** - * (Scala-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. 
- * - * @group genericdata - * @deprecated As of 1.4.0, replaced by - * `read().format(source).schema(schema).options(options).load()`. - */ - @deprecated("Use read.format(source).schema(schema).options(options).load() instead.", "1.4.0") - def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = { - read.format(source).schema(schema).options(options).load() + /** @inheritdoc */ + override def load( + source: String, + schema: StructType, + options: Map[String, String]): Dataset[Row] = { + super.load(source, schema, options) } - /** - * Construct a `DataFrame` representing the database table accessible via JDBC URL - * url named table. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().jdbc()`. - */ - @deprecated("Use read.jdbc() instead.", "1.4.0") - def jdbc(url: String, table: String): DataFrame = { - read.jdbc(url, table, new Properties) - } + /** @inheritdoc */ + override def jdbc(url: String, table: String): Dataset[Row] = super.jdbc(url, table) - /** - * Construct a `DataFrame` representing the database table accessible via JDBC URL - * url named table. Partitions of the table will be retrieved in parallel based on the parameters - * passed to this function. - * - * @param columnName the name of a column of integral type that will be used for partitioning. - * @param lowerBound the minimum value of `columnName` used to decide partition stride - * @param upperBound the maximum value of `columnName` used to decide partition stride - * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split - * evenly into this many partitions - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().jdbc()`. 
- */ - @deprecated("Use read.jdbc() instead.", "1.4.0") - def jdbc( + /** @inheritdoc */ + override def jdbc( url: String, table: String, columnName: String, lowerBound: Long, upperBound: Long, - numPartitions: Int): DataFrame = { - read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties) + numPartitions: Int): Dataset[Row] = { + super.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions) } - /** - * Construct a `DataFrame` representing the database table accessible via JDBC URL - * url named table. The theParts parameter gives a list expressions - * suitable for inclusion in WHERE clauses; each one defines one partition - * of the `DataFrame`. - * - * @group specificdata - * @deprecated As of 1.4.0, replaced by `read().jdbc()`. - */ - @deprecated("Use read.jdbc() instead.", "1.4.0") - def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { - read.jdbc(url, table, theParts, new Properties) - } + /** @inheritdoc */ + override def jdbc(url: String, table: String, theParts: Array[String]): Dataset[Row] = + super.jdbc(url, table, theParts) } -/** - * This SQLContext object contains utility functions to create a singleton SQLContext instance, - * or to get the created SQLContext instance. - * - * It also provides utility functions to support preference for threads in multiple sessions - * scenario, setActive could set a SQLContext for current thread, which will be returned by - * getOrCreate instead of the global one. - */ -object SQLContext { +object SQLContext extends api.SQLContextCompanion { - /** - * Get the singleton SQLContext if it exists or create a new one using the given SparkContext. - * - * This function can be used to create a singleton SQLContext object that can be shared across - * the JVM. - * - * If there is an active SQLContext for current thread, it will be returned instead of the global - * one. 
- * - * @since 1.5.0 - */ - @deprecated("Use SparkSession.builder instead", "2.0.0") + override private[sql] type SQLContextImpl = SQLContext + override private[sql] type SparkContextImpl = SparkContext + + /** @inheritdoc */ def getOrCreate(sparkContext: SparkContext): SQLContext = { SparkSession.builder().sparkContext(sparkContext).getOrCreate().sqlContext } - /** - * Changes the SQLContext that will be returned in this thread and its children when - * SQLContext.getOrCreate() is called. This can be used to ensure that a given thread receives - * a SQLContext with an isolated session, instead of the global (first created) context. - * - * @since 1.6.0 - */ - @deprecated("Use SparkSession.setActiveSession instead", "2.0.0") - def setActive(sqlContext: SQLContext): Unit = { - SparkSession.setActiveSession(sqlContext.sparkSession) - } - - /** - * Clears the active SQLContext for current thread. Subsequent calls to getOrCreate will - * return the first created context instead of a thread-local override. - * - * @since 1.6.0 - */ - @deprecated("Use SparkSession.clearActiveSession instead", "2.0.0") - def clearActive(): Unit = { - SparkSession.clearActiveSession() - } + /** @inheritdoc */ + override def setActive(sqlContext: SQLContext): Unit = super.setActive(sqlContext) /** - * Converts an iterator of Java Beans to InternalRow using the provided - * bean info & schema. This is not related to the singleton, but is a static - * method for internal use. + * Converts an iterator of Java Beans to InternalRow using the provided bean info & schema. This + * is not related to the singleton, but is a static method for internal use. 
*/ private[sql] def beansToRows( data: Iterator[_], @@ -1058,7 +400,9 @@ object SQLContext { attrs: Seq[AttributeReference]): Iterator[InternalRow] = { def createStructConverter(cls: Class[_], fieldTypes: Seq[DataType]): Any => InternalRow = { val methodConverters = - JavaTypeInference.getJavaBeanReadableProperties(cls).zip(fieldTypes) + JavaTypeInference + .getJavaBeanReadableProperties(cls) + .zip(fieldTypes) .map { case (property, fieldType) => val method = property.getReadMethod method -> createConverter(method.getReturnType, fieldType) @@ -1067,16 +411,17 @@ object SQLContext { if (value == null) { null } else { - new GenericInternalRow( - methodConverters.map { case (method, converter) => - converter(method.invoke(value)) - }) + new GenericInternalRow(methodConverters.map { case (method, converter) => + converter(method.invoke(value)) + }) } } + def createConverter(cls: Class[_], dataType: DataType): Any => Any = dataType match { case struct: StructType => createStructConverter(cls, struct.map(_.dataType)) case _ => CatalystTypeConverters.createToCatalystConverter(dataType) } + val dataConverter = createStructConverter(beanClass, attrs.map(_.dataType)) data.map(dataConverter) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index a7f85db12b214..3b36f6b59cb38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -42,19 +42,21 @@ import org.apache.spark.sql.catalog.Catalog import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis.{NameParameterizedQuery, PosParameterizedQuery, UnresolvedRelation} import org.apache.spark.sql.catalyst.encoders._ -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} import 
org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Range} +import org.apache.spark.sql.catalyst.plans.logical.{CompoundBody, LocalRelation, LogicalPlan, Range} +import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.ExternalCommandRunner -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, SqlScriptingErrors} import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.ExternalCommandExecutor import org.apache.spark.sql.execution.datasources.{DataSource, LogicalRelation} import org.apache.spark.sql.functions.lit import org.apache.spark.sql.internal._ import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION +import org.apache.spark.sql.scripting.SqlScriptingExecution import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.streaming._ import org.apache.spark.sql.types.{DataType, StructType} @@ -96,7 +98,7 @@ class SparkSession private( @transient private[sql] val extensions: SparkSessionExtensions, @transient private[sql] val initialSessionOptions: Map[String, String], @transient private val parentManagedJobTags: Map[String, String]) - extends api.SparkSession with Logging { self => + extends api.SparkSession with Logging with classic.ColumnConversions { self => // The call site where this SparkSession was constructed. private val creationSite: CallSite = Utils.getCallSite() @@ -431,6 +433,43 @@ class SparkSession private( | Everything else | * ----------------- */ + /** + * Executes given script and return the result of the last statement. + * If script contains no queries, an empty `DataFrame` is returned. + * + * @param script A SQL script to execute. 
+ * @param args A map of parameter names to SQL literal expressions. + * + * @return The result as a `DataFrame`. + */ + private def executeSqlScript( + script: CompoundBody, + args: Map[String, Expression] = Map.empty): DataFrame = { + val sse = new SqlScriptingExecution(script, this, args) + var result: Option[Seq[Row]] = None + + // We must execute returned df before calling sse.getNextResult again because sse.hasNext + // advances the script execution and executes all statements until the next result. We must + // collect results immediately to maintain execution order. + // This ensures we respect the contract of SqlScriptingExecution API. + var df: Option[DataFrame] = sse.getNextResult + while (df.isDefined) { + sse.withErrorHandling { + // Collect results from the current DataFrame. + result = Some(df.get.collect().toSeq) + } + df = sse.getNextResult + } + + if (result.isEmpty) { + emptyDataFrame + } else { + val attributes = DataTypeUtils.toAttributes(result.get.head.schema) + Dataset.ofRows( + self, LocalRelation.fromExternalRows(attributes, result.get)) + } + } + /** * Executes a SQL query substituting positional parameters by the given arguments, * returning the result as a `DataFrame`. @@ -450,17 +489,33 @@ class SparkSession private( withActive { val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { val parsedPlan = sessionState.sqlParser.parsePlan(sqlText) - if (args.nonEmpty) { - PosParameterizedQuery(parsedPlan, args.map(lit(_).expr).toImmutableArraySeq) - } else { - parsedPlan + parsedPlan match { + case compoundBody: CompoundBody => + if (args.nonEmpty) { + // Positional parameters are not supported for SQL scripting. 
+ throw SqlScriptingErrors.positionalParametersAreNotSupportedWithSqlScripting() + } + compoundBody + case logicalPlan: LogicalPlan => + if (args.nonEmpty) { + PosParameterizedQuery(logicalPlan, args.map(lit(_).expr).toImmutableArraySeq) + } else { + logicalPlan + } } } - Dataset.ofRows(self, plan, tracker) + + plan match { + case compoundBody: CompoundBody => + // Execute the SQL script. + executeSqlScript(compoundBody) + case logicalPlan: LogicalPlan => + // Execute the standalone SQL statement. + Dataset.ofRows(self, plan, tracker) + } } /** @inheritdoc */ - @Experimental def sql(sqlText: String, args: Array[_]): DataFrame = { sql(sqlText, args, new QueryPlanningTracker) } @@ -488,23 +543,34 @@ class SparkSession private( withActive { val plan = tracker.measurePhase(QueryPlanningTracker.PARSING) { val parsedPlan = sessionState.sqlParser.parsePlan(sqlText) - if (args.nonEmpty) { - NameParameterizedQuery(parsedPlan, args.transform((_, v) => lit(v).expr)) - } else { - parsedPlan + parsedPlan match { + case compoundBody: CompoundBody => + compoundBody + case logicalPlan: LogicalPlan => + if (args.nonEmpty) { + NameParameterizedQuery(logicalPlan, args.transform((_, v) => lit(v).expr)) + } else { + logicalPlan + } } } - Dataset.ofRows(self, plan, tracker) + + plan match { + case compoundBody: CompoundBody => + // Execute the SQL script. + executeSqlScript(compoundBody, args.transform((_, v) => lit(v).expr)) + case logicalPlan: LogicalPlan => + // Execute the standalone SQL statement. 
+ Dataset.ofRows(self, plan, tracker) + } } /** @inheritdoc */ - @Experimental def sql(sqlText: String, args: Map[String, Any]): DataFrame = { sql(sqlText, args, new QueryPlanningTracker) } /** @inheritdoc */ - @Experimental override def sql(sqlText: String, args: java.util.Map[String, Any]): DataFrame = { sql(sqlText, args.asScala.toMap) } @@ -732,23 +798,11 @@ class SparkSession private( .getOrElse(sparkContext.defaultParallelism) } - private[sql] object Converter extends ColumnNodeToExpressionConverter with Serializable { - override protected def parser: ParserInterface = sessionState.sqlParser - override protected def conf: SQLConf = sessionState.conf - } - - private[sql] def expression(e: Column): Expression = Converter(e.node) - - private[sql] implicit class RichColumn(val column: Column) { - /** - * Returns the expression for this column. - */ - def expr: Expression = Converter(column.node) - /** - * Returns the expression for this column either with an existing or auto assigned name. - */ - def named: NamedExpression = ExpressionUtils.toNamed(expr) - } + override protected[sql] val converter: ColumnNodeToExpressionConverter = + new ColumnNodeToExpressionConverter with Serializable { + override protected def parser: ParserInterface = sessionState.sqlParser + override protected def conf: SQLConf = sessionState.conf + } private[sql] lazy val observationManager = new ObservationManager(this) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/TableArg.scala b/sql/core/src/main/scala/org/apache/spark/sql/TableArg.scala new file mode 100644 index 0000000000000..133775c0b666c --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/TableArg.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.expressions.{Ascending, Expression, FunctionTableSubqueryArgumentExpression, SortOrder} + +class TableArg( + private[sql] val expression: FunctionTableSubqueryArgumentExpression, + sparkSession: SparkSession) + extends TableValuedFunctionArgument { + import sparkSession.toRichColumn + + private def isPartitioned: Boolean = + expression.partitionByExpressions.nonEmpty || expression.withSinglePartition + + @scala.annotation.varargs + def partitionBy(cols: Column*): TableArg = { + if (isPartitioned) { + throw new IllegalArgumentException( + "Cannot call partitionBy() after partitionBy() or withSinglePartition() has been called." + ) + } + val partitionByExpressions = cols.map(_.expr) + new TableArg( + expression.copy( + partitionByExpressions = partitionByExpressions), + sparkSession) + } + + @scala.annotation.varargs + def orderBy(cols: Column*): TableArg = { + if (!isPartitioned) { + throw new IllegalArgumentException( + "Please call partitionBy() or withSinglePartition() before orderBy()." 
+ ) + } + val orderByExpressions = cols.map { col => + col.expr match { + case sortOrder: SortOrder => sortOrder + case expr: Expression => SortOrder(expr, Ascending) + } + } + new TableArg( + expression.copy(orderByExpressions = orderByExpressions), + sparkSession) + } + + def withSinglePartition(): TableArg = { + if (isPartitioned) { + throw new IllegalArgumentException( + "Cannot call withSinglePartition() after partitionBy() or " + + "withSinglePartition() has been called." + ) + } + new TableArg( + expression.copy(withSinglePartition = true), + sparkSession) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index a66a6e54a7c8a..49fe494903cdc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -33,10 +33,11 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRe import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.execution.{ExplainMode, QueryExecution} import org.apache.spark.sql.execution.arrow.ArrowConverters import org.apache.spark.sql.execution.python.EvaluatePython -import org.apache.spark.sql.internal.ExpressionUtils.{column, expression} +import org.apache.spark.sql.internal.ExpressionUtils.expression import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.util.{MutableURLClassLoader, Utils} @@ -143,6 +144,33 @@ private[sql] object PythonSQLUtils extends Logging { } } + def jsonToDDL(json: String): String = { + DataType.fromJson(json).asInstanceOf[StructType].toDDL + } + + def ddlToJson(ddl: String): String 
= { + val dataType = try { + // DDL format, "fieldname datatype, fieldname datatype". + StructType.fromDDL(ddl) + } catch { + case e: Throwable => + try { + // For backwards compatibility, "integer", "struct" and etc. + parseDataType(ddl) + } catch { + case _: Throwable => + try { + // For backwards compatibility, "fieldname: datatype, fieldname: datatype" case. + parseDataType(s"struct<${ddl.trim}>") + } catch { + case _: Throwable => + throw e + } + } + } + dataType.json + } + def unresolvedNamedLambdaVariable(name: String): Column = Column(internal.UnresolvedNamedLambdaVariable.apply(name)) @@ -152,7 +180,8 @@ private[sql] object PythonSQLUtils extends Logging { Column(internal.LambdaFunction(function.node, arguments)) } - def namedArgumentExpression(name: String, e: Column): Column = NamedArgumentExpression(name, e) + def namedArgumentExpression(name: String, e: Column): Column = + Column(NamedArgumentExpression(name, expression(e))) @scala.annotation.varargs def fn(name: String, arguments: Column*): Column = Column.fn(name, arguments: _*) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/artifact/ArtifactManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/artifact/ArtifactManager.scala index d362c5bef878e..6394cef9fc760 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/artifact/ArtifactManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/artifact/ArtifactManager.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.artifact -import java.io.File +import java.io.{File, IOException} +import java.lang.ref.Cleaner import java.net.{URI, URL, URLClassLoader} import java.nio.ByteBuffer import java.nio.file.{CopyOption, Files, Path, Paths, StandardCopyOption} @@ -30,8 +31,8 @@ import scala.reflect.ClassTag import org.apache.commons.io.{FilenameUtils, FileUtils} import org.apache.hadoop.fs.{LocalFileSystem, Path => FSPath} -import org.apache.spark.{JobArtifactSet, JobArtifactState, SparkEnv, SparkException, 
SparkUnsupportedOperationException} -import org.apache.spark.internal.Logging +import org.apache.spark.{JobArtifactSet, JobArtifactState, SparkContext, SparkEnv, SparkException, SparkUnsupportedOperationException} +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.{CONNECT_SCALA_UDF_STUB_PREFIXES, EXECUTOR_USER_CLASS_PATH_FIRST} import org.apache.spark.sql.{Artifact, SparkSession} import org.apache.spark.sql.internal.SQLConf @@ -51,7 +52,7 @@ import org.apache.spark.util.{ChildFirstURLClassLoader, StubClassLoader, Utils} * * @param session The object used to hold the Spark Connect session state. */ -class ArtifactManager(session: SparkSession) extends Logging { +class ArtifactManager(session: SparkSession) extends AutoCloseable with Logging { import ArtifactManager._ // The base directory where all artifacts are stored. @@ -66,12 +67,11 @@ class ArtifactManager(session: SparkSession) extends Logging { // The base directory/URI where all artifacts are stored for this `sessionUUID`. protected[artifact] val (artifactPath, artifactURI): (Path, String) = (ArtifactUtils.concatenatePaths(artifactRootPath, session.sessionUUID), - s"$artifactRootURI${File.separator}${session.sessionUUID}") + s"$artifactRootURI/${session.sessionUUID}") // The base directory/URI where all class file artifacts are stored for this `sessionUUID`. 
protected[artifact] val (classDir, replClassURI): (Path, String) = - (ArtifactUtils.concatenatePaths(artifactPath, "classes"), - s"$artifactURI${File.separator}classes${File.separator}") + (ArtifactUtils.concatenatePaths(artifactPath, "classes"), s"$artifactURI/classes/") private lazy val alwaysApplyClassLoader = session.conf.get(SQLConf.ARTIFACTS_SESSION_ISOLATION_ALWAYS_APPLY_CLASSLOADER.key).toBoolean @@ -88,6 +88,9 @@ class ArtifactManager(session: SparkSession) extends Logging { */ protected val sessionArtifactAdded = new AtomicBoolean(false) + @volatile + protected var cachedClassLoader: Option[ClassLoader] = None + private def withClassLoaderIfNeeded[T](f: => T): T = { val log = s" classloader for session ${session.sessionUUID} because " + s"alwaysApplyClassLoader=$alwaysApplyClassLoader, " + @@ -203,6 +206,7 @@ class ArtifactManager(session: SparkSession) extends Logging { allowOverwrite = true, deleteSource = deleteStagedFile) sessionArtifactAdded.set(true) + cachedClassLoader = None } else { val target = ArtifactUtils.concatenatePaths(artifactPath, normalizedRemoteRelativePath) // Disallow overwriting with modified version @@ -227,6 +231,7 @@ class ArtifactManager(session: SparkSession) extends Logging { (SparkContextResourceType.JAR, normalizedRemoteRelativePath, fragment)) jarsList.add(normalizedRemoteRelativePath) sessionArtifactAdded.set(true) + cachedClassLoader = None } else if (normalizedRemoteRelativePath.startsWith(s"pyfiles${File.separator}")) { session.sparkContext.addFile(uri) sparkContextRelativePaths.add( @@ -282,10 +287,18 @@ class ArtifactManager(session: SparkSession) extends Logging { } } + def classloader: ClassLoader = synchronized { + cachedClassLoader.getOrElse { + val loader = buildClassLoader + cachedClassLoader = Some(loader) + loader + } + } + /** * Returns a [[ClassLoader]] for session-specific jar/class file resources. 
*/ - def classloader: ClassLoader = { + private def buildClassLoader: ClassLoader = { val urls = (getAddedJars :+ classDir.toUri.toURL).toArray val prefixes = SparkEnv.get.conf.get(CONNECT_SCALA_UDF_STUB_PREFIXES) val userClasspathFirst = SparkEnv.get.conf.get(EXECUTOR_USER_CLASS_PATH_FIRST) @@ -361,40 +374,48 @@ class ArtifactManager(session: SparkSession) extends Logging { newArtifactManager } + private val cleanUpStateForGlobalResources = ArtifactStateForCleanup( + session.sessionUUID, + session.sparkContext, + state, + artifactPath) + // Ensure that no reference to `this` is captured/help by the cleanup lambda + private def getCleanable: Cleaner.Cleanable = cleaner.register( + this, + () => ArtifactManager.cleanUpGlobalResources(cleanUpStateForGlobalResources) + ) + private var cleanable = getCleanable + /** * Cleans up all resources specific to this `session`. */ - private[sql] def cleanUpResources(): Unit = { + private def cleanUpResources(): Unit = { logDebug( s"Cleaning up resources for session with sessionUUID ${session.sessionUUID}") - // Clean up added files - val fileserver = SparkEnv.get.rpcEnv.fileServer - val sparkContext = session.sparkContext - if (state != null) { - val shouldUpdateEnv = sparkContext.addedFiles.contains(state.uuid) || - sparkContext.addedArchives.contains(state.uuid) || - sparkContext.addedJars.contains(state.uuid) - if (shouldUpdateEnv) { - sparkContext.addedFiles.remove(state.uuid).foreach(_.keys.foreach(fileserver.removeFile)) - sparkContext.addedArchives.remove(state.uuid).foreach(_.keys.foreach(fileserver.removeFile)) - sparkContext.addedJars.remove(state.uuid).foreach(_.keys.foreach(fileserver.removeJar)) - sparkContext.postEnvironmentUpdate() - } - } - - // Clean up cached relations - val blockManager = sparkContext.env.blockManager - blockManager.removeCache(session.sessionUUID) - - // Clean up artifacts folder - FileUtils.deleteDirectory(artifactPath.toFile) + // Clean up global resources via the Cleaner process. 
+ // Note that this will only be run once per instance. + cleanable.clean() // Clean up internal trackers jarsList.clear() pythonIncludeList.clear() cachedBlockIdList.clear() sparkContextRelativePaths.clear() + + // Removed cached classloader + cachedClassLoader = None + } + + override def close(): Unit = { + cleanUpResources() + } + + private[sql] def cleanUpResourcesForTesting(): Unit = { + cleanUpResources() + // Tests reuse the same instance so we need to re-register the cleanable otherwise, it is run + // only once per instance. + cleanable = getCleanable } def uploadArtifactToFs( @@ -466,4 +487,51 @@ object ArtifactManager extends Logging { throw SparkException.internalError(s"Block $fromId not found in the block manager.") } } + + // Shared cleaner instance + private val cleaner: Cleaner = Cleaner.create() + + /** + * Helper method to clean up global resources (i.e. resources associated with the calling + * instance but held externally in sparkContext, blockManager, disk etc.) + */ + private def cleanUpGlobalResources(cleanupState: ArtifactStateForCleanup): Unit = { + // Clean up added files + val (sparkSessionUUID, sparkContext, state, artifactPath) = ( + cleanupState.sparkSessionUUID, + cleanupState.sparkContext, + cleanupState.jobArtifactState, + cleanupState.artifactPath) + val fileServer = SparkEnv.get.rpcEnv.fileServer + if (state != null) { + val shouldUpdateEnv = sparkContext.addedFiles.contains(state.uuid) || + sparkContext.addedArchives.contains(state.uuid) || + sparkContext.addedJars.contains(state.uuid) + if (shouldUpdateEnv) { + sparkContext.addedFiles.remove(state.uuid).foreach(_.keys.foreach(fileServer.removeFile)) + sparkContext.addedArchives.remove(state.uuid).foreach(_.keys.foreach(fileServer.removeFile)) + sparkContext.addedJars.remove(state.uuid).foreach(_.keys.foreach(fileServer.removeJar)) + sparkContext.postEnvironmentUpdate() + } + } + + // Clean up cached relations + val blockManager = sparkContext.env.blockManager + 
blockManager.removeCache(sparkSessionUUID) + + // Clean up artifacts folder + try { + FileUtils.deleteDirectory(artifactPath.toFile) + } catch { + case e: IOException => + logWarning(log"Failed to delete directory ${MDC(LogKeys.PATH, artifactPath.toFile)}: " + + log"${MDC(LogKeys.EXCEPTION, e.getMessage)}", e) + } + } } + +private[artifact] case class ArtifactStateForCleanup( + sparkSessionUUID: String, + sparkContext: SparkContext, + jobArtifactState: JobArtifactState, + artifactPath: Path) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 92c74f7bede18..b73ea2f80452b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -54,6 +54,11 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case _ if ResolveDefaultStringTypes.needsResolution(plan) => + // if there are still unresolved string types in the plan + // we should not try to resolve it + plan + case AddColumns(ResolvedV1TableIdentifier(ident), cols) => cols.foreach { c => if (c.name.length > 1) { @@ -149,11 +154,11 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. 
case DescribeRelation( - ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended, output) => + ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended, output) => DescribeTableCommand(ident, partitionSpec, isExtended, output) case DescribeColumn( - ResolvedViewIdentifier(ident), column: UnresolvedAttribute, isExtended, output) => + ResolvedViewIdentifier(ident), column: UnresolvedAttribute, isExtended, output) => // For views, the column will not be resolved by `ResolveReferences` because // `ResolvedView` stores only the identifier. DescribeColumnCommand(ident, column.nameParts, isExtended, output) @@ -416,11 +421,12 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) AlterViewSchemaBindingCommand(ident, viewSchemaMode) case CreateView(ResolvedIdentifierInSessionCatalog(ident), userSpecifiedColumns, comment, - properties, originalText, child, allowExisting, replace, viewSchemaMode) => + collation, properties, originalText, child, allowExisting, replace, viewSchemaMode) => CreateViewCommand( name = ident, userSpecifiedColumns = userSpecifiedColumns, comment = comment, + collation = collation, properties = properties, originalText = originalText, plan = child, @@ -429,7 +435,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) viewType = PersistedView, viewSchemaMode = viewSchemaMode) - case CreateView(ResolvedIdentifier(catalog, _), _, _, _, _, _, _, _, _) => + case CreateView(ResolvedIdentifier(catalog, _), _, _, _, _, _, _, _, _, _) => throw QueryCompilationErrors.missingCatalogAbilityError(catalog, "views") case ShowViews(ns: ResolvedNamespace, pattern, output) => @@ -491,6 +497,27 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case CreateFunction(ResolvedIdentifier(catalog, _), _, _, _, _) => throw QueryCompilationErrors.missingCatalogAbilityError(catalog, "CREATE FUNCTION") + + case c @ CreateUserDefinedFunction( + ResolvedIdentifierInSessionCatalog(ident), _, _, _, _, _, _, _, _, _, _, _) 
=> + CreateUserDefinedFunctionCommand( + FunctionIdentifier(ident.table, ident.database, ident.catalog), + c.inputParamText, + c.returnTypeText, + c.exprText, + c.queryText, + c.comment, + c.isDeterministic, + c.containsSQL, + c.language, + c.isTableFunc, + isTemp = false, + c.ignoreIfExists, + c.replace) + + case CreateUserDefinedFunction( + ResolvedIdentifier(catalog, _), _, _, _, _, _, _, _, _, _, _, _) => + throw QueryCompilationErrors.missingCatalogAbilityError(catalog, "CREATE FUNCTION") } private def constructV1TableCmd( @@ -503,8 +530,8 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) storageFormat: CatalogStorageFormat, provider: String): CreateTableV1 = { val tableDesc = buildCatalogTable( - ident, tableSchema, partitioning, tableSpec.properties, provider, - tableSpec.location, tableSpec.comment, storageFormat, tableSpec.external) + ident, tableSchema, partitioning, tableSpec.properties, provider, tableSpec.location, + tableSpec.comment, tableSpec.collation, storageFormat, tableSpec.external) val mode = if (ignoreIfExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableV1(tableDesc, mode, query) } @@ -580,6 +607,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) provider: String, location: Option[String], comment: Option[String], + collation: Option[String], storageFormat: CatalogStorageFormat, external: Boolean): CatalogTable = { val tableType = if (external || location.isDefined) { @@ -600,7 +628,9 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) properties = properties ++ maybeClusterBySpec.map( clusterBySpec => ClusterBySpec.toProperty(schema, clusterBySpec, conf.resolver)), - comment = comment) + comment = comment, + collation = collation + ) } object ResolvedViewIdentifier { @@ -717,7 +747,7 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) private def supportsV1Command(catalog: CatalogPlugin): Boolean = { isSessionCatalog(catalog) && ( - 
SQLConf.get.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION).isEmpty || + SQLConf.get.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION) == "builtin" || catalog.isInstanceOf[CatalogExtension]) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala deleted file mode 100644 index 8ae0341e5646c..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.catalog - -import org.apache.spark.sql.catalyst.FunctionIdentifier -import org.apache.spark.sql.catalyst.catalog.UserDefinedFunction._ -import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.types.{DataType, StructType} - -/** - * Represent a SQL function. 
- * - * @param name qualified name of the SQL function - * @param inputParam function input parameters - * @param returnType function return type - * @param exprText function body as an expression - * @param queryText function body as a query - * @param comment function comment - * @param deterministic whether the function is deterministic - * @param containsSQL whether the function has data access routine to be CONTAINS SQL - * @param isTableFunc whether the function is a table function - * @param properties additional properties to be serialized for the SQL function - * @param owner owner of the function - * @param createTimeMs function creation time in milliseconds - */ -case class SQLFunction( - name: FunctionIdentifier, - inputParam: Option[StructType], - returnType: Either[DataType, StructType], - exprText: Option[String], - queryText: Option[String], - comment: Option[String], - deterministic: Option[Boolean], - containsSQL: Option[Boolean], - isTableFunc: Boolean, - properties: Map[String, String], - owner: Option[String] = None, - createTimeMs: Long = System.currentTimeMillis) extends UserDefinedFunction { - - assert(exprText.nonEmpty || queryText.nonEmpty) - assert((isTableFunc && returnType.isRight) || (!isTableFunc && returnType.isLeft)) - - override val language: RoutineLanguage = LanguageSQL -} - -object SQLFunction { - - /** - * This method returns an optional DataType indicating, when present, either the return type for - * scalar user-defined functions, or a StructType indicating the names and types of the columns in - * the output schema for table functions. If the optional value is empty, this indicates that the - * CREATE FUNCTION statement did not have any RETURNS clause at all (for scalar functions), or - * that it included a RETURNS TABLE clause but without any specified output schema (for table - * functions), prompting the analyzer to infer these metadata instead. 
- */ - def parseReturnTypeText( - text: String, - isTableFunc: Boolean, - parser: ParserInterface): Option[Either[DataType, StructType]] = { - if (!isTableFunc) { - // This is a scalar user-defined function. - if (text.isEmpty) { - // The CREATE FUNCTION statement did not have any RETURNS clause. - Option.empty[Either[DataType, StructType]] - } else { - // The CREATE FUNCTION statement included a RETURNS clause with an explicit return type. - Some(Left(parseDataType(text, parser))) - } - } else { - // This is a table function. - if (text.equalsIgnoreCase("table")) { - // The CREATE FUNCTION statement had a RETURNS TABLE clause but without any explicit schema. - Option.empty[Either[DataType, StructType]] - } else { - // The CREATE FUNCTION statement included a RETURNS TABLE clause with an explicit schema. - Some(Right(parseTableSchema(text, parser))) - } - } - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala deleted file mode 100644 index 1473f19cb71bd..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.catalog - -import org.apache.spark.sql.catalyst.FunctionIdentifier -import org.apache.spark.sql.catalyst.parser.ParserInterface -import org.apache.spark.sql.catalyst.util.CharVarcharUtils -import org.apache.spark.sql.types.{DataType, StructType} - -/** - * The base class for all user defined functions registered via SQL queries. - */ -trait UserDefinedFunction { - - /** - * Qualified name of the function - */ - def name: FunctionIdentifier - - /** - * Additional properties to be serialized for the function. - * Use this to preserve the runtime configuration that should be used during the function - * execution, such as SQL configs etc. See [[SQLConf]] for more info. - */ - def properties: Map[String, String] - - /** - * Owner of the function - */ - def owner: Option[String] - - /** - * Function creation time in milliseconds since the linux epoch - */ - def createTimeMs: Long - - /** - * The language of the user defined function. 
- */ - def language: RoutineLanguage -} - -object UserDefinedFunction { - def parseTableSchema(text: String, parser: ParserInterface): StructType = { - val parsed = parser.parseTableSchema(text) - CharVarcharUtils.failIfHasCharVarchar(parsed).asInstanceOf[StructType] - } - - def parseDataType(text: String, parser: ParserInterface): DataType = { - val dataType = parser.parseDataType(text) - CharVarcharUtils.failIfHasCharVarchar(dataType) - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SQLFunctionNode.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SQLFunctionNode.scala new file mode 100644 index 0000000000000..0a3274af33b5b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/SQLFunctionNode.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.catalog.SQLFunction +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.trees.TreePattern.FUNCTION_TABLE_RELATION_ARGUMENT_EXPRESSION +import org.apache.spark.sql.errors.DataTypeErrors.toSQLId +import org.apache.spark.sql.errors.QueryCompilationErrors + +/** + * A container for holding a SQL function query plan and its function identifier. + * + * @param function: the SQL function that this node represents. + * @param child: the SQL function body. + */ +case class SQLFunctionNode( + function: SQLFunction, + child: LogicalPlan) extends UnaryNode { + override def output: Seq[Attribute] = child.output + override def stringArgs: Iterator[Any] = Iterator(function.name, child) + override protected def withNewChildInternal(newChild: LogicalPlan): SQLFunctionNode = + copy(child = newChild) + + // Throw a reasonable error message when trying to call a SQL UDF with TABLE argument(s). 
+ if (child.containsPattern(FUNCTION_TABLE_RELATION_ARGUMENT_EXPRESSION)) { + throw QueryCompilationErrors + .tableValuedArgumentsNotYetImplementedForSqlFunctions("call", toSQLId(function.name.funcName)) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index b0ce2bb4293e1..23ae5ee7b9be4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -305,6 +305,8 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) extends L case _: Md5 => generateExpressionWithName("MD5", expr, isPredicate) case _: Sha1 => generateExpressionWithName("SHA1", expr, isPredicate) case _: Sha2 => generateExpressionWithName("SHA2", expr, isPredicate) + case _: StringLPad => generateExpressionWithName("LPAD", expr, isPredicate) + case _: StringRPad => generateExpressionWithName("RPAD", expr, isPredicate) // TODO supports other expressions case ApplyFunctionExpression(function, children) => val childrenExpressions = children.flatMap(generateExpression(_)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/classic/ClassicConversions.scala b/sql/core/src/main/scala/org/apache/spark/sql/classic/conversions.scala similarity index 56% rename from sql/core/src/main/scala/org/apache/spark/sql/classic/ClassicConversions.scala rename to sql/core/src/main/scala/org/apache/spark/sql/classic/conversions.scala index 8c3223fa72f55..e90fd4b6a6032 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/classic/ClassicConversions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/classic/conversions.scala @@ -20,8 +20,8 @@ import scala.language.implicitConversions import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.Expression 
-import org.apache.spark.sql.internal.ExpressionUtils +import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression} +import org.apache.spark.sql.internal.{ColumnNodeToExpressionConverter, ExpressionUtils} /** * Conversions from sql interfaces to the Classic specific implementation. @@ -56,4 +56,54 @@ trait ClassicConversions { } } +@DeveloperApi object ClassicConversions extends ClassicConversions + +/** + * Conversions from a [[Column]] to an [[Expression]]. + */ +@DeveloperApi +trait ColumnConversions { + protected def converter: ColumnNodeToExpressionConverter + + /** + * Convert a [[Column]] into an [[Expression]]. + */ + @DeveloperApi + def expression(column: Column): Expression = converter(column.node) + + /** + * Wrap a [[Column]] with a [[RichColumn]] to provide the `expr` and `named` methods. + */ + @DeveloperApi + implicit def toRichColumn(column: Column): RichColumn = new RichColumn(column, converter) +} + +/** + * Automatic conversions from a Column to an Expression. This uses the active SparkSession for + * parsing, and the active SQLConf for fetching configurations. + * + * This functionality is not part of the ClassicConversions because it is generally better to use + * `SparkSession.toRichColumn(...)` or `SparkSession.expression(...)` directly. + */ +@DeveloperApi +object ColumnConversions extends ColumnConversions { + override protected def converter: ColumnNodeToExpressionConverter = + ColumnNodeToExpressionConverter +} + +/** + * Helper class that adds the `expr` and `named` methods to a Column. This can be used to reinstate + * the pre-Spark 4 Column functionality. + */ +@DeveloperApi +class RichColumn(column: Column, converter: ColumnNodeToExpressionConverter) { + /** + * Returns the expression for this column. + */ + def expr: Expression = converter(column.node) + /** + * Returns the expression for this column either with an existing or auto assigned name. 
+ */ + def named: NamedExpression = ExpressionUtils.toNamed(expr) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 64d2633c31079..60156bff1fb71 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -239,7 +239,7 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val complexTypeFactory = JsonToStructs(attr.dataType, ioschema.outputSerdeProps.toMap, Literal(null), Some(conf.sessionLocalTimeZone)) wrapperConvertException(data => - complexTypeFactory.evaluator.evaluate(UTF8String.fromString(data)), any => any) + complexTypeFactory.nullSafeEval(UTF8String.fromString(data)), any => any) case udt: UserDefinedType[_] => wrapperConvertException(data => udt.deserialize(data), converter) case dt => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala index 64163da50e13a..a67648f24b4c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala @@ -194,7 +194,7 @@ case class ColumnarToRowExec(child: SparkPlan) extends ColumnarToRowTransition w | $shouldStop | } | $idx = $numRows; - | $batch.closeIfNotWritable(); + | $batch.closeIfFreeable(); | $batch = null; | $nextBatchFuncName(); |} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/InsertSortForLimitAndOffset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/InsertSortForLimitAndOffset.scala index 6c7a9206a8e39..aa29128cda7e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/InsertSortForLimitAndOffset.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/InsertSortForLimitAndOffset.scala @@ -18,10 +18,11 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.catalyst.expressions.SortOrder +import org.apache.spark.sql.catalyst.plans.logical.{Project, Sort} import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.adaptive.{AQEShuffleReadExec, ShuffleQueryStageExec} import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.python.EvalPythonExec import org.apache.spark.sql.internal.SQLConf /** @@ -41,31 +42,61 @@ object InsertSortForLimitAndOffset extends Rule[SparkPlan] { plan transform { case l @ GlobalLimitExec( _, - SinglePartitionShuffleWithGlobalOrdering(ordering), - _) => - val newChild = SortExec(ordering, global = false, child = l.child) - l.withNewChildren(Seq(newChild)) - } - } - - object SinglePartitionShuffleWithGlobalOrdering { - def unapply(plan: SparkPlan): Option[Seq[SortOrder]] = plan match { - case ShuffleExchangeExec(SinglePartition, SparkPlanWithGlobalOrdering(ordering), _, _) => - Some(ordering) - case p: AQEShuffleReadExec => unapply(p.child) - case p: ShuffleQueryStageExec => unapply(p.plan) - case _ => None + // Should not match AQE shuffle stage because we only target un-submitted stages which + // we can still rewrite the query plan. + s @ ShuffleExchangeExec(SinglePartition, child, _, _), + _) if child.logicalLink.isDefined => + extractOrderingAndPropagateOrderingColumns(child) match { + case Some((ordering, newChild)) => + val newShuffle = s.withNewChildren(Seq(newChild)) + val sorted = SortExec(ordering, global = false, child = newShuffle) + // We must set the logical plan link to avoid losing the added SortExec and ProjectExec + // during AQE re-optimization, where we turn physical plan back to logical plan. 
+ val logicalSort = Sort(ordering, global = false, child = s.child.logicalLink.get) + sorted.setLogicalLink(logicalSort) + val projected = if (sorted.output == s.output) { + sorted + } else { + val p = ProjectExec(s.output, sorted) + p.setLogicalLink(Project(s.output, logicalSort)) + p + } + l.withNewChildren(Seq(projected)) + case _ => l + } } } // Note: this is not implementing a generalized notion of "global order preservation", but just - // tackles the regular ORDER BY semantics with optional LIMIT (top-K). - object SparkPlanWithGlobalOrdering { - def unapply(plan: SparkPlan): Option[Seq[SortOrder]] = plan match { - case p: SortExec if p.global => Some(p.sortOrder) - case p: LocalLimitExec => unapply(p.child) - case p: WholeStageCodegenExec => unapply(p.child) - case _ => None - } + // a best effort to catch the common query patterns that the data ordering should be preserved. + private def extractOrderingAndPropagateOrderingColumns( + plan: SparkPlan): Option[(Seq[SortOrder], SparkPlan)] = plan match { + case p: SortExec if p.global => Some(p.sortOrder, p) + case p: UnaryExecNode if + p.isInstanceOf[LocalLimitExec] || + p.isInstanceOf[WholeStageCodegenExec] || + p.isInstanceOf[FilterExec] || + p.isInstanceOf[EvalPythonExec] => + extractOrderingAndPropagateOrderingColumns(p.child) match { + case Some((ordering, newChild)) => Some((ordering, p.withNewChildren(Seq(newChild)))) + case _ => None + } + case p: ProjectExec => + extractOrderingAndPropagateOrderingColumns(p.child) match { + case Some((ordering, newChild)) => + val orderingCols = ordering.flatMap(_.references) + if (orderingCols.forall(p.outputSet.contains)) { + Some((ordering, p.withNewChildren(Seq(newChild)))) + } else { + // In order to do the sort after shuffle, we must propagate the ordering columns in the + // pre-shuffle ProjectExec. 
+ val missingCols = orderingCols.filterNot(p.outputSet.contains) + val newProj = p.copy(projectList = p.projectList ++ missingCols, child = newChild) + newProj.copyTagsFrom(p) + Some((ordering, newProj)) + } + case _ => None + } + case _ => None } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 490184c93620a..d9b1a2136a5d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -31,12 +31,11 @@ import org.apache.spark.internal.LogKeys.EXTENDED_EXPLAIN_GENERATOR import org.apache.spark.rdd.RDD import org.apache.spark.sql.{AnalysisException, ExtendedExplainGenerator, Row, SparkSession} import org.apache.spark.sql.catalyst.{InternalRow, QueryPlanningTracker} -import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker +import org.apache.spark.sql.catalyst.analysis.{LazyExpression, UnsupportedOperationChecker} import org.apache.spark.sql.catalyst.expressions.codegen.ByteCodeStats import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{AppendData, Command, CommandResult, CreateTableAsSelect, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelect, ReturnAnswer, Union} import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} -import org.apache.spark.sql.catalyst.trees.TreePattern.LAZY_ANALYSIS_EXPRESSION import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.adaptive.{AdaptiveExecutionContext, InsertAdaptiveSparkPlan} @@ -69,7 +68,10 @@ class QueryExecution( // TODO: Move the planner an optimizer into here from SessionState. 
protected def planner = sparkSession.sessionState.planner - lazy val isLazyAnalysis: Boolean = logical.containsAnyPattern(LAZY_ANALYSIS_EXPRESSION) + lazy val isLazyAnalysis: Boolean = { + // Only check the main query as subquery expression can be resolved now with the main query. + logical.exists(_.expressions.exists(_.exists(_.isInstanceOf[LazyExpression]))) + } def assertAnalyzed(): Unit = { try { @@ -90,12 +92,18 @@ class QueryExecution( } private val lazyAnalyzed = LazyTry { - val plan = executePhase(QueryPlanningTracker.ANALYSIS) { - // We can't clone `logical` here, which will reset the `_analyzed` flag. - sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) + try { + val plan = executePhase(QueryPlanningTracker.ANALYSIS) { + // We can't clone `logical` here, which will reset the `_analyzed` flag. + sparkSession.sessionState.analyzer.executeAndCheck(logical, tracker) + } + tracker.setAnalyzed(plan) + plan + } catch { + case NonFatal(e) => + tracker.setAnalysisFailed(logical) + throw e } - tracker.setAnalyzed(plan) - plan } def analyzed: LogicalPlan = lazyAnalyzed.get diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 6173703ef3cd9..a51870cfd7fdd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.optimizer._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogManager -import org.apache.spark.sql.execution.datasources.{PruneFileSourcePartitions, SchemaPruning, V1Writes} +import org.apache.spark.sql.execution.datasources.{PruneFileSourcePartitions, PushVariantIntoScan, SchemaPruning, V1Writes} import 
org.apache.spark.sql.execution.datasources.v2.{GroupBasedRowLevelOperationScanPlanning, OptimizeMetadataOnlyDeleteFromTable, V2ScanPartitioningAndOrdering, V2ScanRelationPushDown, V2Writes} import org.apache.spark.sql.execution.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning, RowLevelOperationRuntimeGroupFiltering} import org.apache.spark.sql.execution.python.{ExtractGroupingPythonUDFFromAggregate, ExtractPythonUDFFromAggregate, ExtractPythonUDFs, ExtractPythonUDTFs} @@ -36,38 +36,42 @@ class SparkOptimizer( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = // TODO: move SchemaPruning into catalyst - Seq(SchemaPruning) :+ - GroupBasedRowLevelOperationScanPlanning :+ - V1Writes :+ - V2ScanRelationPushDown :+ - V2ScanPartitioningAndOrdering :+ - V2Writes :+ - PruneFileSourcePartitions + Seq( + SchemaPruning, + GroupBasedRowLevelOperationScanPlanning, + V1Writes, + V2ScanRelationPushDown, + V2ScanPartitioningAndOrdering, + V2Writes, + PruneFileSourcePartitions, + PushVariantIntoScan) override def preCBORules: Seq[Rule[LogicalPlan]] = - OptimizeMetadataOnlyDeleteFromTable :: Nil + Seq(OptimizeMetadataOnlyDeleteFromTable) - override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+ - Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ + override def defaultBatches: Seq[Batch] = flattenBatches(Seq( + preOptimizationBatches, + super.defaultBatches, + Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)), Batch("PartitionPruning", Once, PartitionPruning, // We can't run `OptimizeSubqueries` in this batch, as it will optimize the subqueries // twice which may break some optimizer rules that can only be applied once. The rule below // only invokes `OptimizeSubqueries` to optimize newly added subqueries. 
- new RowLevelOperationRuntimeGroupFiltering(OptimizeSubqueries)) :+ + new RowLevelOperationRuntimeGroupFiltering(OptimizeSubqueries)), Batch("InjectRuntimeFilter", FixedPoint(1), - InjectRuntimeFilter) :+ + InjectRuntimeFilter), Batch("MergeScalarSubqueries", Once, MergeScalarSubqueries, - RewriteDistinctAggregates) :+ + RewriteDistinctAggregates), Batch("Pushdown Filters from PartitionPruning", fixedPoint, - PushDownPredicates) :+ + PushDownPredicates), Batch("Cleanup filters that cannot be pushed down", Once, CleanupDynamicPruningFilters, // cleanup the unnecessary TrueLiteral predicates BooleanSimplification, - PruneFilters)) ++ - postHocOptimizationBatches :+ + PruneFilters), + postHocOptimizationBatches, Batch("Extract Python UDFs", Once, ExtractPythonUDFFromJoinCondition, // `ExtractPythonUDFFromJoinCondition` can convert a join to a cartesian product. @@ -84,25 +88,27 @@ class SparkOptimizer( LimitPushDown, PushPredicateThroughNonJoin, PushProjectionThroughLimit, - RemoveNoopOperators) :+ + RemoveNoopOperators), Batch("Infer window group limit", Once, InferWindowGroupLimit, LimitPushDown, LimitPushDownThroughWindow, EliminateLimits, - ConstantFolding) :+ - Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*) :+ - Batch("Replace CTE with Repartition", Once, ReplaceCTERefWithRepartition) + ConstantFolding), + Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*), + Batch("Replace CTE with Repartition", Once, ReplaceCTERefWithRepartition))) - override def nonExcludableRules: Seq[String] = super.nonExcludableRules :+ - ExtractPythonUDFFromJoinCondition.ruleName :+ - ExtractPythonUDFFromAggregate.ruleName :+ ExtractGroupingPythonUDFFromAggregate.ruleName :+ - ExtractPythonUDFs.ruleName :+ - GroupBasedRowLevelOperationScanPlanning.ruleName :+ - V2ScanRelationPushDown.ruleName :+ - V2ScanPartitioningAndOrdering.ruleName :+ - V2Writes.ruleName :+ - ReplaceCTERefWithRepartition.ruleName + 
override def nonExcludableRules: Seq[String] = super.nonExcludableRules ++ + Seq( + ExtractPythonUDFFromJoinCondition.ruleName, + ExtractPythonUDFFromAggregate.ruleName, + ExtractGroupingPythonUDFFromAggregate.ruleName, + ExtractPythonUDFs.ruleName, + GroupBasedRowLevelOperationScanPlanning.ruleName, + V2ScanRelationPushDown.ruleName, + V2ScanPartitioningAndOrdering.ruleName, + V2Writes.ruleName, + ReplaceCTERefWithRepartition.ruleName) /** * Optimization batches that are executed before the regular optimization batches (also before diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala index da3159319f98e..5dfe85548349c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala @@ -25,10 +25,13 @@ import org.apache.spark.sql.execution.adaptive.LogicalQueryStageStrategy import org.apache.spark.sql.execution.command.v2.V2CommandStrategy import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, FileSourceStrategy} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Strategy +import org.apache.spark.sql.internal.SQLConf class SparkPlanner(val session: SparkSession, val experimentalMethods: ExperimentalMethods) extends SparkStrategies with SQLConfHelper { + override def conf: SQLConf = session.sessionState.conf + def numPartitions: Int = conf.numShufflePartitions override def strategies: Seq[Strategy] = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 9fbe400a555fc..2b7be9b34b9aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -27,7 +27,7 @@ import org.antlr.v4.runtime.tree.TerminalNode 
import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, PersistedView, PlanWithUnresolvedIdentifier, SchemaEvolution, SchemaTypeEvolution, UnresolvedFunctionName, UnresolvedIdentifier, UnresolvedNamespace} +import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, PersistedView, PlanWithUnresolvedIdentifier, SchemaEvolution, SchemaTypeEvolution, UnresolvedAttribute, UnresolvedFunctionName, UnresolvedIdentifier, UnresolvedNamespace} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} import org.apache.spark.sql.catalyst.parser._ @@ -63,7 +63,7 @@ class SparkSqlAstBuilder extends AstBuilder { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ private val configKeyValueDef = """([a-zA-Z_\d\\.:]+)\s*=([^;]*);*""".r - private val configKeyDef = """([a-zA-Z_\d\\.:]+)$""".r + private val configKeyDef = """([a-zA-Z_\d\\.:]+)\s*$""".r private val configValueDef = """([^;]*);*""".r private val strLiteralDef = """(".*?[^\\]"|'.*?[^\\]'|[^ \n\r\t"']+)""".r @@ -106,14 +106,14 @@ class SparkSqlAstBuilder extends AstBuilder { SetCommand(Some(keyStr -> None)) } } else { - remainder(ctx.SET.getSymbol).trim match { + remainder(ctx.SET.getSymbol).trim.replaceAll(";+$", "") match { case configKeyValueDef(key, value) => SetCommand(Some(key -> Option(value.trim))) case configKeyDef(key) => SetCommand(Some(key -> None)) - case s if s == "-v" => + case s if s.trim == "-v" => SetCommand(Some("-v" -> None)) - case s if s.isEmpty => + case s if s.trim.isEmpty => SetCommand(None) case _ => throw QueryParsingErrors.unexpectedFormatForSetConfigurationError(ctx) } @@ -146,7 +146,7 @@ class SparkSqlAstBuilder extends AstBuilder { */ override def visitResetConfiguration( ctx: ResetConfigurationContext): LogicalPlan = withOrigin(ctx) { - 
remainder(ctx.RESET.getSymbol).trim match { + remainder(ctx.RESET.getSymbol).trim.replaceAll(";+$", "") match { case configKeyDef(key) => ResetCommand(Some(key)) case s if s.trim.isEmpty => @@ -377,7 +377,7 @@ class SparkSqlAstBuilder extends AstBuilder { invalidStatement("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) } - val (_, _, _, _, options, location, _, _, _) = + val (_, _, _, _, options, location, _, _, _, _) = visitCreateTableClauses(ctx.createTableClauses()) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( throw QueryParsingErrors.createTempTableNotSpecifyProviderError(ctx)) @@ -520,6 +520,7 @@ class SparkSqlAstBuilder extends AstBuilder { * * create_view_clauses (order insensitive): * [COMMENT view_comment] + * [DEFAULT COLLATION collation_name] * [TBLPROPERTIES (property_name = property_value, ...)] * }}} */ @@ -529,6 +530,7 @@ class SparkSqlAstBuilder extends AstBuilder { } checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) + checkDuplicateClauses(ctx.collationSpec(), "DEFAULT COLLATION", ctx) checkDuplicateClauses(ctx.schemaBinding(), "WITH SCHEMA", ctx) checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED ON", ctx) checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) @@ -584,6 +586,7 @@ class SparkSqlAstBuilder extends AstBuilder { withIdentClause(ctx.identifierReference(), UnresolvedIdentifier(_)), userSpecifiedColumns, visitCommentSpecList(ctx.commentSpec()), + visitCollationSpecList(ctx.collationSpec()), properties, Some(originalText), qPlan, @@ -609,6 +612,7 @@ class SparkSqlAstBuilder extends AstBuilder { tableIdentifier, userSpecifiedColumns, visitCommentSpecList(ctx.commentSpec()), + visitCollationSpecList(ctx.collationSpec()), properties, Option(source(ctx.query)), otherPlans.head, @@ -719,8 +723,19 @@ class SparkSqlAstBuilder extends AstBuilder { withIdentClause(ctx.identifierReference(), functionIdentifier => { if (ctx.TEMPORARY == null) { - // TODO: support creating persistent UDFs. 
- operationNotAllowed(s"creating persistent SQL functions is not supported", ctx) + CreateUserDefinedFunction( + UnresolvedIdentifier(functionIdentifier), + inputParamText, + returnTypeText, + exprText, + queryText, + comment, + deterministic, + containsSQL, + language, + isTableFunc, + ctx.EXISTS != null, + ctx.REPLACE != null) } else { // Disallow to define a temporary function with `IF NOT EXISTS` if (ctx.EXISTS != null) { @@ -1138,4 +1153,46 @@ class SparkSqlAstBuilder extends AstBuilder { withIdentClause(ctx.identifierReference(), UnresolvedNamespace(_)), cleanedProperties) } + + /** + * Create a [[DescribeColumn]] or [[DescribeRelation]] or [[DescribeRelationAsJsonCommand]] + * command. + */ + override def visitDescribeRelation(ctx: DescribeRelationContext): LogicalPlan = withOrigin(ctx) { + val isExtended = ctx.EXTENDED != null || ctx.FORMATTED != null + val asJson = ctx.JSON != null + if (asJson && !isExtended) { + val tableName = ctx.identifierReference.getText.split("\\.").lastOption.getOrElse("table") + throw QueryCompilationErrors.describeJsonNotExtendedError(tableName) + } + val relation = createUnresolvedTableOrView(ctx.identifierReference, "DESCRIBE TABLE") + if (ctx.describeColName != null) { + if (ctx.partitionSpec != null) { + throw QueryParsingErrors.descColumnForPartitionUnsupportedError(ctx) + } else if (asJson) { + throw QueryCompilationErrors.describeColJsonUnsupportedError() + } else { + DescribeColumn( + relation, + UnresolvedAttribute(ctx.describeColName.nameParts.asScala.map(_.getText).toSeq), + isExtended) + } + } else { + val partitionSpec = if (ctx.partitionSpec != null) { + // According to the syntax, visitPartitionSpec returns `Map[String, Option[String]]`. 
+ visitPartitionSpec(ctx.partitionSpec).map { + case (key, Some(value)) => key -> value + case (key, _) => + throw QueryParsingErrors.emptyPartitionKeyError(key, ctx.partitionSpec) + } + } else { + Map.empty[String, String] + } + if (asJson) { + DescribeRelationJsonCommand(relation, partitionSpec, isExtended) + } else { + DescribeRelation(relation, partitionSpec, isExtended) + } + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 22082aca81a22..36e25773f8342 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -607,7 +607,12 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { // [COUNT(DISTINCT bar), COUNT(DISTINCT foo)] is disallowed because those two distinct // aggregates have different column expressions. val distinctExpressions = - functionsWithDistinct.head.aggregateFunction.children.filterNot(_.foldable) + functionsWithDistinct.head.aggregateFunction.children + .filterNot(_.foldable) + .map { + case s: SortOrder => s.child + case e => e + } val normalizedNamedDistinctExpressions = distinctExpressions.map { e => // Ideally this should be done in `NormalizeFloatingNumbers`, but we do it here // because `distinctExpressions` is not extracted during logical phase. 
@@ -789,8 +794,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { object TransformWithStateInPandasStrategy extends Strategy { override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case t @ TransformWithStateInPandas( - func, _, outputAttrs, outputMode, timeMode, child, - hasInitialState, initialState, _, initialStateSchema) => + func, _, outputAttrs, outputMode, timeMode, child, + hasInitialState, initialState, _, initialStateSchema) => val execPlan = TransformWithStateInPandasExec( func, t.leftAttributes, outputAttrs, outputMode, timeMode, stateInfo = None, @@ -798,6 +803,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { eventTimeWatermarkForLateEvents = None, eventTimeWatermarkForEviction = None, planLater(child), + isStreaming = true, hasInitialState, planLater(initialState), t.rightAttributes, @@ -962,6 +968,12 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { keyEncoder, outputObjAttr, planLater(child), hasInitialState, initialStateGroupingAttrs, initialStateDataAttrs, initialStateDeserializer, planLater(initialState)) :: Nil + case t @ TransformWithStateInPandas( + func, _, outputAttrs, outputMode, timeMode, child, + hasInitialState, initialState, _, initialStateSchema) => + TransformWithStateInPandasExec.generateSparkPlanForBatchQueries(func, + t.leftAttributes, outputAttrs, outputMode, timeMode, planLater(child), hasInitialState, + planLater(initialState), t.rightAttributes, initialStateSchema) :: Nil case _: FlatMapGroupsInPandasWithState => // TODO(SPARK-40443): support applyInPandasWithState in batch query diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 1bbc26f3e52ed..3fdcb17bdeae6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -33,6 +33,8 @@ import org.apache.spark.util.Utils */ case class CoalesceShufflePartitions(session: SparkSession) extends AQEShuffleReadRule { + override def conf: SQLConf = session.sessionState.conf + override val supportedShuffleOrigins: Seq[ShuffleOrigin] = Seq(ENSURE_REQUIREMENTS, REPARTITION_BY_COL, REBALANCE_PARTITIONS_BY_NONE, REBALANCE_PARTITIONS_BY_COL) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index 8517911d70262..73fc9b1fe4e2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -44,6 +44,8 @@ import org.apache.spark.sql.internal.SQLConf case class InsertAdaptiveSparkPlan( adaptiveExecutionContext: AdaptiveExecutionContext) extends Rule[SparkPlan] { + override def conf: SQLConf = adaptiveExecutionContext.session.sessionState.conf + override def apply(plan: SparkPlan): SparkPlan = applyInternal(plan, false) private def applyInternal(plan: SparkPlan, isSubquery: Boolean): SparkPlan = plan match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveDynamicPruningFilters.scala index 3d35abff3c538..77c180b18aee0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveDynamicPruningFilters.scala @@ -25,12 +25,16 @@ import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.execution._ import 
org.apache.spark.sql.execution.exchange.BroadcastExchangeExec import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, HashedRelationBroadcastMode, HashJoin} +import org.apache.spark.sql.internal.SQLConf /** * A rule to insert dynamic pruning predicates in order to reuse the results of broadcast. */ case class PlanAdaptiveDynamicPruningFilters( rootPlan: AdaptiveSparkPlanExec) extends Rule[SparkPlan] with AdaptiveSparkPlanHelper { + + override def conf: SQLConf = rootPlan.context.session.sessionState.conf + def apply(plan: SparkPlan): SparkPlan = { if (!conf.dynamicPartitionPruningEnabled) { return plan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveSubqueries.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveSubqueries.scala index 35a815d83922d..5f2638655c37c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveSubqueries.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveSubqueries.scala @@ -30,7 +30,7 @@ case class PlanAdaptiveSubqueries( def apply(plan: SparkPlan): SparkPlan = { plan.transformAllExpressionsWithPruning( _.containsAnyPattern(SCALAR_SUBQUERY, IN_SUBQUERY, DYNAMIC_PRUNING_SUBQUERY)) { - case expressions.ScalarSubquery(_, _, exprId, _, _, _, _, _) => + case expressions.ScalarSubquery(_, _, exprId, _, _, _, _) => val subquery = SubqueryExec.createForScalarSubquery( s"subquery#${exprId.id}", subqueryMap(exprId.id)) execution.ScalarSubquery(subquery, exprId) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala index bb7d904402ded..1ea4df0254673 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala @@ -61,7 
+61,7 @@ object ShufflePartitionsUtil extends Logging { val targetSize = maxTargetSize.min(advisoryTargetSize).max(minPartitionSize) val shuffleIds = mapOutputStatistics.flatMap(_.map(_.shuffleId)).mkString(", ") - logInfo(log"For shuffle(${MDC(LogKeys.SHUFFLE_ID, shuffleIds)}, advisory target size: " + + logInfo(log"For shuffle(${MDC(LogKeys.SHUFFLE_IDS, shuffleIds)}, advisory target size: " + log"${MDC(LogKeys.ADVISORY_TARGET_SIZE, advisoryTargetSize)}, actual target size " + log"${MDC(LogKeys.TARGET_SIZE, targetSize)}, minimum partition size: " + log"${MDC(LogKeys.PARTITION_SIZE, minPartitionSize)}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala index 09d9915022a65..1197a16a35e9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala @@ -501,17 +501,17 @@ case class ScalaAggregator[IN, BUF, OUT]( with Logging { // input and buffer encoders are resolved by ResolveEncodersInScalaAgg - private[this] lazy val inputDeserializer = inputEncoder.createDeserializer() - private[this] lazy val bufferSerializer = bufferEncoder.createSerializer() - private[this] lazy val bufferDeserializer = bufferEncoder.createDeserializer() - private[this] lazy val outputEncoder = encoderFor(agg.outputEncoder) - private[this] lazy val outputSerializer = outputEncoder.createSerializer() + @transient private[this] lazy val inputDeserializer = inputEncoder.createDeserializer() + @transient private[this] lazy val bufferSerializer = bufferEncoder.createSerializer() + @transient private[this] lazy val bufferDeserializer = bufferEncoder.createDeserializer() + @transient private[this] lazy val outputEncoder = encoderFor(agg.outputEncoder) + @transient private[this] lazy val outputSerializer = outputEncoder.createSerializer() def dataType: DataType = 
outputEncoder.objSerializer.dataType def inputTypes: Seq[DataType] = inputEncoder.schema.map(_.dataType) - override lazy val deterministic: Boolean = isDeterministic + @transient override lazy val deterministic: Boolean = isDeterministic def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ScalaAggregator[IN, BUF, OUT] = copy(mutableAggBufferOffset = newMutableAggBufferOffset) @@ -519,7 +519,7 @@ case class ScalaAggregator[IN, BUF, OUT]( def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ScalaAggregator[IN, BUF, OUT] = copy(inputAggBufferOffset = newInputAggBufferOffset) - private[this] lazy val inputProjection = UnsafeProjection.create(children) + @transient private[this] lazy val inputProjection = UnsafeProjection.create(children) def createAggregationBuffer(): BUF = agg.zero @@ -533,7 +533,7 @@ case class ScalaAggregator[IN, BUF, OUT]( if (outputEncoder.isSerializedAsStruct) row else row.get(0, dataType) } - private[this] lazy val bufferRow = new UnsafeRow(bufferEncoder.namedExpressions.length) + @transient private[this] lazy val bufferRow = new UnsafeRow(bufferEncoder.namedExpressions.length) def serialize(agg: BUF): Array[Byte] = bufferSerializer(agg).asInstanceOf[UnsafeRow].getBytes() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 23555c98135f6..1268b14a32fb5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -140,6 +140,7 @@ case class AnalyzeColumnCommand( case DoubleType | FloatType => true case BooleanType => true case _: DatetimeType => true + case CharType(_) | VarcharType(_) => false case BinaryType | _: StringType => true case _ => false } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala index d2aaa93fcca06..fe4e6f121f57b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala @@ -17,9 +17,19 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.SparkException +import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.FunctionIdentifier -import org.apache.spark.sql.catalyst.catalog.SQLFunction +import org.apache.spark.sql.catalyst.analysis.{Analyzer, SQLFunctionNode, UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation} +import org.apache.spark.sql.catalyst.catalog.{SessionCatalog, SQLFunction, UserDefinedFunctionErrors} +import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Generator, LateralSubquery, Literal, ScalarSubquery, SubqueryExpression, WindowExpression} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.plans.Inner +import org.apache.spark.sql.catalyst.plans.logical.{LateralJoin, LogicalPlan, OneRowRelation, Project, UnresolvedWith} +import org.apache.spark.sql.catalyst.trees.TreePattern.UNRESOLVED_ATTRIBUTE +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.command.CreateUserDefinedFunctionCommand._ +import org.apache.spark.sql.types.{DataType, StructField, StructType} /** * The DDL command that creates a SQL function. 
@@ -52,10 +62,13 @@ case class CreateSQLFunctionCommand( replace: Boolean) extends CreateUserDefinedFunctionCommand { - override def run(sparkSession: SparkSession): Seq[Row] = { - import SQLFunction._ + import SQLFunction._ + override def run(sparkSession: SparkSession): Seq[Row] = { val parser = sparkSession.sessionState.sqlParser + val analyzer = sparkSession.sessionState.analyzer + val catalog = sparkSession.sessionState.catalog + val conf = sparkSession.sessionState.conf val inputParam = inputParamText.map(parser.parseTableSchema) val returnType = parseReturnTypeText(returnTypeText, isTableFunc, parser) @@ -72,8 +85,332 @@ case class CreateSQLFunctionCommand( isTableFunc, Map.empty) - // TODO: Implement the rest of the method. + val newFunction = { + val (expression, query) = function.getExpressionAndQuery(parser, isTableFunc) + assert(query.nonEmpty || expression.nonEmpty) + + // Check if the function can be replaced. + if (replace && catalog.functionExists(name)) { + checkFunctionSignatures(catalog, name) + } + + // Build function input. + val inputPlan = if (inputParam.isDefined) { + val param = inputParam.get + checkParameterNotNull(param, inputParamText.get) + checkParameterNameDuplication(param, conf, name) + checkDefaultsTrailing(param, name) + + // Qualify the input parameters with the function name so that attributes referencing + // the function input parameters can be resolved correctly. 
+ val qualifier = Seq(name.funcName) + val input = param.map(p => Alias( + { + val defaultExpr = p.getDefault() + if (defaultExpr.isEmpty) { + Literal.create(null, p.dataType) + } else { + val defaultPlan = parseDefault(defaultExpr.get, parser) + if (SubqueryExpression.hasSubquery(defaultPlan)) { + throw new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_EXPRESSION", + messageParameters = + Map("functionName" -> name.funcName, "parameterName" -> p.name)) + } else if (defaultPlan.containsPattern(UNRESOLVED_ATTRIBUTE)) { + // TODO(SPARK-50698): use parsed expression instead of expression string. + defaultPlan.collect { + case a: UnresolvedAttribute => + throw QueryCompilationErrors.unresolvedAttributeError( + "UNRESOLVED_COLUMN", a.sql, Seq.empty, a.origin) + } + } + Cast(defaultPlan, p.dataType) + } + }, p.name)(qualifier = qualifier)) + Project(input, OneRowRelation()) + } else { + OneRowRelation() + } + + // Build the function body and check if the function body can be analyzed successfully. + val (unresolvedPlan, analyzedPlan, inferredReturnType) = if (!isTableFunc) { + // Build SQL scalar function plan. + val outputExpr = if (query.isDefined) ScalarSubquery(query.get) else expression.get + val plan: LogicalPlan = returnType.map { t => + val retType: DataType = t match { + case Left(t) => t + case _ => throw SparkException.internalError( + "Unexpected return type for a scalar SQL UDF.") + } + val outputCast = Seq(Alias(Cast(outputExpr, retType), name.funcName)()) + Project(outputCast, inputPlan) + }.getOrElse { + // If no explicit RETURNS clause is present, infer the result type from the function body. + val outputAlias = Seq(Alias(outputExpr, name.funcName)()) + Project(outputAlias, inputPlan) + } + + // Check the function body can be analyzed correctly. 
+ val analyzed = analyzer.execute(plan) + val (resolved, resolvedReturnType) = analyzed match { + case p @ Project(expr :: Nil, _) if expr.resolved => + (p, Left(expr.dataType)) + case other => + (other, function.returnType) + } + + // Check if the SQL function body contains aggregate/window functions. + // This check needs to be performed before checkAnalysis to provide better error messages. + checkAggOrWindowOrGeneratorExpr(resolved) + + // Check if the SQL function body can be analyzed. + checkFunctionBodyAnalysis(analyzer, function, resolved) + + (plan, resolved, resolvedReturnType) + } else { + // Build SQL table function plan. + if (query.isEmpty) { + throw UserDefinedFunctionErrors.bodyIsNotAQueryForSqlTableUdf(name.funcName) + } + + // Construct a lateral join to analyze the function body. + val plan = LateralJoin(inputPlan, LateralSubquery(query.get), Inner, None) + val analyzed = analyzer.execute(plan) + val newPlan = analyzed match { + case Project(_, j: LateralJoin) => j + case j: LateralJoin => j + case _ => throw SparkException.internalError("Unexpected plan returned when " + + s"creating a SQL TVF: ${analyzed.getClass.getSimpleName}.") + } + val maybeResolved = newPlan.asInstanceOf[LateralJoin].right.plan + + // Check if the function body can be analyzed. + checkFunctionBodyAnalysis(analyzer, function, maybeResolved) + + // Get the function's return schema. + val returnParam: StructType = returnType.map { + case Right(t) => t + case Left(_) => throw SparkException.internalError( + "Unexpected return schema for a SQL table function.") + }.getOrElse { + // If no explicit RETURNS clause is present, infer the result type from the function body. + // To detect this, we search for instances of the UnresolvedAlias expression. 
Examples: + // CREATE TABLE t USING PARQUET AS VALUES (0, 1), (1, 2) AS tab(c1, c2); + // SELECT c1 FROM t --> UnresolvedAttribute: 'c1 + // SELECT c1 + 1 FROM t --> UnresolvedAlias: unresolvedalias(('c1 + 1), None) + // SELECT c1 + 1 AS a FROM t --> Alias: ('c1 + 1) AS a#2 + query.get match { + case Project(projectList, _) if projectList.exists(_.isInstanceOf[UnresolvedAlias]) => + throw UserDefinedFunctionErrors.missingColumnNamesForSqlTableUdf(name.funcName) + case _ => + StructType(analyzed.asInstanceOf[LateralJoin].right.plan.output.map { col => + StructField(col.name, col.dataType) + }) + } + } + + // Check the return columns cannot have NOT NULL specified. + checkParameterNotNull(returnParam, returnTypeText) + + // Check duplicated return column names. + checkReturnsColumnDuplication(returnParam, conf, name) + + // Check if the actual output size equals to the number of return parameters. + val outputSize = maybeResolved.output.size + if (outputSize != returnParam.size) { + throw new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.RETURN_COLUMN_COUNT_MISMATCH", + messageParameters = Map( + "outputSize" -> s"$outputSize", + "returnParamSize" -> s"${returnParam.size}", + "name" -> s"$name" + ) + ) + } + + (plan, analyzed, Right(returnParam)) + } + + // A permanent function is not allowed to reference temporary objects. + // This should be called after `qe.assertAnalyzed()` (i.e., `plan` can be resolved) + verifyTemporaryObjectsNotExists(catalog, isTemp, name, unresolvedPlan, analyzedPlan) + + // Generate function properties. + val properties = generateFunctionProperties(sparkSession, unresolvedPlan, analyzedPlan) + + // Derive determinism of the SQL function. + val deterministic = analyzedPlan.deterministic + + function.copy( + // Assign the return type, inferring from the function body if needed. 
+ returnType = inferredReturnType, + deterministic = Some(function.deterministic.getOrElse(deterministic)), + properties = properties + ) + } + + if (isTemp) { + if (isTableFunc) { + catalog.registerSQLTableFunction(newFunction, overrideIfExists = replace) + } else { + catalog.registerSQLScalarFunction(newFunction, overrideIfExists = replace) + } + } else { + if (replace && catalog.functionExists(name)) { + // Hive metastore alter function method does not alter function resources + // so the existing function must be dropped first when replacing a SQL function. + assert(!ignoreIfExists) + catalog.dropFunction(name, ignoreIfExists) + } + // For a persistent function, we will store the metadata into underlying external catalog. + // This function will be loaded into the FunctionRegistry when a query uses it. + // We do not load it into FunctionRegistry right now, to avoid loading the resource + // immediately, as the Spark application to create the function may not have + // access to the function. + catalog.createUserDefinedFunction(newFunction, ignoreIfExists) + } Seq.empty } + + /** + * Check if the function body can be analyzed. + */ + private def checkFunctionBodyAnalysis( + analyzer: Analyzer, + function: SQLFunction, + body: LogicalPlan): Unit = { + analyzer.checkAnalysis(SQLFunctionNode(function, body)) + } + + /** Check whether the new function is replacing an existing SQL function. */ + private def checkFunctionSignatures(catalog: SessionCatalog, name: FunctionIdentifier): Unit = { + val info = catalog.lookupFunctionInfo(name) + if (!isSQLFunction(info.getClassName)) { + throw new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.CANNOT_REPLACE_NON_SQL_UDF_WITH_SQL_UDF", + messageParameters = Map("name" -> s"$name") + ) + } + } + + /** + * Collect all temporary views and functions and return the identifiers separately + * This func traverses the unresolved plan `child`. 
Below are the reasons: + * 1) Analyzer replaces unresolved temporary views by a SubqueryAlias with the corresponding + * logical plan. After replacement, it is impossible to detect whether the SubqueryAlias is + * added/generated from a temporary view. + * 2) The temp functions are represented by multiple classes. Most are inaccessible from this + * package (e.g., HiveGenericUDF). + * 3) Temporary SQL functions, once resolved, cannot be identified as temp functions. + */ + private def collectTemporaryObjectsInUnresolvedPlan( + catalog: SessionCatalog, + child: LogicalPlan): (Seq[Seq[String]], Seq[String]) = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = { + child.flatMap { + case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => + Seq(nameParts) + case w: UnresolvedWith if !w.resolved => w.innerChildren.flatMap(collectTempViews) + case plan if !plan.resolved => plan.expressions.flatMap(_.flatMap { + case e: SubqueryExpression => collectTempViews(e.plan) + case _ => Seq.empty + }) + case _ => Seq.empty + }.distinct + } + + def collectTempFunctions(child: LogicalPlan): Seq[String] = { + child.flatMap { + case w: UnresolvedWith if !w.resolved => w.innerChildren.flatMap(collectTempFunctions) + case plan if !plan.resolved => + plan.expressions.flatMap(_.flatMap { + case e: SubqueryExpression => collectTempFunctions(e.plan) + case e: UnresolvedFunction + if catalog.isTemporaryFunction(e.nameParts.asFunctionIdentifier) => + Seq(e.nameParts.asFunctionIdentifier.funcName) + case _ => Seq.empty + }) + case _ => Seq.empty + }.distinct + } + (collectTempViews(child), collectTempFunctions(child)) + } + + /** + * Permanent functions are not allowed to reference temp objects, including temp functions + * and temp views. 
+ */ + private def verifyTemporaryObjectsNotExists( + catalog: SessionCatalog, + isTemporary: Boolean, + name: FunctionIdentifier, + child: LogicalPlan, + analyzed: LogicalPlan): Unit = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + if (!isTemporary) { + val (tempViews, tempFunctions) = collectTemporaryObjectsInUnresolvedPlan(catalog, child) + tempViews.foreach { nameParts => + throw UserDefinedFunctionErrors.invalidTempViewReference( + routineName = name.asMultipart, tempViewName = nameParts) + } + tempFunctions.foreach { funcName => + throw UserDefinedFunctionErrors.invalidTempFuncReference( + routineName = name.asMultipart, tempFuncName = funcName) + } + val tempVars = ViewHelper.collectTemporaryVariables(analyzed) + tempVars.foreach { varName => + throw UserDefinedFunctionErrors.invalidTempVarReference( + routineName = name.asMultipart, varName = varName) + } + } + } + + /** + * Check if the SQL function body contains aggregate/window/generate functions. + * Note subqueries inside the SQL function body can contain aggregate/window/generate functions. + */ + private def checkAggOrWindowOrGeneratorExpr(plan: LogicalPlan): Unit = { + if (plan.resolved) { + plan.transformAllExpressions { + case e if e.isInstanceOf[WindowExpression] || e.isInstanceOf[Generator] || + e.isInstanceOf[AggregateExpression] => + throw new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.CANNOT_CONTAIN_COMPLEX_FUNCTIONS", + messageParameters = Map("queryText" -> s"${exprText.orElse(queryText).get}") + ) + } + } + } + + /** + * Generate the function properties, including: + * 1. the SQL configs when creating the function. + * 2. the catalog and database name when creating the function. This will be used to provide + * context during nested function resolution. + * 3. referred temporary object names if the function is a temp function. 
+ */ + private def generateFunctionProperties( + session: SparkSession, + plan: LogicalPlan, + analyzed: LogicalPlan): Map[String, String] = { + val catalog = session.sessionState.catalog + val conf = session.sessionState.conf + val manager = session.sessionState.catalogManager + + // Only collect temporary object names when the function is a temp function. + val (tempViews, tempFunctions) = if (isTemp) { + collectTemporaryObjectsInUnresolvedPlan(catalog, plan) + } else { + (Nil, Nil) + } + val tempVars = ViewHelper.collectTemporaryVariables(analyzed) + + sqlConfigsToProps(conf) ++ + catalogAndNamespaceToProps( + manager.currentCatalog.name, + manager.currentNamespace.toIndexedSeq) ++ + referredTempNamesToProps(tempViews, tempFunctions, tempVars) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateUserDefinedFunctionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateUserDefinedFunctionCommand.scala index bebb0f5cf6c38..1ee3c8a4c388f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateUserDefinedFunctionCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateUserDefinedFunctionCommand.scala @@ -17,9 +17,15 @@ package org.apache.spark.sql.execution.command +import java.util.Locale + +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.catalog.{LanguageSQL, RoutineLanguage, UserDefinedFunctionErrors} +import org.apache.spark.sql.catalyst.catalog.UserDefinedFunction._ import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StructType /** * The base class for CreateUserDefinedFunctionCommand @@ -74,4 +80,108 @@ object CreateUserDefinedFunctionCommand { throw UserDefinedFunctionErrors.unsupportedUserDefinedFunction(other) } } + + /** + * Convert SQL configs to 
properties by prefixing all configs with a key. + * When converting a function to [[org.apache.spark.sql.catalyst.catalog.CatalogFunction]] or + * [[org.apache.spark.sql.catalyst.expressions.ExpressionInfo]], all SQL configs and other + * function properties (such as the function parameters and the function return type) + * are saved together in a property map. + */ + def sqlConfigsToProps(conf: SQLConf): Map[String, String] = { + val modifiedConfs = ViewHelper.getModifiedConf(conf) + modifiedConfs.map { case (key, value) => s"$SQL_CONFIG_PREFIX$key" -> value } + } + + /** + * Check whether the function parameters contain duplicated column names. + * It takes the function input parameter struct as input and verifies that there is no duplicates + * in the parameter column names. + * If any duplicates are found, it throws an exception with helpful information for users to + * fix the wrong function parameters. + * + * Perform this check while registering the function to fail early. + * This check does not need to run the function itself. + */ + def checkParameterNameDuplication( + param: StructType, + conf: SQLConf, + name: FunctionIdentifier): Unit = { + val names = if (conf.caseSensitiveAnalysis) { + param.fields.map(_.name) + } else { + param.fields.map(_.name.toLowerCase(Locale.ROOT)) + } + if (names.distinct.length != names.length) { + val duplicateColumns = names.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"`$x`" + } + throw UserDefinedFunctionErrors.duplicateParameterNames( + routineName = name.funcName, + names = duplicateColumns.toSeq.sorted.mkString(", ")) + } + } + + /** + * Check whether the function has duplicate column names in the RETURNS clause. 
+ */ + def checkReturnsColumnDuplication( + columns: StructType, + conf: SQLConf, + name: FunctionIdentifier): Unit = { + val names = if (conf.caseSensitiveAnalysis) { + columns.fields.map(_.name) + } else { + columns.fields.map(_.name.toLowerCase(Locale.ROOT)) + } + if (names.distinct.length != names.length) { + val duplicateColumns = names.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"`$x`" + } + throw UserDefinedFunctionErrors.duplicateReturnsColumns( + routineName = name.funcName, + columns = duplicateColumns.toSeq.sorted.mkString(", ")) + } + } + + /** + * Check whether the function parameters contain non trailing defaults. + * For languages that support default values for input parameters, + * this check ensures once a default value is given to a parameter, + * all subsequent parameters must also have a default value. It throws error if otherwise. + * + * Perform this check on function input parameters while registering the function to fail early. + * This check does not need to run the function itself. + */ + def checkDefaultsTrailing(param: StructType, name: FunctionIdentifier): Unit = { + var defaultFound = false + var previousParamName = ""; + param.fields.foreach { field => + if (field.getDefault().isEmpty && defaultFound) { + throw new AnalysisException( + errorClass = "USER_DEFINED_FUNCTIONS.NOT_A_VALID_DEFAULT_PARAMETER_POSITION", + messageParameters = Map( + "functionName" -> name.funcName, + "parameterName" -> previousParamName, + "nextParameterName" -> field.name)) + } + defaultFound |= field.getDefault().isDefined + previousParamName = field.name + } + } + + /** + * Check whether the function input or return columns (for TABLE Return type) have NOT NULL + * specified. Throw exception if NOT NULL is found. + * + * Perform this check on function input and return parameters while registering the function + * to fail early. This check does not need to run the function itself. 
+ */ + def checkParameterNotNull(param: StructType, input: String): Unit = { + param.fields.foreach { field => + if (!field.nullable) { + throw UserDefinedFunctionErrors.cannotSpecifyNotNullOnFunctionParameters(input) + } + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala new file mode 100644 index 0000000000000..6abe34f0ea156 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DescribeRelationJsonCommand.scala @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import scala.collection.mutable + +import org.json4s._ +import org.json4s.JsonAST.JObject +import org.json4s.jackson.JsonMethods._ + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.{ResolvedPersistentView, ResolvedTable, ResolvedTempView} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.quoteIfNeeded +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.connector.catalog.V1Table +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.PartitioningUtils + +/** + * The command for `DESCRIBE ... AS JSON`. 
+ */ +case class DescribeRelationJsonCommand( + child: LogicalPlan, + partitionSpec: TablePartitionSpec, + isExtended: Boolean, + override val output: Seq[Attribute] = Seq( + AttributeReference( + "json_metadata", + StringType, + nullable = false, + new MetadataBuilder().putString("comment", "JSON metadata of the table").build())() + )) extends UnaryRunnableCommand { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val jsonMap = mutable.LinkedHashMap[String, JValue]() + child match { + case v: ResolvedTempView => + if (partitionSpec.nonEmpty) { + throw QueryCompilationErrors.descPartitionNotAllowedOnTempView(v.identifier.name()) + } + describeIdentifier(Seq("system", "session", v.identifier.name()), jsonMap) + describeColsJson(v.metadata.schema, jsonMap) + describeFormattedTableInfoJson(v.metadata, jsonMap) + + case v: ResolvedPersistentView => + if (partitionSpec.nonEmpty) { + throw QueryCompilationErrors.descPartitionNotAllowedOnView(v.identifier.name()) + } + describeIdentifier(v.identifier.toQualifiedNameParts(v.catalog), jsonMap) + describeColsJson(v.metadata.schema, jsonMap) + describeFormattedTableInfoJson(v.metadata, jsonMap) + + case ResolvedTable(catalog, identifier, V1Table(metadata), _) => + describeIdentifier(identifier.toQualifiedNameParts(catalog), jsonMap) + val schema = if (metadata.schema.isEmpty) { + // In older versions of Spark, + // the table schema can be empty and should be inferred at runtime. 
+ sparkSession.table(metadata.identifier).schema + } else { + metadata.schema + } + describeColsJson(schema, jsonMap) + describeClusteringInfoJson(metadata, jsonMap) + if (partitionSpec.nonEmpty) { + // Outputs the partition-specific info for the DDL command: + // "DESCRIBE [EXTENDED|FORMATTED] table_name PARTITION (partitionVal*)" + describePartitionInfoJson( + sparkSession, sparkSession.sessionState.catalog, metadata, jsonMap) + } else { + describeFormattedTableInfoJson(metadata, jsonMap) + } + + case _ => throw QueryCompilationErrors.describeAsJsonNotSupportedForV2TablesError() + } + + Seq(Row(compact(render(JObject(jsonMap.toList))))) + } + + private def addKeyValueToMap( + key: String, + value: JValue, + jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + // Rename some JSON keys that are pre-named in describe table implementation + val renames = Map( + "inputformat" -> "input_format", + "outputformat" -> "output_format" + ) + + val normalizedKey = key.toLowerCase().replace(" ", "_") + val renamedKey = renames.getOrElse(normalizedKey, normalizedKey) + + if (!jsonMap.contains(renamedKey) && !excludedKeys.contains(renamedKey)) { + jsonMap += renamedKey -> value + } + } + + private def describeIdentifier( + ident: Seq[String], + jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + addKeyValueToMap("table_name", JString(ident.last), jsonMap) + addKeyValueToMap("catalog_name", JString(ident.head), jsonMap) + val namespace = ident.init.tail + addKeyValueToMap("namespace", JArray(namespace.map(JString).toList), jsonMap) + if (namespace.nonEmpty) { + addKeyValueToMap("schema_name", JString(namespace.last), jsonMap) + } + } + + /** + * Util to recursively form JSON string representation of data type, used for DESCRIBE AS JSON. + * Differs from `json` in DataType.scala by providing additional fields for some types. 
+ */ + private def jsonType(dataType: DataType): JValue = { + dataType match { + case arrayType: ArrayType => + JObject( + "name" -> JString("array"), + "element_type" -> jsonType(arrayType.elementType), + "element_nullable" -> JBool(arrayType.containsNull) + ) + + case mapType: MapType => + JObject( + "name" -> JString("map"), + "key_type" -> jsonType(mapType.keyType), + "value_type" -> jsonType(mapType.valueType), + "value_nullable" -> JBool(mapType.valueContainsNull) + ) + + case structType: StructType => + val fieldsJson = structType.fields.map { field => + val baseJson = List( + "name" -> JString(field.name), + "type" -> jsonType(field.dataType), + "nullable" -> JBool(field.nullable) + ) + val commentJson = field.getComment().map(comment => "comment" -> JString(comment)).toList + val defaultJson = + field.getCurrentDefaultValue().map(default => "default" -> JString(default)).toList + + JObject(baseJson ++ commentJson ++ defaultJson: _*) + }.toList + + JObject( + "name" -> JString("struct"), + "fields" -> JArray(fieldsJson) + ) + + case decimalType: DecimalType => + JObject( + "name" -> JString("decimal"), + "precision" -> JInt(decimalType.precision), + "scale" -> JInt(decimalType.scale) + ) + + case varcharType: VarcharType => + JObject( + "name" -> JString("varchar"), + "length" -> JInt(varcharType.length) + ) + + case charType: CharType => + JObject( + "name" -> JString("char"), + "length" -> JInt(charType.length) + ) + + // Only override TimestampType; TimestampType_NTZ type is already timestamp_ntz + case _: TimestampType => + JObject("name" -> JString("timestamp_ltz")) + + case yearMonthIntervalType: YearMonthIntervalType => + def getFieldName(field: Byte): String = YearMonthIntervalType.fieldToString(field) + + JObject( + "name" -> JString("interval"), + "start_unit" -> JString(getFieldName(yearMonthIntervalType.startField)), + "end_unit" -> JString(getFieldName(yearMonthIntervalType.endField)) + ) + + case dayTimeIntervalType: DayTimeIntervalType => + 
def getFieldName(field: Byte): String = DayTimeIntervalType.fieldToString(field) + + JObject( + "name" -> JString("interval"), + "start_unit" -> JString(getFieldName(dayTimeIntervalType.startField)), + "end_unit" -> JString(getFieldName(dayTimeIntervalType.endField)) + ) + + case _ => + JObject("name" -> JString(dataType.simpleString)) + } + } + + private def describeColsJson( + schema: StructType, + jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + val columnsJson = jsonType(StructType(schema.fields)) + .asInstanceOf[JObject].find(_.isInstanceOf[JArray]).get + addKeyValueToMap("columns", columnsJson, jsonMap) + } + + private def describeClusteringInfoJson( + table: CatalogTable, jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + table.clusterBySpec.foreach { clusterBySpec => + val clusteringColumnsJson: JValue = JArray( + clusterBySpec.columnNames.map { fieldNames => + val nestedFieldOpt = table.schema.findNestedField(fieldNames.fieldNames.toIndexedSeq) + assert(nestedFieldOpt.isDefined, + "The clustering column " + + s"${fieldNames.fieldNames.map(quoteIfNeeded).mkString(".")} " + + s"was not found in the table schema ${table.schema.catalogString}." 
+ ) + val (path, field) = nestedFieldOpt.get + JObject( + "name" -> JString((path :+ field.name).map(quoteIfNeeded).mkString(".")), + "type" -> jsonType(field.dataType), + "comment" -> field.getComment().map(JString).getOrElse(JNull) + ) + }.toList + ) + addKeyValueToMap("clustering_information", clusteringColumnsJson, jsonMap) + } + } + + private def describeFormattedTableInfoJson( + table: CatalogTable, jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + table.bucketSpec match { + case Some(spec) => + spec.toJsonLinkedHashMap.foreach { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + case _ => + } + table.storage.toJsonLinkedHashMap.foreach { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + + val filteredTableInfo = table.toJsonLinkedHashMap + + filteredTableInfo.map { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + } + + private def describePartitionInfoJson( + spark: SparkSession, + catalog: SessionCatalog, + metadata: CatalogTable, + jsonMap: mutable.LinkedHashMap[String, JValue]): Unit = { + if (metadata.tableType == CatalogTableType.VIEW) { + throw QueryCompilationErrors.descPartitionNotAllowedOnView(metadata.identifier.identifier) + } + + DDLUtils.verifyPartitionProviderIsHive(spark, metadata, "DESC PARTITION") + val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec( + partitionSpec, + metadata.partitionSchema, + metadata.identifier.quotedString, + spark.sessionState.conf.resolver) + val partition = catalog.getPartition(metadata.identifier, normalizedPartSpec) + + // First add partition details to jsonMap. + // `addKeyValueToMap` only adds unique keys, so this ensures the + // more detailed partition information is added + // in the case of duplicated key names (e.g. storage_information). 
+ partition.toJsonLinkedHashMap.foreach { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + + metadata.toJsonLinkedHashMap.foreach { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + + metadata.bucketSpec match { + case Some(spec) => + spec.toJsonLinkedHashMap.foreach { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + case _ => + } + metadata.storage.toJsonLinkedHashMap.foreach { case (key, value) => + addKeyValueToMap(key, value, jsonMap) + } + } + + // Already added to jsonMap in DescribeTableJsonCommand + private val excludedKeys = Set("catalog", "schema", "database", "table") + + override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = { + copy(child = newChild) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index a8a91af1bdbc4..9dfe5c3e4c301 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -196,7 +196,8 @@ case class DescribeDatabaseCommand( if (properties.isEmpty) { "" } else { - conf.redactOptions(properties).toSeq.sortBy(_._1).mkString("(", ", ", ")") + sparkSession.sessionState.conf.redactOptions(properties).toSeq + .sortBy(_._1).mkString("(", ", ", ")") } result :+ Row("Properties", propertiesStr) } else { @@ -548,7 +549,7 @@ case class AlterTableAddPartitionCommand( // Hive metastore may not have enough memory to handle millions of partitions in single RPC. // Also the request to metastore times out when adding lot of partitions in one shot. 
// we should split them into smaller batches - val batchSize = conf.getConf(SQLConf.ADD_PARTITION_BATCH_SIZE) + val batchSize = sparkSession.sessionState.conf.getConf(SQLConf.ADD_PARTITION_BATCH_SIZE) parts.iterator.grouped(batchSize).foreach { batch => catalog.createPartitions(table.identifier, batch, ignoreIfExists = ifNotExists) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 9ecd3fd19aa64..a58e8fac6e36d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -240,7 +240,10 @@ case class AlterTableAddColumnsCommand( SchemaUtils.checkColumnNameDuplication( (colsWithProcessedDefaults ++ catalogTable.schema).map(_.name), - conf.caseSensitiveAnalysis) + sparkSession.sessionState.conf.caseSensitiveAnalysis) + if (!conf.allowCollationsInMapKeys) { + colsToAdd.foreach(col => SchemaUtils.checkNoCollationsInMapKeys(col.dataType)) + } DDLUtils.checkTableColumns(catalogTable, StructType(colsWithProcessedDefaults)) val existingSchema = CharVarcharUtils.getRawSchema(catalogTable.dataSchema) @@ -498,7 +501,7 @@ case class TruncateTableCommand( partLocations } val hadoopConf = spark.sessionState.newHadoopConf() - val ignorePermissionAcl = conf.truncateTableIgnorePermissionAcl + val ignorePermissionAcl = spark.sessionState.conf.truncateTableIgnorePermissionAcl locations.foreach { location => if (location.isDefined) { val path = new Path(location.get) @@ -816,7 +819,8 @@ case class DescribeColumnCommand( val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table) val colStatsMap = catalogTable.stats.map(_.colStats).getOrElse(Map.empty) - val colStats = if (conf.caseSensitiveAnalysis) colStatsMap else CaseInsensitiveMap(colStatsMap) + val colStats = if (sparkSession.sessionState.conf.caseSensitiveAnalysis) colStatsMap + else 
CaseInsensitiveMap(colStatsMap) val cs = colStats.get(field.name) val comment = if (field.metadata.contains("comment")) { @@ -972,7 +976,7 @@ case class ShowTablePropertiesCommand( Seq.empty[Row] } else { val catalogTable = catalog.getTableMetadata(table) - val properties = conf.redactOptions(catalogTable.properties) + val properties = sparkSession.sessionState.conf.redactOptions(catalogTable.properties) propertyKey match { case Some(p) => val propValue = properties diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 071e3826b20a0..6428583c9e1ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -47,6 +47,7 @@ import org.apache.spark.util.ArrayImplicits._ * @param userSpecifiedColumns the output column names and optional comments specified by users, * can be Nil if not specified. * @param comment the comment of this view. + * @param collation the collation of this view. * @param properties the properties of this view. * @param originalText the original SQL text of this view, can be None if this view is created via * Dataset API. @@ -64,6 +65,7 @@ case class CreateViewCommand( name: TableIdentifier, userSpecifiedColumns: Seq[(String, Option[String])], comment: Option[String], + collation: Option[String], properties: Map[String, String], originalText: Option[String], plan: LogicalPlan, @@ -220,7 +222,8 @@ case class CreateViewCommand( properties = newProperties, viewOriginalText = originalText, viewText = originalText, - comment = comment + comment = comment, + collation = collation ) } @@ -461,12 +464,19 @@ object ViewHelper extends SQLConfHelper with Logging { } /** - * Convert the view SQL configs to `properties`. + * Get all configurations that are modifiable and should be captured. 
*/ - private def sqlConfigsToProps(conf: SQLConf): Map[String, String] = { - val modifiedConfs = conf.getAllConfs.filter { case (k, _) => + def getModifiedConf(conf: SQLConf): Map[String, String] = { + conf.getAllConfs.filter { case (k, _) => conf.isModifiable(k) && shouldCaptureConfig(k) } + } + + /** + * Convert the view SQL configs to `properties`. + */ + private def sqlConfigsToProps(conf: SQLConf): Map[String, String] = { + val modifiedConfs = getModifiedConf(conf) // Some configs have dynamic default values, such as SESSION_LOCAL_TIMEZONE whose // default value relies on the JVM system timezone. We need to always capture them to // to make sure we apply the same configs when reading the view. @@ -687,7 +697,7 @@ object ViewHelper extends SQLConfHelper with Logging { /** * Collect all temporary SQL variables and return the identifiers separately. */ - private def collectTemporaryVariables(child: LogicalPlan): Seq[Seq[String]] = { + def collectTemporaryVariables(child: LogicalPlan): Seq[Seq[String]] = { def collectTempVars(child: LogicalPlan): Seq[Seq[String]] = { child.flatMap { plan => plan.expressions.flatMap(_.flatMap { @@ -729,7 +739,8 @@ object ViewHelper extends SQLConfHelper with Logging { val uncache = getRawTempView(name.table).map { r => needsToUncache(r, aliasedPlan) }.getOrElse(false) - val storeAnalyzedPlanForView = conf.storeAnalyzedPlanForView || originalText.isEmpty + val storeAnalyzedPlanForView = session.sessionState.conf.storeAnalyzedPlanForView || + originalText.isEmpty if (replace && uncache) { logDebug(s"Try to uncache ${name.quotedString} before replacing.") if (!storeAnalyzedPlanForView) { @@ -782,7 +793,6 @@ object ViewHelper extends SQLConfHelper with Logging { originalText: String, tempFunctions: Seq[String]): CatalogTable = { - val catalog = session.sessionState.catalog val tempViews = collectTemporaryViews(analyzedPlan) val tempVariables = collectTemporaryVariables(analyzedPlan) // TBLPROPERTIES is not allowed for temporary view, 
so we don't use it for @@ -797,6 +807,7 @@ object ViewHelper extends SQLConfHelper with Logging { storage = CatalogStorageFormat.empty, schema = viewSchema, viewText = Some(originalText), + createVersion = org.apache.spark.SPARK_VERSION, properties = newProperties) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index bc156cd82ed6a..58bbd91a8cc77 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -682,11 +682,10 @@ object DataSource extends Logging { throw e } } - case _ :: Nil if isUserDefinedDataSource => - // There was DSv1 or DSv2 loaded, but the same name source was found - // in user defined data source. - throw QueryCompilationErrors.foundMultipleDataSources(provider) case head :: Nil => + // We do not check whether the provider is a Python data source + // (isUserDefinedDataSource) to avoid the lookup cost. Java data sources + // always take precedence over Python user-defined data sources. head.getClass case sources => // There are multiple registered aliases for the input. 
If there is single datasource diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala index 93fc6cf367cfc..711e096ebd1f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala @@ -101,6 +101,7 @@ object DataSourceManager extends Logging { private def initialStaticDataSourceBuilders: Map[String, UserDefinedPythonDataSource] = { if (shouldLoadPythonDataSources) this.synchronized { + logInfo("Loading static Python Data Sources.") if (dataSourceBuilders.isEmpty) { val maybeResult = try { Some(UserDefinedPythonDataSource.lookupAllDataSourcesInPython()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceResolver.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceResolver.scala new file mode 100644 index 0000000000000..3a2a3207a01f9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceResolver.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.resolver.{ + ExplicitlyUnsupportedResolverFeature, + ResolverExtension +} +import org.apache.spark.sql.catalyst.catalog.UnresolvedCatalogRelation +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 +import org.apache.spark.sql.execution.streaming.StreamingRelation + +/** + * The [[DataSourceResolver]] is a [[Resolver]] extension that resolves nodes defined in the + * [[datasources]] package. We have it as an extension to avoid cyclic dependencies between + * [[resolver]] and [[datasources]] packages. + */ +class DataSourceResolver(sparkSession: SparkSession) extends ResolverExtension { + private val findDataSourceTable = new FindDataSourceTable(sparkSession) + + /** + * Resolve [[UnresolvedCatalogRelation]]: + * - Reuse [[FindDataSourceTable]] code to resolve [[UnresolvedCatalogRelation]] + * - Create a new instance of [[LogicalRelation]] to regenerate the expression IDs + * - Explicitly disallow [[StreamingRelation]] and [[StreamingRelationV2]] for now + * - [[FileResolver]], which is a [[ResolverExtension]], introduces a new [[LogicalPlan]] node + * which resolution has to be handled here (further resolution of it doesn't need any specific + * resolution except adding it's attributes to the scope). 
+ */ + override def resolveOperator: PartialFunction[LogicalPlan, LogicalPlan] = { + case unresolvedCatalogRelation: UnresolvedCatalogRelation => + val result = findDataSourceTable.resolveUnresolvedCatalogRelation(unresolvedCatalogRelation) + result match { + case logicalRelation: LogicalRelation => + logicalRelation.newInstance() + case streamingRelation: StreamingRelation => + throw new ExplicitlyUnsupportedResolverFeature( + s"unsupported operator: ${streamingRelation.getClass.getName}" + ) + case streamingRelationV2: StreamingRelationV2 => + throw new ExplicitlyUnsupportedResolverFeature( + s"unsupported operator: ${streamingRelationV2.getClass.getName}" + ) + case other => + other + } + case logicalRelation: LogicalRelation => + logicalRelation.newInstance() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileResolver.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileResolver.scala new file mode 100644 index 0000000000000..44102da752c2e --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileResolver.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension +import org.apache.spark.sql.catalyst.plans.logical.{AnalysisHelper, LogicalPlan} + +/** + * The [[FileResolver]] is a [[MetadataResolver]] extension that resolves [[UnresolvedRelation]] + * which is created out of file. It reuses the code from [[ResolveSQLOnFile]] to resolve it + * properly. + * + * We have it as an extension to avoid cyclic dependencies between [[resolver]] and [[datasources]] + * packages. + */ +class FileResolver(sparkSession: SparkSession) extends ResolverExtension { + private val resolveSQLOnFile = new ResolveSQLOnFile(sparkSession) + + /** + * [[ResolveSQLOnFile]] code that is reused to resolve [[UnresolvedRelation]] has + * [[ExpressionEncoder.resolveAndBind]] on its path which introduces another call to + * the analyzer which is acceptable as it is called on the leaf node of the plan. That's why we + * have to allow invoking transforms in the single-pass analyzer. + */ + object UnresolvedRelationResolution { + def unapply(operator: LogicalPlan): Option[LogicalPlan] = + AnalysisHelper.allowInvokingTransformsInAnalyzer { + resolveSQLOnFile.UnresolvedRelationResolution.unapply(operator) + } + } + + /** + * Reuse [[ResolveSQLOnFile]] code to resolve [[UnresolvedRelation]] made out of file. 
+ */ + override def resolveOperator: PartialFunction[LogicalPlan, LogicalPlan] = { + case UnresolvedRelationResolution(resolvedRelation) => + resolvedRelation + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 9bcdbadf7c5c0..e468807f4ffd1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -21,6 +21,8 @@ import java.io.{Closeable, FileNotFoundException, IOException} import java.net.URI import org.apache.hadoop.fs.Path +import org.apache.hadoop.hdfs.BlockMissingException +import org.apache.hadoop.security.AccessControlException import org.apache.spark.{Partition => RDDPartition, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil @@ -266,6 +268,7 @@ class FileScanRDD( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning(log"Skipped the rest of the content in the corrupted file: " + log"${MDC(PATH, currentFile)}", e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index aed129c7dccc4..8a795f0748811 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -84,6 +84,9 @@ case class InsertIntoHadoopFsRelationCommand( outputColumnNames, 
sparkSession.sessionState.conf.caseSensitiveAnalysis) } + if (!conf.allowCollationsInMapKeys) { + SchemaUtils.checkNoCollationsInMapKeys(query.schema) + } val hadoopConf = sparkSession.sessionState.newHadoopConfWithOptions(options) val fs = outputPath.getFileSystem(hadoopConf) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PushVariantIntoScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PushVariantIntoScan.scala new file mode 100644 index 0000000000000..33ba4f772a13a --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PushVariantIntoScan.scala @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import scala.collection.mutable.HashMap + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.variant._ +import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, Subquery} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns +import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +// A metadata class of a struct field. All struct fields in a struct must either all have this +// metadata, or all don't have it. +// We define a "variant struct" as: a special struct with its fields annotated with this metadata. +// It indicates that the struct should produce all requested fields of a variant type, and should be +// treated specially by the scan. +case class VariantMetadata( + // The `path` parameter of VariantGet. It has the same format as a JSON path, except that + // `[*]` is not supported. + path: String, + failOnError: Boolean, + timeZoneId: String) { + // Produce a metadata contain one key-value pair. The key is the special `METADATA_KEY`. + // The value contains three key-value pairs for `path`, `failOnError`, and `timeZoneId`. 
+ def toMetadata: Metadata = + new MetadataBuilder().putMetadata( + VariantMetadata.METADATA_KEY, + new MetadataBuilder() + .putString(VariantMetadata.PATH_KEY, path) + .putBoolean(VariantMetadata.FAIL_ON_ERROR_KEY, failOnError) + .putString(VariantMetadata.TIME_ZONE_ID_KEY, timeZoneId) + .build() + ).build() + + def parsedPath(): Array[VariantPathSegment] = { + VariantPathParser.parse(path).getOrElse { + val name = if (failOnError) "variant_get" else "try_variant_get" + throw QueryExecutionErrors.invalidVariantGetPath(path, name) + } + } +} + +object VariantMetadata { + val METADATA_KEY = "__VARIANT_METADATA_KEY" + val PATH_KEY = "path" + val FAIL_ON_ERROR_KEY = "failOnError" + val TIME_ZONE_ID_KEY = "timeZoneId" + + def isVariantStruct(s: StructType): Boolean = + s.fields.length > 0 && s.fields.forall(_.metadata.contains(METADATA_KEY)) + + def isVariantStruct(t: DataType): Boolean = t match { + case s: StructType => isVariantStruct(s) + case _ => false + } + + // Parse the `VariantMetadata` from a metadata produced by `toMetadata`. + def fromMetadata(metadata: Metadata): VariantMetadata = { + val value = metadata.getMetadata(METADATA_KEY) + VariantMetadata( + value.getString(PATH_KEY), + value.getBoolean(FAIL_ON_ERROR_KEY), + value.getString(TIME_ZONE_ID_KEY) + ) + } +} + +// Represent a requested field of a variant that the scan should produce. +// Each `RequestedVariantField` is corresponded to a variant path extraction in the plan. 
+case class RequestedVariantField(path: VariantMetadata, targetType: DataType) + +object RequestedVariantField { + def fullVariant: RequestedVariantField = + RequestedVariantField(VariantMetadata("$", failOnError = true, "UTC"), VariantType) + + def apply(v: VariantGet): RequestedVariantField = + RequestedVariantField( + VariantMetadata(v.path.eval().toString, v.failOnError, v.timeZoneId.get), v.dataType) + + def apply(c: Cast): RequestedVariantField = + RequestedVariantField( + VariantMetadata("$", c.evalMode != EvalMode.TRY, c.timeZoneId.get), c.dataType) +} + +// Extract a nested struct access path. Return the (root attribute id, a sequence of ordinals to +// access the field). For non-nested attribute access, the sequence is empty. +object StructPath { + def unapply(expr: Expression): Option[(ExprId, Seq[Int])] = expr match { + case GetStructField(StructPath(root, path), ordinal, _) => Some((root, path :+ ordinal)) + case a: Attribute => Some(a.exprId, Nil) + case _ => None + } +} + +// A collection of all eligible variants in a relation, which are in the root of the relation output +// schema, or only nested in struct types. +// The user should: +// 1. Call `addVariantFields` to add all eligible variants in a relation. +// 2. Call `collectRequestedFields` on all expressions depending on the relation. This process will +// add the requested fields of each variant and potentially remove non-eligible variants. See +// `collectRequestedFields` for details. +// 3. Call `rewriteType` to produce a new output schema for the relation. +// 4. Call `rewriteExpr` to rewrite the previously visited expressions by replacing variant +// extractions with struct accessed. +class VariantInRelation { + // First level key: root attribute id. + // Second level key: struct access paths to the variant type. + // Third level key: requested fields of a variant type. + // Final value: the ordinal of a requested field in the final struct of requested fields. 
+ val mapping = new HashMap[ExprId, HashMap[Seq[Int], HashMap[RequestedVariantField, Int]]] + + // Extract the SQL-struct path where the leaf is a variant. + object StructPathToVariant { + def unapply(expr: Expression): Option[HashMap[RequestedVariantField, Int]] = expr match { + case StructPath(attrId, path) => + mapping.get(attrId).flatMap(_.get(path)) + case _ => None + } + } + + // Find eligible variants recursively. `attrId` is the root attribute id. + // `path` is the current struct access path. `dataType` is the child data type after extracting + // `path` from the root attribute struct. + def addVariantFields( + attrId: ExprId, + dataType: DataType, + defaultValue: Any, + path: Seq[Int]): Unit = { + dataType match { + // TODO(SHREDDING): non-null default value is not yet supported. + case _: VariantType if defaultValue == null => + mapping.getOrElseUpdate(attrId, new HashMap).put(path, new HashMap) + case s: StructType if !VariantMetadata.isVariantStruct(s) => + val row = defaultValue.asInstanceOf[InternalRow] + for ((field, idx) <- s.fields.zipWithIndex) { + val fieldDefault = if (row == null || row.isNullAt(idx)) { + null + } else { + row.get(idx, field.dataType) + } + addVariantFields(attrId, field.dataType, fieldDefault, path :+ idx) + } + case _ => + } + } + + def rewriteType(attrId: ExprId, dataType: DataType, path: Seq[Int]): DataType = { + dataType match { + case _: VariantType => + mapping.get(attrId).flatMap(_.get(path)) match { + case Some(fields) => + var requestedFields = fields.toArray.sortBy(_._2).map { case (field, ordinal) => + StructField(ordinal.toString, field.targetType, metadata = field.path.toMetadata) + } + // Avoid producing an empty struct of requested fields. This is intended to simplify the + // scan implementation, which may not be able to handle empty struct type. This happens + // if the variant is not used, or only used in `IsNotNull/IsNull` expressions. 
The value + // of the placeholder field doesn't matter, even if the scan source accidentally + // contains such a field. + if (requestedFields.isEmpty) { + val placeholder = VariantMetadata("$.__placeholder_field__", + failOnError = false, timeZoneId = "UTC") + requestedFields = Array(StructField("0", BooleanType, + metadata = placeholder.toMetadata)) + } + StructType(requestedFields) + case _ => dataType + } + case s: StructType if !VariantMetadata.isVariantStruct(s) => + val newFields = s.fields.zipWithIndex.map { case (field, idx) => + field.copy(dataType = rewriteType(attrId, field.dataType, path :+ idx)) + } + StructType(newFields) + case _ => dataType + } + } + + // Add a requested field to a variant column. + private def addField( + map: HashMap[RequestedVariantField, Int], + field: RequestedVariantField): Unit = { + val idx = map.size + map.getOrElseUpdate(field, idx) + } + + // Update `mapping` with any access to a variant. Add the requested fields of each variant and + // potentially remove non-eligible variants. + // If a struct containing a variant is directly used, this variant is not eligible for push down. + // This is because we need to replace the variant type with a struct producing all requested + // fields, which also changes the struct type containing it, and it is difficult to reconstruct + // the original struct value. This is not a big loss, because we need the full variant anyway. 
+ def collectRequestedFields(expr: Expression): Unit = expr match { + case v@VariantGet(StructPathToVariant(fields), _, _, _, _) => + addField(fields, RequestedVariantField(v)) + case c@Cast(StructPathToVariant(fields), _, _, _) => addField(fields, RequestedVariantField(c)) + case IsNotNull(StructPath(_, _)) | IsNull(StructPath(_, _)) => + case StructPath(attrId, path) => + mapping.get(attrId) match { + case Some(variants) => + variants.get(path) match { + case Some(fields) => + addField(fields, RequestedVariantField.fullVariant) + case _ => + // Remove non-eligible variants. + variants.filterInPlace { case (key, _) => !key.startsWith(path) } + } + case _ => + } + case _ => expr.children.foreach(collectRequestedFields) + } + + def rewriteExpr( + expr: Expression, + attributeMap: Map[ExprId, AttributeReference]): Expression = { + def rewriteAttribute(expr: Expression): Expression = expr.transformDown { + case a: Attribute => attributeMap.getOrElse(a.exprId, a) + } + + // Rewrite patterns should be consistent with visit patterns in `collectRequestedFields`. + expr.transformDown { + case g@VariantGet(v@StructPathToVariant(fields), _, _, _, _) => + // Rewrite the attribute in advance, rather than depending on the last branch to rewrite it. + // Ww need to avoid the `v@StructPathToVariant(fields)` branch to rewrite the child again. 
+ GetStructField(rewriteAttribute(v), fields(RequestedVariantField(g))) + case c@Cast(v@StructPathToVariant(fields), _, _, _) => + GetStructField(rewriteAttribute(v), fields(RequestedVariantField(c))) + case i@IsNotNull(StructPath(_, _)) => rewriteAttribute(i) + case i@IsNull(StructPath(_, _)) => rewriteAttribute(i) + case v@StructPathToVariant(fields) => + GetStructField(rewriteAttribute(v), fields(RequestedVariantField.fullVariant)) + case a: Attribute => attributeMap.getOrElse(a.exprId, a) + } + } +} + +// Push variant into scan by rewriting the variant type with a struct type producing all requested +// fields and rewriting the variant extraction expressions by struct accesses. +// For example, for an input plan: +// - Project [v:a::int, v:b::string, v] +// - Filter [v:a::int = 1] +// - Relation [v: variant] +// Rewrite it as: +// - Project [v.0, v.1, v.2] +// - Filter [v.0 = 1] +// - Relation [v: struct<0: int, 1: string, 2: variant>] +// The struct fields are annotated with `VariantMetadata` to indicate the extraction path. +object PushVariantIntoScan extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan match { + // A correlated subquery will be rewritten into join later, and will go through this rule + // eventually. 
+ case s: Subquery if s.correlated => plan + case _ if !SQLConf.get.getConf(SQLConf.PUSH_VARIANT_INTO_SCAN) => plan + case _ => plan.transformDown { + case p@PhysicalOperation(projectList, filters, + relation @ LogicalRelationWithTable( + hadoopFsRelation@HadoopFsRelation(_, _, _, _, _: ParquetFileFormat, _), _)) => + rewritePlan(p, projectList, filters, relation, hadoopFsRelation) + } + } + + private def rewritePlan( + originalPlan: LogicalPlan, + projectList: Seq[NamedExpression], + filters: Seq[Expression], + relation: LogicalRelation, + hadoopFsRelation: HadoopFsRelation): LogicalPlan = { + val variants = new VariantInRelation + val defaultValues = ResolveDefaultColumns.existenceDefaultValues(hadoopFsRelation.schema) + // I'm not aware of any case that an attribute `relation.output` can have a different data type + // than the corresponding field in `hadoopFsRelation.schema`. Other code seems to prefer using + // the data type in `hadoopFsRelation.schema`, let's also stick to it. + val schemaWithAttributes = hadoopFsRelation.schema.fields.zip(relation.output) + for (((f, attr), defaultValue) <- schemaWithAttributes.zip(defaultValues)) { + variants.addVariantFields(attr.exprId, f.dataType, defaultValue, Nil) + } + if (variants.mapping.isEmpty) return originalPlan + + projectList.foreach(variants.collectRequestedFields) + filters.foreach(variants.collectRequestedFields) + // `collectRequestedFields` may have removed all variant columns. 
+ if (variants.mapping.forall(_._2.isEmpty)) return originalPlan + + val (newFields, newOutput) = schemaWithAttributes.map { + case (f, attr) => + if (variants.mapping.get(attr.exprId).exists(_.nonEmpty)) { + val newType = variants.rewriteType(attr.exprId, f.dataType, Nil) + val newAttr = AttributeReference(f.name, newType, f.nullable, f.metadata)() + (f.copy(dataType = newType), newAttr) + } else { + (f, attr) + } + }.unzip + + val newHadoopFsRelation = hadoopFsRelation.copy(dataSchema = StructType(newFields))( + hadoopFsRelation.sparkSession) + val newRelation = relation.copy(relation = newHadoopFsRelation, output = newOutput.toIndexedSeq) + + val attributeMap = relation.output.zip(newOutput).map { + case (oldAttr, newAttr) => oldAttr.exprId -> newAttr + }.toMap + val withFilter = if (filters.nonEmpty) { + Filter(filters.map(variants.rewriteExpr(_, attributeMap)).reduce(And), newRelation) + } else { + newRelation + } + val newProjectList = projectList.map { e => + val rewritten = variants.rewriteExpr(e, attributeMap) + rewritten match { + case n: NamedExpression => n + // This is when the variant column is directly selected. We replace the attribute reference + // with a struct access, which is not a `NamedExpression` that `Project` requires. We wrap + // it with an `Alias`. 
+ case _ => Alias(rewritten, e.name)(e.exprId, e.qualifier) + } + } + Project(newProjectList, withFilter) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index 8ef85ee91aa8f..b2b99e2d0f4ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -86,7 +86,7 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { } override def getFileExtension(context: TaskAttemptContext): String = { - ".csv" + CodecStreams.getCompressionExtension(context) + "." + csvOptions.extension + CodecStreams.getCompressionExtension(context) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala index d9367d92d462e..eb9d5813cff7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, V1CreateTablePlan} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.{DDLUtils, LeafRunnableCommand} @@ -43,7 +43,7 @@ import org.apache.spark.sql.types._ case class CreateTable( tableDesc: CatalogTable, mode: SaveMode, - query: Option[LogicalPlan]) extends LogicalPlan { + query: Option[LogicalPlan]) 
extends LogicalPlan with V1CreateTablePlan { assert(tableDesc.provider.isDefined, "The table to be created must have a provider.") if (query.isEmpty) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetColumn.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetColumn.scala index 6ac96300ccd65..4bc1194d9370d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetColumn.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetColumn.scala @@ -35,7 +35,10 @@ case class ParquetColumn( definitionLevel: Int, required: Boolean, path: Seq[String], - children: Seq[ParquetColumn]) { + children: Seq[ParquetColumn], + // When `variantFileType` has value, the parquet column should produce a Spark variant type, and + // `variantFileType` describes the file schema of the Parquet variant column. + variantFileType: Option[ParquetColumn] = None) { def isPrimitive: Boolean = descriptor.nonEmpty } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index 8dde02a4673f0..af0bf0d51f077 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -35,6 +35,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.execution.datasources.VariantMetadata import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.types._ @@ -221,6 +222,9 @@ object ParquetReadSupport extends Logging { 
clipParquetMapType( parquetType.asGroupType(), t.keyType, t.valueType, caseSensitive, useFieldId) + case t: StructType if VariantMetadata.isVariantStruct(t) => + clipVariantSchema(parquetType.asGroupType(), t) + case t: StructType => clipParquetGroup(parquetType.asGroupType(), t, caseSensitive, useFieldId) @@ -390,6 +394,11 @@ object ParquetReadSupport extends Logging { .named(parquetRecord.getName) } + private def clipVariantSchema(parquetType: GroupType, variantStruct: StructType): GroupType = { + // TODO(SHREDDING): clip `parquetType` to retain the necessary columns. + parquetType + } + /** * Clips a Parquet [[GroupType]] which corresponds to a Catalyst [[StructType]]. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 838eb30c38fb1..550c2af43a706 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.errors.QueryExecutionErrors -import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.execution.datasources.{DataSourceUtils, VariantMetadata} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{UTF8String, VariantVal} @@ -498,6 +498,9 @@ private[parquet] class ParquetRowConverter( case t: MapType => new ParquetMapConverter(parquetType.asGroupType(), t, updater) + case t: StructType if VariantMetadata.isVariantStruct(t) => + new ParquetVariantConverter(t, parquetType.asGroupType(), updater) + 
case t: StructType => val wrappedUpdater = { // SPARK-30338: avoid unnecessary InternalRow copying for nested structs: @@ -535,7 +538,11 @@ private[parquet] class ParquetRowConverter( wrappedUpdater) case t: VariantType => - new ParquetVariantConverter(parquetType.asGroupType(), updater) + if (SQLConf.get.getConf(SQLConf.VARIANT_ALLOW_READING_SHREDDED)) { + new ParquetVariantConverter(t, parquetType.asGroupType(), updater) + } else { + new ParquetUnshreddedVariantConverter(parquetType.asGroupType(), updater) + } case t => throw QueryExecutionErrors.cannotCreateParquetConverterForDataTypeError( @@ -845,8 +852,8 @@ private[parquet] class ParquetRowConverter( } } - /** Parquet converter for Variant */ - private final class ParquetVariantConverter( + /** Parquet converter for unshredded Variant */ + private final class ParquetUnshreddedVariantConverter( parquetType: GroupType, updater: ParentContainerUpdater) extends ParquetGroupConverter(updater) { @@ -898,6 +905,47 @@ private[parquet] class ParquetRowConverter( } } + /** Parquet converter for Variant (shredded or unshredded) */ + private final class ParquetVariantConverter( + targetType: DataType, parquetType: GroupType, updater: ParentContainerUpdater) + extends ParquetGroupConverter(updater) { + + private[this] var currentRow: Any = _ + private[this] val parquetSparkType = SparkShreddingUtils.parquetTypeToSparkType(parquetType) + private[this] val variantSchema = SparkShreddingUtils.buildVariantSchema(parquetSparkType) + private[this] val fieldsToExtract = + SparkShreddingUtils.getFieldsToExtract(targetType, variantSchema) + // A struct converter that reads the underlying file data. 
+ private[this] val fileConverter = new ParquetRowConverter( + schemaConverter, + parquetType, + parquetSparkType.asInstanceOf[StructType], + convertTz, + datetimeRebaseSpec, + int96RebaseSpec, + new ParentContainerUpdater { + override def set(value: Any): Unit = currentRow = value + }) + + override def getConverter(fieldIndex: Int): Converter = fileConverter.getConverter(fieldIndex) + + override def end(): Unit = { + fileConverter.end() + val row = currentRow.asInstanceOf[InternalRow] + val v = if (fieldsToExtract == null) { + SparkShreddingUtils.assembleVariant(row, variantSchema) + } else { + SparkShreddingUtils.assembleVariantStruct(row, variantSchema, fieldsToExtract) + } + updater.set(v) + } + + override def start(): Unit = { + fileConverter.start() + currentRow = null + } + } + private trait RepeatedConverter { private[this] val currentArray = ArrayBuffer.empty[Any] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala index 350d42c8efd76..daeb8e88a924b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala @@ -28,6 +28,7 @@ import org.apache.parquet.schema.Type.Repetition._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.datasources.VariantMetadata import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -179,7 +180,15 @@ class ParquetToSparkSchemaConverter( field match { case primitiveColumn: PrimitiveColumnIO => convertPrimitiveField(primitiveColumn, targetType) case groupColumn: GroupColumnIO if targetType.contains(VariantType) => - convertVariantField(groupColumn) + if 
(SQLConf.get.getConf(SQLConf.VARIANT_ALLOW_READING_SHREDDED)) { + val col = convertGroupField(groupColumn) + col.copy(sparkType = VariantType, variantFileType = Some(col)) + } else { + convertVariantField(groupColumn) + } + case groupColumn: GroupColumnIO if targetType.exists(VariantMetadata.isVariantStruct) => + val col = convertGroupField(groupColumn) + col.copy(sparkType = targetType.get, variantFileType = Some(col)) case groupColumn: GroupColumnIO => convertGroupField(groupColumn, targetType) } } @@ -747,6 +756,14 @@ class SparkToParquetSchemaConverter( .addField(convertField(StructField("metadata", BinaryType, nullable = false))) .named(field.name) + case s: StructType if SparkShreddingUtils.isVariantShreddingStruct(s) => + // Variant struct takes a Variant and writes to Parquet as a shredded schema. + val group = Types.buildGroup(repetition) + s.fields.foreach { f => + group.addField(convertField(f)) + } + group.named(field.name) + case StructType(fields) => fields.foldLeft(Types.buildGroup(repetition)) { (builder, field) => builder.addField(convertField(field)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala index 3e111252bc6fe..663182d8d1820 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala @@ -45,7 +45,7 @@ import org.apache.spark.sql.execution.datasources.{AggregatePushDownUtils, Outpu import org.apache.spark.sql.execution.datasources.v2.V2ColumnUtils import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.internal.SQLConf.PARQUET_AGGREGATE_PUSHDOWN_ENABLED -import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, MapType, StructField, StructType, UserDefinedType} +import 
org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, MapType, StructField, StructType, UserDefinedType, VariantType} import org.apache.spark.util.ArrayImplicits._ object ParquetUtils extends Logging { @@ -420,6 +420,22 @@ object ParquetUtils extends Logging { statistics.getNumNulls; } + // Replaces each VariantType in the schema with the corresponding type in the shredding schema. + // Used for testing, where we force a single shredding schema for all Variant fields. + // Does not touch Variant fields nested in arrays, maps, or UDTs. + private def replaceVariantTypes(schema: StructType, shreddingSchema: StructType): StructType = { + val newFields = schema.fields.zip(shreddingSchema.fields).map { + case (field, shreddingField) => + field.dataType match { + case s: StructType => + field.copy(dataType = replaceVariantTypes(s, shreddingSchema)) + case VariantType => field.copy(dataType = shreddingSchema) + case _ => field + } + } + StructType(newFields) + } + def prepareWrite( sqlConf: SQLConf, job: Job, @@ -454,8 +470,23 @@ object ParquetUtils extends Logging { ParquetOutputFormat.setWriteSupportClass(job, classOf[ParquetWriteSupport]) + val shreddingSchema = if (sqlConf.getConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED) && + !sqlConf.getConf(SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST).isEmpty) { + // Convert the schema to a shredding schema, and replace it anywhere that there is a + // VariantType in the original schema. + val simpleShreddingSchema = DataType.fromDDL( + sqlConf.getConf(SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST) + ) + val oneShreddingSchema = SparkShreddingUtils.variantShreddingSchema(simpleShreddingSchema) + val schemaWithMetadata = SparkShreddingUtils.addWriteShreddingMetadata(oneShreddingSchema) + Some(replaceVariantTypes(dataSchema, schemaWithMetadata)) + } else { + None + } + // This metadata is useful for keeping UDTs like Vector/Matrix. 
ParquetWriteSupport.setSchema(dataSchema, conf) + shreddingSchema.foreach(ParquetWriteSupport.setShreddingSchema(_, conf)) // Sets flags for `ParquetWriteSupport`, which converts Catalyst schema to Parquet // schema and writes actual rows to Parquet files. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala index 89a1cd5d4375a..35eb57a2e4fb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.DataSourceUtils import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.types._ +import org.apache.spark.types.variant.Variant /** * A Parquet [[WriteSupport]] implementation that writes Catalyst [[InternalRow]]s as Parquet @@ -59,6 +60,10 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { // Schema of the `InternalRow`s to be written private var schema: StructType = _ + // Schema of the `InternalRow`s to be written, with VariantType replaced with its shredding + // schema, if appropriate. 
+ private var shreddedSchema: StructType = _ + // `ValueWriter`s for all fields of the schema private var rootFieldWriters: Array[ValueWriter] = _ @@ -95,7 +100,16 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { override def init(configuration: Configuration): WriteContext = { val schemaString = configuration.get(ParquetWriteSupport.SPARK_ROW_SCHEMA) + val shreddedSchemaString = configuration.get(ParquetWriteSupport.SPARK_VARIANT_SHREDDING_SCHEMA) this.schema = StructType.fromString(schemaString) + // If shreddingSchemaString is provided, we use that everywhere in the writer, except for + // setting the spark schema in the Parquet metadata. If it isn't provided, it means that there + // are no shredded Variant columns, so it is identical to this.schema. + this.shreddedSchema = if (shreddedSchemaString == null) { + this.schema + } else { + StructType.fromString(shreddedSchemaString) + } this.writeLegacyParquetFormat = { // `SQLConf.PARQUET_WRITE_LEGACY_FORMAT` should always be explicitly set in ParquetRelation assert(configuration.get(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key) != null) @@ -108,9 +122,9 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { SQLConf.ParquetOutputTimestampType.withName(configuration.get(key)) } - this.rootFieldWriters = schema.map(_.dataType).map(makeWriter).toArray[ValueWriter] + this.rootFieldWriters = shreddedSchema.map(_.dataType).map(makeWriter).toArray[ValueWriter] - val messageType = new SparkToParquetSchemaConverter(configuration).convert(schema) + val messageType = new SparkToParquetSchemaConverter(configuration).convert(shreddedSchema) val metadata = Map( SPARK_VERSION_METADATA_KEY -> SPARK_VERSION_SHORT, ParquetReadSupport.SPARK_METADATA_KEY -> schemaString @@ -132,13 +146,23 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { } } - logDebug( - s"""Initialized Parquet WriteSupport with Catalyst schema: - |${schema.prettyJson} - |and corresponding 
Parquet message type: - |$messageType - """.stripMargin) - + if (shreddedSchemaString == null) { + logDebug( + s"""Initialized Parquet WriteSupport with Catalyst schema: + |${schema.prettyJson} + |and corresponding Parquet message type: + |$messageType + """.stripMargin) + } else { + logDebug( + s"""Initialized Parquet WriteSupport with Catalyst schema: + |${schema.prettyJson} + |and shredding schema: + |${shreddedSchema.prettyJson} + |and corresponding Parquet message type: + |$messageType + """.stripMargin) + } new WriteContext(messageType, metadata.asJava) } @@ -148,7 +172,7 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { override def write(row: InternalRow): Unit = { consumeMessage { - writeFields(row, schema, rootFieldWriters) + writeFields(row, shreddedSchema, rootFieldWriters) } } @@ -250,6 +274,17 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { } } + case s: StructType if SparkShreddingUtils.isVariantShreddingStruct(s) => + val fieldWriters = s.map(_.dataType).map(makeWriter).toArray[ValueWriter] + val variantShreddingSchema = SparkShreddingUtils.buildVariantSchema(s) + (row: SpecializedGetters, ordinal: Int) => + val v = row.getVariant(ordinal) + val variant = new Variant(v.getValue, v.getMetadata) + val shreddedValues = SparkShreddingUtils.castShredded(variant, variantShreddingSchema) + consumeGroup { + writeFields(shreddedValues, s, fieldWriters) + } + case t: StructType => val fieldWriters = t.map(_.dataType).map(makeWriter).toArray[ValueWriter] (row: SpecializedGetters, ordinal: Int) => @@ -499,6 +534,10 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { object ParquetWriteSupport { val SPARK_ROW_SCHEMA: String = "org.apache.spark.sql.parquet.row.attributes" + // A version of `SPARK_ROW_SCHEMA`, where one or more Variant attributes have been replace with a + // shredded struct schema. 
+ val SPARK_VARIANT_SHREDDING_SCHEMA: String = + "org.apache.spark.sql.parquet.variant.shredding.attributes" def setSchema(schema: StructType, configuration: Configuration): Unit = { configuration.set(SPARK_ROW_SCHEMA, schema.json) @@ -506,4 +545,8 @@ object ParquetWriteSupport { ParquetOutputFormat.WRITER_VERSION, ParquetProperties.WriterVersion.PARQUET_1_0.toString) } + + def setShreddingSchema(shreddingSchema: StructType, configuration: Configuration): Unit = { + configuration.set(SPARK_VARIANT_SHREDDING_SCHEMA, shreddingSchema.json) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala index 2b81668b88b87..ffb6704061e66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkShreddingUtils.scala @@ -17,19 +17,438 @@ package org.apache.spark.sql.execution.datasources.parquet +import org.apache.parquet.io.ColumnIOFactory +import org.apache.parquet.schema.{Type => ParquetType, Types => ParquetTypes} + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.util.GenericArrayData -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.expressions.variant._ +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData} +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.execution.RowToColumnConverter +import org.apache.spark.sql.execution.datasources.VariantMetadata +import 
org.apache.spark.sql.execution.vectorized.WritableColumnVector import org.apache.spark.sql.types._ import org.apache.spark.types.variant._ +import org.apache.spark.types.variant.VariantUtil.Type import org.apache.spark.unsafe.types._ +case class SparkShreddedRow(row: SpecializedGetters) extends ShreddingUtils.ShreddedRow { + override def isNullAt(ordinal: Int): Boolean = row.isNullAt(ordinal) + override def getBoolean(ordinal: Int): Boolean = row.getBoolean(ordinal) + override def getByte(ordinal: Int): Byte = row.getByte(ordinal) + override def getShort(ordinal: Int): Short = row.getShort(ordinal) + override def getInt(ordinal: Int): Int = row.getInt(ordinal) + override def getLong(ordinal: Int): Long = row.getLong(ordinal) + override def getFloat(ordinal: Int): Float = row.getFloat(ordinal) + override def getDouble(ordinal: Int): Double = row.getDouble(ordinal) + override def getDecimal(ordinal: Int, precision: Int, scale: Int): java.math.BigDecimal = + row.getDecimal(ordinal, precision, scale).toJavaBigDecimal + override def getString(ordinal: Int): String = row.getUTF8String(ordinal).toString + override def getBinary(ordinal: Int): Array[Byte] = row.getBinary(ordinal) + override def getStruct(ordinal: Int, numFields: Int): SparkShreddedRow = + SparkShreddedRow(row.getStruct(ordinal, numFields)) + override def getArray(ordinal: Int): SparkShreddedRow = + SparkShreddedRow(row.getArray(ordinal)) + override def numElements(): Int = row.asInstanceOf[ArrayData].numElements() +} + +// The search result of a `VariantPathSegment` in a `VariantSchema`. +case class SchemaPathSegment( + rawPath: VariantPathSegment, + // Whether this path segment is an object or array extraction. + isObject: Boolean, + // `schema.typedIdx`, if the path exists in the schema (for object extraction, the schema + // should contain an object `typed_value` containing the requested field; similar for array + // extraction). Negative otherwise. 
+ typedIdx: Int, + // For object extraction, it is the index of the desired field in `schema.objectSchema`. If the + // requested field doesn't exist, both `extractionIdx/typedIdx` are set to negative. + // For array extraction, it is the array index. The information is already stored in `rawPath`, + // but accessing a raw int should be more efficient than `rawPath`, which is an `Either`. + extractionIdx: Int) + +// Represent a single field in a variant struct (see `VariantMetadata` for definition), that is, a +// single requested field that the scan should produce by extracting from the variant column. +case class FieldToExtract(path: Array[SchemaPathSegment], reader: ParquetVariantReader) + +// A helper class to cast from scalar `typed_value` into a scalar `dataType`. Need a custom +// expression because it has different error reporting code than `Cast`. +case class ScalarCastHelper( + child: Expression, + dataType: DataType, + castArgs: VariantCastArgs) extends UnaryExpression { + // The expression is only for the internal use of `ScalarReader`, which can guarantee the child + // is not nullable. + assert(!child.nullable) + + // If `cast` is null, it means the cast always fails because the type combination is not allowed. + private val cast = if (Cast.canAnsiCast(child.dataType, dataType)) { + Cast(child, dataType, castArgs.zoneStr, EvalMode.TRY) + } else { + null + } + // Cast the input to string. Only used for reporting an invalid cast. + private val castToString = Cast(child, StringType, castArgs.zoneStr, EvalMode.ANSI) + + override def nullable: Boolean = !castArgs.failOnError + override def withNewChildInternal(newChild: Expression): UnaryExpression = copy(child = newChild) + + // No need to define the interpreted version of `eval`: the codegen must succeed. + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + // Throw an error or do nothing, depending on `castArgs.failOnError`. 
+ val invalidCastCode = if (castArgs.failOnError) { + val castToStringCode = castToString.genCode(ctx) + val typeObj = ctx.addReferenceObj("dataType", dataType) + val cls = classOf[ScalarCastHelper].getName + s""" + ${castToStringCode.code} + $cls.throwInvalidVariantCast(${castToStringCode.value}, $typeObj); + """ + } else { + "" + } + val customCast = (child.dataType, dataType) match { + case (_: LongType, _: TimestampType) => "castLongToTimestamp" + case (_: DecimalType, _: TimestampType) => "castDecimalToTimestamp" + case (_: DecimalType, _: StringType) => "castDecimalToString" + case _ => null + } + if (customCast != null) { + val childCode = child.genCode(ctx) + // We can avoid the try-catch block for decimal -> string, but the performance benefit is + // little. We can also be more specific in the exception type, like catching + // `ArithmeticException` instead of `Exception`, but it is unnecessary. The `try_cast` codegen + // also catches `Exception` instead of specific exceptions. + val code = code""" + ${childCode.code} + boolean ${ev.isNull} = false; + ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + try { + ${ev.value} = ${classOf[VariantGet].getName}.$customCast(${childCode.value}); + } catch (Exception e) { + ${ev.isNull} = true; + $invalidCastCode + } + """ + ev.copy(code = code) + } else if (cast != null) { + val castCode = cast.genCode(ctx) + val code = code""" + ${castCode.code} + boolean ${ev.isNull} = ${castCode.isNull}; + ${CodeGenerator.javaType(dataType)} ${ev.value} = ${castCode.value}; + if (${ev.isNull}) { $invalidCastCode } + """ + ev.copy(code = code) + } else { + val code = code""" + boolean ${ev.isNull} = true; + ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + if (${ev.isNull}) { $invalidCastCode } + """ + ev.copy(code = code) + } + } +} + +object ScalarCastHelper { + // A helper function for codegen. 
The java compiler doesn't allow throwing a `Throwable` in a + // method without `throws` annotation. + def throwInvalidVariantCast(value: UTF8String, dataType: DataType): Any = + throw QueryExecutionErrors.invalidVariantCast(value.toString, dataType) +} + +// The base class to read Parquet variant values into a Spark type. +// For convenience, we also allow creating an instance of the base class itself. None of its +// functions can be used, but it can serve as a container of `targetType` and `castArgs`. +class ParquetVariantReader( + val schema: VariantSchema, val targetType: DataType, val castArgs: VariantCastArgs) { + // Read from a row containing a Parquet variant value (shredded or unshredded) and return a value + // of `targetType`. The row schema is described by `schema`. + // This function throws MALFORMED_VARIANT if the variant is missing. If the variant can be + // legally missing (the only possible situation is struct fields in object `typed_value`), the + // caller should check for it and avoid calling this function if the variant is missing. + def read(row: InternalRow, topLevelMetadata: Array[Byte]): Any = { + if (schema.typedIdx < 0 || row.isNullAt(schema.typedIdx)) { + if (schema.variantIdx < 0 || row.isNullAt(schema.variantIdx)) { + // Both `typed_value` and `value` are null, meaning the variant is missing. + throw QueryExecutionErrors.malformedVariant() + } + val v = new Variant(row.getBinary(schema.variantIdx), topLevelMetadata) + VariantGet.cast(v, targetType, castArgs) + } else { + readFromTyped(row, topLevelMetadata) + } + } + + // Subclasses should override it to produce the read result when `typed_value` is not null. + protected def readFromTyped(row: InternalRow, topLevelMetadata: Array[Byte]): Any = + throw QueryExecutionErrors.unreachableError() + + // A util function to rebuild the variant in binary format from a Parquet variant value. 
+ protected final def rebuildVariant(row: InternalRow, topLevelMetadata: Array[Byte]): Variant = { + val builder = new VariantBuilder(false) + ShreddingUtils.rebuild(SparkShreddedRow(row), topLevelMetadata, schema, builder) + builder.result() + } + + // A util function to throw error or return null when an invalid cast happens. + protected final def invalidCast(row: InternalRow, topLevelMetadata: Array[Byte]): Any = { + if (castArgs.failOnError) { + throw QueryExecutionErrors.invalidVariantCast( + rebuildVariant(row, topLevelMetadata).toJson(castArgs.zoneId), targetType) + } else { + null + } + } +} + +object ParquetVariantReader { + // Create a reader for `targetType`. If `schema` is null, meaning that the extraction path doesn't + // exist in `typed_value`, it returns an instance of `ParquetVariantReader`. As described in the + // class comment, the reader is only a container of `targetType` and `castArgs` in this case. + def apply(schema: VariantSchema, targetType: DataType, castArgs: VariantCastArgs, + isTopLevelUnshredded: Boolean = false): ParquetVariantReader = targetType match { + case _ if schema == null => new ParquetVariantReader(schema, targetType, castArgs) + case s: StructType => new StructReader(schema, s, castArgs) + case a: ArrayType => new ArrayReader(schema, a, castArgs) + case m@MapType(_: StringType, _, _) => new MapReader(schema, m, castArgs) + case v: VariantType => new VariantReader(schema, v, castArgs, isTopLevelUnshredded) + case s: AtomicType => new ScalarReader(schema, s, castArgs) + case _ => + // Type check should have rejected map with non-string type. + throw QueryExecutionErrors.unreachableError(s"Invalid target type: `${targetType.sql}`") + } +} + +// Read Parquet variant values into a Spark struct type. It reads unshredded fields (fields that are +// not in the typed object) from the `value`, and reads the shredded fields from the object +// `typed_value`. 
+// `value` must not contain any shredded field according to the shredding spec, but this requirement +// is not enforced. If `value` does contain a shredded field, no error will occur, and the field in +// object `typed_value` will be the final result. +private[this] final class StructReader( + schema: VariantSchema, targetType: StructType, castArgs: VariantCastArgs) + extends ParquetVariantReader(schema, targetType, castArgs) { + // For each field in `targetType`, store the index of the field with the same name in object + // `typed_value`, or -1 if it doesn't exist in object `typed_value`. + private[this] val fieldInputIndices: Array[Int] = targetType.fields.map { f => + val inputIdx = if (schema.objectSchemaMap != null) schema.objectSchemaMap.get(f.name) else null + if (inputIdx != null) inputIdx.intValue() else -1 + } + // For each field in `targetType`, store the reader from the corresponding field in object + // `typed_value`, or null if it doesn't exist in object `typed_value`. + private[this] val fieldReaders: Array[ParquetVariantReader] = + targetType.fields.zip(fieldInputIndices).map { case (f, inputIdx) => + if (inputIdx >= 0) { + val fieldSchema = schema.objectSchema(inputIdx).schema + ParquetVariantReader(fieldSchema, f.dataType, castArgs) + } else { + null + } + } + // If all fields in `targetType` can be found in object `typed_value`, then the reader doesn't + // need to read from `value`. 
+ private[this] val needUnshreddedObject: Boolean = fieldInputIndices.exists(_ < 0) + + override def readFromTyped(row: InternalRow, topLevelMetadata: Array[Byte]): Any = { + if (schema.objectSchema == null) return invalidCast(row, topLevelMetadata) + val obj = row.getStruct(schema.typedIdx, schema.objectSchema.length) + val result = new GenericInternalRow(fieldInputIndices.length) + var unshreddedObject: Variant = null + if (needUnshreddedObject && schema.variantIdx >= 0 && !row.isNullAt(schema.variantIdx)) { + unshreddedObject = new Variant(row.getBinary(schema.variantIdx), topLevelMetadata) + if (unshreddedObject.getType != Type.OBJECT) throw QueryExecutionErrors.malformedVariant() + } + val numFields = fieldInputIndices.length + var i = 0 + while (i < numFields) { + val inputIdx = fieldInputIndices(i) + if (inputIdx >= 0) { + // Shredded field must not be null. + if (obj.isNullAt(inputIdx)) throw QueryExecutionErrors.malformedVariant() + val fieldSchema = schema.objectSchema(inputIdx).schema + val fieldInput = obj.getStruct(inputIdx, fieldSchema.numFields) + // Only read from the shredded field if it is not missing. + if ((fieldSchema.typedIdx >= 0 && !fieldInput.isNullAt(fieldSchema.typedIdx)) || + (fieldSchema.variantIdx >= 0 && !fieldInput.isNullAt(fieldSchema.variantIdx))) { + result.update(i, fieldReaders(i).read(fieldInput, topLevelMetadata)) + } + } else if (unshreddedObject != null) { + val fieldName = targetType.fields(i).name + val fieldType = targetType.fields(i).dataType + val unshreddedField = unshreddedObject.getFieldByKey(fieldName) + if (unshreddedField != null) { + result.update(i, VariantGet.cast(unshreddedField, fieldType, castArgs)) + } + } + i += 1 + } + result + } +} + +// Read Parquet variant values into a Spark array type. 
+private[this] final class ArrayReader( + schema: VariantSchema, targetType: ArrayType, castArgs: VariantCastArgs) + extends ParquetVariantReader(schema, targetType, castArgs) { + private[this] val elementReader = if (schema.arraySchema != null) { + ParquetVariantReader(schema.arraySchema, targetType.elementType, castArgs) + } else { + null + } + + override def readFromTyped(row: InternalRow, topLevelMetadata: Array[Byte]): Any = { + if (schema.arraySchema == null) return invalidCast(row, topLevelMetadata) + val elementNumFields = schema.arraySchema.numFields + val arr = row.getArray(schema.typedIdx) + val size = arr.numElements() + val result = new Array[Any](size) + var i = 0 + while (i < size) { + // Shredded array element must not be null. + if (arr.isNullAt(i)) throw QueryExecutionErrors.malformedVariant() + result(i) = elementReader.read(arr.getStruct(i, elementNumFields), topLevelMetadata) + i += 1 + } + new GenericArrayData(result) + } +} + +// Read Parquet variant values into a Spark map type with string key type. The input must be object +// for a valid cast. The resulting map contains shredded fields from object `typed_value` and +// unshredded fields from object `value`. +// `value` must not contain any shredded field according to the shredding spec. Unlike +// `StructReader`, this requirement is enforced in `MapReader`. If `value` does contain a shredded +// field, throw a MALFORMED_VARIANT error. The purpose is to avoid duplicate map keys. +private[this] final class MapReader( + schema: VariantSchema, targetType: MapType, castArgs: VariantCastArgs) + extends ParquetVariantReader(schema, targetType, castArgs) { + // Readers that convert each shredded field into the map value type. + private[this] val valueReaders = if (schema.objectSchema != null) { + schema.objectSchema.map { f => + ParquetVariantReader(f.schema, targetType.valueType, castArgs) + } + } else { + null + } + // `UTF8String` representation of shredded field names. 
Do the `String -> UTF8String` once, so + // that `readFromTyped` doesn't need to do it repeatedly. + private[this] val shreddedFieldNames = if (schema.objectSchema != null) { + schema.objectSchema.map { f => UTF8String.fromString(f.fieldName) } + } else { + null + } + + override def readFromTyped(row: InternalRow, topLevelMetadata: Array[Byte]): Any = { + if (schema.objectSchema == null) return invalidCast(row, topLevelMetadata) + val obj = row.getStruct(schema.typedIdx, schema.objectSchema.length) + val numShreddedFields = valueReaders.length + var unshreddedObject: Variant = null + if (schema.variantIdx >= 0 && !row.isNullAt(schema.variantIdx)) { + unshreddedObject = new Variant(row.getBinary(schema.variantIdx), topLevelMetadata) + if (unshreddedObject.getType != Type.OBJECT) throw QueryExecutionErrors.malformedVariant() + } + val numUnshreddedFields = if (unshreddedObject != null) unshreddedObject.objectSize() else 0 + var keyArray = new Array[UTF8String](numShreddedFields + numUnshreddedFields) + var valueArray = new Array[Any](numShreddedFields + numUnshreddedFields) + var mapLength = 0 + var i = 0 + while (i < numShreddedFields) { + // Shredded field must not be null. + if (obj.isNullAt(i)) throw QueryExecutionErrors.malformedVariant() + val fieldSchema = schema.objectSchema(i).schema + val fieldInput = obj.getStruct(i, fieldSchema.numFields) + // Only add the shredded field to map if it is not missing. 
+ if ((fieldSchema.typedIdx >= 0 && !fieldInput.isNullAt(fieldSchema.typedIdx)) || + (fieldSchema.variantIdx >= 0 && !fieldInput.isNullAt(fieldSchema.variantIdx))) { + keyArray(mapLength) = shreddedFieldNames(i) + valueArray(mapLength) = valueReaders(i).read(fieldInput, topLevelMetadata) + mapLength += 1 + } + i += 1 + } + i = 0 + while (i < numUnshreddedFields) { + val field = unshreddedObject.getFieldAtIndex(i) + if (schema.objectSchemaMap.containsKey(field.key)) { + throw QueryExecutionErrors.malformedVariant() + } + keyArray(mapLength) = UTF8String.fromString(field.key) + valueArray(mapLength) = VariantGet.cast(field.value, targetType.valueType, castArgs) + mapLength += 1 + i += 1 + } + // Need to shrink the arrays if there are missing shredded fields. + if (mapLength < keyArray.length) { + keyArray = keyArray.slice(0, mapLength) + valueArray = valueArray.slice(0, mapLength) + } + ArrayBasedMapData(keyArray, valueArray) + } +} + +// Read Parquet variant values into a Spark variant type (the binary format). +private[this] final class VariantReader( + schema: VariantSchema, targetType: DataType, castArgs: VariantCastArgs, + // An optional optimization: the user can set it to true if the Parquet variant column is + // unshredded and the extraction path is empty. We are not required to do anything special, but + // we can avoid rebuilding variant for optimization purposes. + private[this] val isTopLevelUnshredded: Boolean) + extends ParquetVariantReader(schema, targetType, castArgs) { + override def read(row: InternalRow, topLevelMetadata: Array[Byte]): Any = { + if (isTopLevelUnshredded) { + if (row.isNullAt(schema.variantIdx)) throw QueryExecutionErrors.malformedVariant() + return new VariantVal(row.getBinary(schema.variantIdx), topLevelMetadata) + } + val v = rebuildVariant(row, topLevelMetadata) + new VariantVal(v.getValue, v.getMetadata) + } +} + +// Read Parquet variant values into a Spark scalar type. 
When `typed_value` is not null but not a +// scalar, all other target types should return an invalid cast, but only the string target type can +// still build a string from array/object `typed_value`. For scalar `typed_value`, it depends on +// `ScalarCastHelper` to perform the cast. +// According to the shredding spec, scalar `typed_value` and `value` must not be non-null at the +// same time. The requirement is not enforced in this reader. If they are both non-null, no error +// will occur, and the reader will read from `typed_value`. +private[this] final class ScalarReader( + schema: VariantSchema, targetType: DataType, castArgs: VariantCastArgs) + extends ParquetVariantReader(schema, targetType, castArgs) { + private[this] val castProject = if (schema.scalarSchema != null) { + val scalarType = SparkShreddingUtils.scalarSchemaToSparkType(schema.scalarSchema) + // Read the cast input from ordinal `schema.typedIdx` in the input row. The cast input is never + // null, because `readFromTyped` is only called when `typed_value` is not null. 
+ val input = BoundReference(schema.typedIdx, scalarType, nullable = false) + MutableProjection.create(Seq(ScalarCastHelper(input, targetType, castArgs))) + } else { + null + } + + override def readFromTyped(row: InternalRow, topLevelMetadata: Array[Byte]): Any = { + if (castProject == null) { + return if (targetType.isInstanceOf[StringType]) { + UTF8String.fromString(rebuildVariant(row, topLevelMetadata).toJson(castArgs.zoneId)) + } else { + invalidCast(row, topLevelMetadata) + } + } + val result = castProject(row) + if (result.isNullAt(0)) null else result.get(0, targetType) + } +} + case object SparkShreddingUtils { val VariantValueFieldName = "value"; val TypedValueFieldName = "typed_value"; val MetadataFieldName = "metadata"; + val VARIANT_WRITE_SHREDDING_KEY: String = "__VARIANT_WRITE_SHREDDING_KEY" + def buildVariantSchema(schema: DataType): VariantSchema = { schema match { case s: StructType => buildVariantSchema(s, topLevel = true) @@ -53,16 +472,21 @@ case object SparkShreddingUtils { */ def variantShreddingSchema(dataType: DataType, isTopLevel: Boolean = true): StructType = { val fields = dataType match { - case ArrayType(elementType, containsNull) => + case ArrayType(elementType, _) => + // Always set containsNull to false. One of value or typed_value must always be set for + // array elements. val arrayShreddingSchema = - ArrayType(variantShreddingSchema(elementType, false), containsNull) + ArrayType(variantShreddingSchema(elementType, false), containsNull = false) Seq( StructField(VariantValueFieldName, BinaryType, nullable = true), StructField(TypedValueFieldName, arrayShreddingSchema, nullable = true) ) case StructType(fields) => + // The field name level is always non-nullable: Variant null values are represented in the + // "value" column as "00", and missing values are represented by setting both "value" and + // "typed_value" to null. 
val objectShreddingSchema = StructType(fields.map(f => - f.copy(dataType = variantShreddingSchema(f.dataType, false)))) + f.copy(dataType = variantShreddingSchema(f.dataType, false), nullable = false))) Seq( StructField(VariantValueFieldName, BinaryType, nullable = true), StructField(TypedValueFieldName, objectShreddingSchema, nullable = true) @@ -89,6 +513,27 @@ case object SparkShreddingUtils { } } + /** + * Given a schema that represents a valid shredding schema (e.g. constructed by + * SparkShreddingUtils.variantShreddingSchema), add metadata to the top-level fields to mark it + * as a shredding schema for writers. + */ + def addWriteShreddingMetadata(schema: StructType): StructType = { + val newFields = schema.fields.map { f => + f.copy(metadata = new + MetadataBuilder() + .withMetadata(f.metadata) + .putNull(VARIANT_WRITE_SHREDDING_KEY).build()) + } + StructType(newFields) + } + + // Check if the struct is marked with metadata set by addWriteShreddingMetadata - i.e. it + // represents a Variant converted to a shredding schema for writing. + def isVariantShreddingStruct(s: StructType): Boolean = { + s.fields.length > 0 && s.fields.forall(_.metadata.contains(VARIANT_WRITE_SHREDDING_KEY)) + } + /* * Given a Spark schema that represents a valid shredding schema (e.g. constructed by * SparkShreddingUtils.variantShreddingSchema), return the corresponding VariantSchema. @@ -101,6 +546,11 @@ case object SparkShreddingUtils { var objectSchema: Array[VariantSchema.ObjectField] = null var arraySchema: VariantSchema = null + // The struct must not be empty or contain duplicate field names. The latter is enforced in the + // loop below (`if (typedIdx != -1)` and other similar checks). 
+ if (schema.fields.isEmpty) { + throw QueryCompilationErrors.invalidVariantShreddingSchema(schema) + } schema.fields.zipWithIndex.foreach { case (f, i) => f.name match { case TypedValueFieldName => @@ -110,8 +560,11 @@ case object SparkShreddingUtils { typedIdx = i f.dataType match { case StructType(fields) => - objectSchema = - new Array[VariantSchema.ObjectField](fields.length) + // The struct must not be empty or contain duplicate field names. + if (fields.isEmpty || fields.map(_.name).distinct.length != fields.length) { + throw QueryCompilationErrors.invalidVariantShreddingSchema(schema) + } + objectSchema = new Array[VariantSchema.ObjectField](fields.length) fields.zipWithIndex.foreach { case (field, fieldIdx) => field.dataType match { case s: StructType => @@ -163,6 +616,32 @@ case object SparkShreddingUtils { scalarSchema, objectSchema, arraySchema) } + // Convert a scalar variant schema into a Spark scalar type. + def scalarSchemaToSparkType(scalar: VariantSchema.ScalarType): DataType = scalar match { + case _: VariantSchema.StringType => StringType + case it: VariantSchema.IntegralType => it.size match { + case VariantSchema.IntegralSize.BYTE => ByteType + case VariantSchema.IntegralSize.SHORT => ShortType + case VariantSchema.IntegralSize.INT => IntegerType + case VariantSchema.IntegralSize.LONG => LongType + } + case _: VariantSchema.FloatType => FloatType + case _: VariantSchema.DoubleType => DoubleType + case _: VariantSchema.BooleanType => BooleanType + case _: VariantSchema.BinaryType => BinaryType + case dt: VariantSchema.DecimalType => DecimalType(dt.precision, dt.scale) + case _: VariantSchema.DateType => DateType + case _: VariantSchema.TimestampType => TimestampType + case _: VariantSchema.TimestampNTZType => TimestampNTZType + } + + // Convert a Parquet type into a Spark data type. 
+ def parquetTypeToSparkType(parquetType: ParquetType): DataType = { + val messageType = ParquetTypes.buildMessage().addField(parquetType).named("foo") + val column = new ColumnIOFactory().getColumnIO(messageType) + new ParquetToSparkSchemaConverter().convertField(column.getChild(0)).sparkType + } + class SparkShreddedResult(schema: VariantSchema) extends VariantShreddingWriter.ShreddedResult { // Result is stored as an InternalRow. val row = new GenericInternalRow(schema.numFields) @@ -217,4 +696,188 @@ case object SparkShreddingUtils { .asInstanceOf[SparkShreddedResult] .row } + + // Return a list of fields to extract. `targetType` must be either variant or variant struct. + // If it is variant, return null because the target is the full variant and there is no field to + // extract. If it is variant struct, return a list of fields matching the variant struct fields. + def getFieldsToExtract(targetType: DataType, inputSchema: VariantSchema): Array[FieldToExtract] = + targetType match { + case _: VariantType => null + case s: StructType if VariantMetadata.isVariantStruct(s) => + s.fields.map { f => + val metadata = VariantMetadata.fromMetadata(f.metadata) + val rawPath = metadata.parsedPath() + val schemaPath = new Array[SchemaPathSegment](rawPath.length) + var schema = inputSchema + // Search `rawPath` in `schema` to produce `schemaPath`. If a raw path segment cannot be + // found at a certain level of the file type, then `typedIdx` will be -1 starting from + // this position, and the final `schema` will be null. 
+ for (i <- rawPath.indices) { + val isObject = rawPath(i).isInstanceOf[ObjectExtraction] + var typedIdx = -1 + var extractionIdx = -1 + rawPath(i) match { + case ObjectExtraction(key) if schema != null && schema.objectSchema != null => + val fieldIdx = schema.objectSchemaMap.get(key) + if (fieldIdx != null) { + typedIdx = schema.typedIdx + extractionIdx = fieldIdx + schema = schema.objectSchema(fieldIdx).schema + } else { + schema = null + } + case ArrayExtraction(index) if schema != null && schema.arraySchema != null => + typedIdx = schema.typedIdx + extractionIdx = index + schema = schema.arraySchema + case _ => + schema = null + } + schemaPath(i) = SchemaPathSegment(rawPath(i), isObject, typedIdx, extractionIdx) + } + val reader = ParquetVariantReader(schema, f.dataType, VariantCastArgs( + metadata.failOnError, + Some(metadata.timeZoneId), + DateTimeUtils.getZoneId(metadata.timeZoneId)), + isTopLevelUnshredded = schemaPath.isEmpty && inputSchema.isUnshredded) + FieldToExtract(schemaPath, reader) + } + case _ => + throw QueryExecutionErrors.unreachableError(s"Invalid target type: `${targetType.sql}`") + } + + // Extract a single variant struct field from a Parquet variant value. It steps into `inputRow` + // according to the variant extraction path, and read the extracted value as the target type. + private def extractField( + inputRow: InternalRow, + topLevelMetadata: Array[Byte], + inputSchema: VariantSchema, + pathList: Array[SchemaPathSegment], + reader: ParquetVariantReader): Any = { + var pathIdx = 0 + val pathLen = pathList.length + var row = inputRow + var schema = inputSchema + while (pathIdx < pathLen) { + val path = pathList(pathIdx) + + if (path.typedIdx < 0) { + // The extraction doesn't exist in `typed_value`. Try to extract the remaining part of the + // path in `value`. 
+ val variantIdx = schema.variantIdx + if (variantIdx < 0 || row.isNullAt(variantIdx)) return null + var v = new Variant(row.getBinary(variantIdx), topLevelMetadata) + while (pathIdx < pathLen) { + v = pathList(pathIdx).rawPath match { + case ObjectExtraction(key) if v.getType == Type.OBJECT => v.getFieldByKey(key) + case ArrayExtraction(index) if v.getType == Type.ARRAY => v.getElementAtIndex(index) + case _ => null + } + if (v == null) return null + pathIdx += 1 + } + return VariantGet.cast(v, reader.targetType, reader.castArgs) + } + + if (row.isNullAt(path.typedIdx)) return null + if (path.isObject) { + val obj = row.getStruct(path.typedIdx, schema.objectSchema.length) + // Object field must not be null. + if (obj.isNullAt(path.extractionIdx)) throw QueryExecutionErrors.malformedVariant() + schema = schema.objectSchema(path.extractionIdx).schema + row = obj.getStruct(path.extractionIdx, schema.numFields) + // Return null if the field is missing. + if ((schema.typedIdx < 0 || row.isNullAt(schema.typedIdx)) && + (schema.variantIdx < 0 || row.isNullAt(schema.variantIdx))) { + return null + } + } else { + val arr = row.getArray(path.typedIdx) + // Return null if the extraction index is out of bound. + if (path.extractionIdx >= arr.numElements()) return null + // Array element must not be null. + if (arr.isNullAt(path.extractionIdx)) throw QueryExecutionErrors.malformedVariant() + schema = schema.arraySchema + row = arr.getStruct(path.extractionIdx, schema.numFields) + } + pathIdx += 1 + } + reader.read(row, topLevelMetadata) + } + + // Assemble a variant (binary format) from a Parquet variant value. + def assembleVariant(row: InternalRow, schema: VariantSchema): VariantVal = { + val v = ShreddingUtils.rebuild(SparkShreddedRow(row), schema) + new VariantVal(v.getValue, v.getMetadata) + } + + // Assemble a variant struct, in which each field is extracted from the Parquet variant value. 
+ def assembleVariantStruct( + inputRow: InternalRow, + schema: VariantSchema, + fields: Array[FieldToExtract]): InternalRow = { + if (inputRow.isNullAt(schema.topLevelMetadataIdx)) { + throw QueryExecutionErrors.malformedVariant() + } + val topLevelMetadata = inputRow.getBinary(schema.topLevelMetadataIdx) + val numFields = fields.length + val resultRow = new GenericInternalRow(numFields) + var fieldIdx = 0 + while (fieldIdx < numFields) { + resultRow.update(fieldIdx, extractField(inputRow, topLevelMetadata, schema, + fields(fieldIdx).path, fields(fieldIdx).reader)) + fieldIdx += 1 + } + resultRow + } + + // Assemble a batch of variant (binary format) from a batch of Parquet variant values. + def assembleVariantBatch( + input: WritableColumnVector, + output: WritableColumnVector, + schema: VariantSchema): Unit = { + val numRows = input.getElementsAppended + output.reset() + output.reserve(numRows) + val valueChild = output.getChild(0) + val metadataChild = output.getChild(1) + var i = 0 + while (i < numRows) { + if (input.isNullAt(i)) { + output.appendStruct(true) + } else { + output.appendStruct(false) + val v = SparkShreddingUtils.assembleVariant(input.getStruct(i), schema) + valueChild.appendByteArray(v.getValue, 0, v.getValue.length) + metadataChild.appendByteArray(v.getMetadata, 0, v.getMetadata.length) + } + i += 1 + } + } + + // Assemble a batch of variant struct from a batch of Parquet variant values. 
+ def assembleVariantStructBatch( + input: WritableColumnVector, + output: WritableColumnVector, + schema: VariantSchema, + fields: Array[FieldToExtract]): Unit = { + val numRows = input.getElementsAppended + output.reset() + output.reserve(numRows) + val converter = new RowToColumnConverter(StructType(Array(StructField("", output.dataType())))) + val converterVectors = Array(output) + val converterRow = new GenericInternalRow(1) + output.reset() + output.reserve(input.getElementsAppended) + var i = 0 + while (i < numRows) { + if (input.isNullAt(i)) { + converterRow.update(0, null) + } else { + converterRow.update(0, assembleVariantStruct(input.getStruct(i), schema, fields)) + } + converter.convert(converterRow, converterVectors) + i += 1 + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index fcc3a257cd2dd..c78f9702557cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -22,6 +22,7 @@ import java.util.Locale import scala.collection.mutable.{HashMap, HashSet} import scala.jdk.CollectionConverters._ +import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog._ @@ -36,6 +37,7 @@ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.command.ViewHelper.generateViewProperties import org.apache.spark.sql.execution.datasources.{CreateTable => CreateTableV1} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.{StructField, StructType} import 
org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec @@ -46,14 +48,21 @@ import org.apache.spark.util.ArrayImplicits._ * Replaces [[UnresolvedRelation]]s if the plan is for direct query on files. */ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { + + override def conf: SQLConf = sparkSession.sessionState.conf + object UnresolvedRelationResolution { def unapply(plan: LogicalPlan): Option[LogicalPlan] = { - plan match { + val result = plan match { case u: UnresolvedRelation if maybeSQLFile(u) => try { val ds = resolveDataSource(u) Some(LogicalRelation(ds.resolveRelation())) } catch { + case e: SparkUnsupportedOperationException => + u.failAnalysis( + errorClass = e.getCondition, + messageParameters = e.getMessageParameters.asScala.toMap) case _: ClassNotFoundException => None case e: Exception if !e.isInstanceOf[AnalysisException] => // the provider is valid, but failed to create a logical plan @@ -66,6 +75,17 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { case _ => None } + result.foreach(resolvedRelation => plan match { + case unresolvedRelation: UnresolvedRelation => + // We put the resolved relation into the [[AnalyzerBridgeState]] for + // it to be later reused by the single-pass [[Resolver]] to avoid resolving the + // relation metadata twice. 
+ AnalysisContext.get.getSinglePassResolverBridgeState.map { bridgeState => + bridgeState.relationsWithResolvedMetadata.put(unresolvedRelation, resolvedRelation) + } + case _ => + }) + result } } @@ -338,6 +358,9 @@ case class PreprocessTableCreation(catalog: SessionCatalog) extends Rule[Logical SchemaUtils.checkSchemaColumnNameDuplication( schema, conf.caseSensitiveAnalysis) + if (!conf.allowCollationsInMapKeys) { + SchemaUtils.checkNoCollationsInMapKeys(schema) + } val normalizedPartCols = normalizePartitionColumns(schema, table) val normalizedBucketSpec = normalizeBucketSpec(schema, table) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala index 56c44a1256815..86fa0c8523f1e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala @@ -89,6 +89,7 @@ case class CacheTableAsSelectExec( name = TableIdentifier(tempViewName), userSpecifiedColumns = Nil, comment = None, + collation = None, properties = Map.empty, originalText = Some(originalText), plan = query, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala index 9ffa0d728ca28..9c19609dce79a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala @@ -23,16 +23,19 @@ import scala.jdk.CollectionConverters._ import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.spark.SparkContext import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import 
org.apache.spark.sql.catalyst.analysis.TimeTravelSpec import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SessionConfigSupport, SupportsCatalogOptions, SupportsRead, Table, TableProvider} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SessionConfigSupport, StagedTable, StagingTableCatalog, SupportsCatalogOptions, SupportsRead, Table, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability.BATCH_READ import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{LongType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -179,4 +182,34 @@ private[sql] object DataSourceV2Utils extends Logging { extraOptions + ("paths" -> objectMapper.writeValueAsString(paths.toArray)) } } + + /** + * If `table` is a StagedTable, commit the staged changes and report the commit metrics. + * Do nothing if the table is not a StagedTable. 
+ */ + def commitStagedChanges( + sparkContext: SparkContext, table: Table, metrics: Map[String, SQLMetric]): Unit = { + table match { + case stagedTable: StagedTable => + stagedTable.commitStagedChanges() + + val driverMetrics = stagedTable.reportDriverMetrics() + if (driverMetrics.nonEmpty) { + for (taskMetric <- driverMetrics) { + metrics.get(taskMetric.name()).foreach(_.set(taskMetric.value())) + } + + val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toSeq) + } + case _ => + } + } + + def commitMetrics( + sparkContext: SparkContext, tableCatalog: StagingTableCatalog): Map[String, SQLMetric] = { + tableCatalog.supportedCustomMetrics().map { + metric => metric.name() -> SQLMetrics.createV2CustomMetric(sparkContext, metric) + }.toMap + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala index 2679f14144569..be4f5dcb65aa1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala @@ -18,6 +18,9 @@ package org.apache.spark.sql.execution.datasources.v2 import java.io.{FileNotFoundException, IOException} +import org.apache.hadoop.hdfs.BlockMissingException +import org.apache.hadoop.security.AccessControlException + import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.internal.LogKeys.{CURRENT_FILE, PARTITIONED_FILE_READER} import org.apache.spark.rdd.InputFileBlockHolder @@ -48,6 +51,8 @@ class FilePartitionReader[T]( case e: FileNotFoundException if ignoreMissingFiles => logWarning(s"Skipped missing file.", e) currentReader = null + case e @ (_ : AccessControlException | _ : BlockMissingException) => + throw 
FileDataSourceV2.attachFilePath(file.urlEncodedPath, e) case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning( s"Skipped the rest of the content in the corrupted file.", e) @@ -64,6 +69,8 @@ class FilePartitionReader[T]( val hasNext = try { currentReader != null && currentReader.next() } catch { + case e @ (_ : AccessControlException | _ : BlockMissingException) => + throw FileDataSourceV2.attachFilePath(currentReader.file.urlEncodedPath, e) case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => logWarning(log"Skipped the rest of the content in the corrupted file: " + log"${MDC(PARTITIONED_FILE_READER, currentReader)}", e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala index 4eee731e0b2d6..863104da80c2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileTable.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.streaming.{FileStreamSink, MetadataLogFileIndex} @@ -159,6 +160,19 @@ abstract class FileTable( options.asCaseSensitiveMap().asScala new CaseInsensitiveStringMap(finalOptions.asJava) } + + /** + * Merge the options of FileTable and the LogicalWriteInfo while respecting the + * keys of the options carried by LogicalWriteInfo. 
+ */ + protected def mergedWriteInfo(writeInfo: LogicalWriteInfo): LogicalWriteInfo = { + LogicalWriteInfoImpl( + writeInfo.queryId(), + writeInfo.schema(), + mergedOptions(writeInfo.options()), + writeInfo.rowIdSchema(), + writeInfo.metadataSchema()) + } } object FileTable { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWrite.scala index f4cabcb69d08c..77e1ade44780f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWrite.scala @@ -49,7 +49,7 @@ trait FileWrite extends Write { private val schema = info.schema() private val queryId = info.queryId() - private val options = info.options() + val options = info.options() override def description(): String = formatName diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala index 104d8a706efb7..894a3a10d4193 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans.logical.TableSpec import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Column, Identifier, StagedTable, StagingTableCatalog, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.util.Utils case class ReplaceTableExec( @@ -65,6 +66,9 @@ case class AtomicReplaceTableExec( val tableProperties = CatalogV2Util.convertTableProperties(tableSpec) + override val metrics: Map[String, SQLMetric] = + 
DataSourceV2Utils.commitMetrics(sparkContext, catalog) + override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(identifier)) { val table = catalog.loadTable(identifier) @@ -92,7 +96,7 @@ case class AtomicReplaceTableExec( private def commitOrAbortStagedChanges(staged: StagedTable): Unit = { Utils.tryWithSafeFinallyAndFailureCallbacks({ - staged.commitStagedChanges() + DataSourceV2Utils.commitStagedChanges(sparkContext, staged, metrics) })(catchBlock = { staged.abortStagedChanges() }) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala index 37339a34af3db..4195560c5cc1c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala @@ -57,6 +57,7 @@ case class ShowCreateTableExec( showTableOptions(builder, tableOptions) showTablePartitioning(table, builder) showTableComment(table, builder) + showTableCollation(table, builder) showTableLocation(table, builder) showTableProperties(table, builder, tableOptions) } @@ -155,6 +156,12 @@ case class ShowCreateTableExec( .foreach(builder.append) } + private def showTableCollation(table: Table, builder: StringBuilder): Unit = { + Option(table.properties.get(TableCatalog.PROP_COLLATION)) + .map("COLLATION '" + escapeSingleQuotedString(_) + "'\n") + .foreach(builder.append) + } + private def concatByMultiLines(iter: Iterable[String]): String = { iter.mkString("(\n ", ",\n ", ")\n") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index 22c13fd98ced1..0a533645648e6 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -83,7 +83,7 @@ class V2SessionCatalog(catalog: SessionCatalog) } private def hasCustomSessionCatalog: Boolean = { - catalog.conf.contains(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key) + catalog.conf.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION) != "builtin" } override def loadTable(ident: Identifier): Table = { @@ -239,7 +239,8 @@ class V2SessionCatalog(catalog: SessionCatalog) maybeClusterBySpec.map( clusterBySpec => ClusterBySpec.toProperty(newSchema, clusterBySpec, conf.resolver)), tracksPartitionsInCatalog = conf.manageFilesourcePartitions, - comment = Option(properties.get(TableCatalog.PROP_COMMENT))) + comment = Option(properties.get(TableCatalog.PROP_COMMENT)), + collation = Option(properties.get(TableCatalog.PROP_COLLATION))) try { catalog.createTable(tableDesc, ignoreIfExists = false) @@ -290,6 +291,7 @@ class V2SessionCatalog(catalog: SessionCatalog) val schema = CatalogV2Util.applySchemaChanges( catalogTable.schema, changes, catalogTable.provider, "ALTER TABLE") val comment = properties.get(TableCatalog.PROP_COMMENT) + val collation = properties.get(TableCatalog.PROP_COLLATION) val owner = properties.getOrElse(TableCatalog.PROP_OWNER, catalogTable.owner) val location = properties.get(TableCatalog.PROP_LOCATION).map(CatalogUtils.stringToURI) val storage = if (location.isDefined) { @@ -303,7 +305,7 @@ class V2SessionCatalog(catalog: SessionCatalog) catalog.alterTable( catalogTable.copy( properties = finalProperties, schema = schema, owner = owner, comment = comment, - storage = storage)) + collation = collation, storage = storage)) } catch { case _: NoSuchTableException => throw QueryCompilationErrors.noSuchTableError(ident) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala index 319cc1c731577..17b2579ca873a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.{Optional, UUID} +import scala.jdk.CollectionConverters._ + import org.apache.spark.sql.catalyst.expressions.PredicateHelper import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, Project, ReplaceData, WriteDelta} import org.apache.spark.sql.catalyst.rules.Rule @@ -44,7 +46,8 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case a @ AppendData(r: DataSourceV2Relation, query, options, _, None, _) => - val writeBuilder = newWriteBuilder(r.table, options, query.schema) + val writeOptions = mergeOptions(options, r.options.asScala.toMap) + val writeBuilder = newWriteBuilder(r.table, writeOptions, query.schema) val write = writeBuilder.build() val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, r.funCatalog) a.copy(write = Some(write), query = newQuery) @@ -61,7 +64,8 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { }.toArray val table = r.table - val writeBuilder = newWriteBuilder(table, options, query.schema) + val writeOptions = mergeOptions(options, r.options.asScala.toMap) + val writeBuilder = newWriteBuilder(table, writeOptions, query.schema) val write = writeBuilder match { case builder: SupportsTruncate if isTruncate(predicates) => builder.truncate().build() @@ -76,7 +80,8 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { case o @ OverwritePartitionsDynamic(r: DataSourceV2Relation, query, options, _, None) => val table = r.table - val writeBuilder = 
newWriteBuilder(table, options, query.schema) + val writeOptions = mergeOptions(options, r.options.asScala.toMap) + val writeBuilder = newWriteBuilder(table, writeOptions, query.schema) val write = writeBuilder match { case builder: SupportsDynamicOverwrite => builder.overwriteDynamicPartitions().build() @@ -87,31 +92,44 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { o.copy(write = Some(write), query = newQuery) case WriteToMicroBatchDataSource( - relation, table, query, queryId, writeOptions, outputMode, Some(batchId)) => - + relationOpt, table, query, queryId, options, outputMode, Some(batchId)) => + val writeOptions = mergeOptions( + options, relationOpt.map(r => r.options.asScala.toMap).getOrElse(Map.empty)) val writeBuilder = newWriteBuilder(table, writeOptions, query.schema, queryId) val write = buildWriteForMicroBatch(table, writeBuilder, outputMode) val microBatchWrite = new MicroBatchWrite(batchId, write.toStreaming) val customMetrics = write.supportedCustomMetrics.toImmutableArraySeq - val funCatalogOpt = relation.flatMap(_.funCatalog) + val funCatalogOpt = relationOpt.flatMap(_.funCatalog) val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, funCatalogOpt) - WriteToDataSourceV2(relation, microBatchWrite, newQuery, customMetrics) + WriteToDataSourceV2(relationOpt, microBatchWrite, newQuery, customMetrics) case rd @ ReplaceData(r: DataSourceV2Relation, _, query, _, _, None) => val rowSchema = DataTypeUtils.fromAttributes(rd.dataInput) - val writeBuilder = newWriteBuilder(r.table, Map.empty, rowSchema) + val writeOptions = mergeOptions(Map.empty, r.options.asScala.toMap) + val writeBuilder = newWriteBuilder(r.table, writeOptions, rowSchema) val write = writeBuilder.build() val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, r.funCatalog) // project away any metadata columns that could be used for distribution and ordering rd.copy(write = Some(write), query = Project(rd.dataInput, newQuery)) case wd @ 
WriteDelta(r: DataSourceV2Relation, _, query, _, projections, None) => - val deltaWriteBuilder = newDeltaWriteBuilder(r.table, Map.empty, projections) + val writeOptions = mergeOptions(Map.empty, r.options.asScala.toMap) + val deltaWriteBuilder = newDeltaWriteBuilder(r.table, writeOptions, projections) val deltaWrite = deltaWriteBuilder.build() val newQuery = DistributionAndOrderingUtils.prepareQuery(deltaWrite, query, r.funCatalog) wd.copy(write = Some(deltaWrite), query = newQuery) } + private def mergeOptions( + commandOptions: Map[String, String], + dsOptions: Map[String, String]): Map[String, String] = { + // for DataFrame API cases, same options are carried by both Command and DataSourceV2Relation + // for DataFrameV2 API cases, options are only carried by Command + // for SQL cases, options are only carried by DataSourceV2Relation + assert(commandOptions == dsOptions || commandOptions.isEmpty || dsOptions.isEmpty) + commandOptions ++ dsOptions + } + private def buildWriteForMicroBatch( table: SupportsWrite, writeBuilder: WriteBuilder, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index b238b0ce9760c..308b1bceca12a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -110,6 +110,9 @@ case class AtomicCreateTableAsSelectExec( val properties = CatalogV2Util.convertTableProperties(tableSpec) + override val metrics: Map[String, SQLMetric] = + DataSourceV2Utils.commitMetrics(sparkContext, catalog) + override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { if (ifNotExists) { @@ -197,6 +200,9 @@ case class AtomicReplaceTableAsSelectExec( val properties = CatalogV2Util.convertTableProperties(tableSpec) + 
override val metrics: Map[String, SQLMetric] = + DataSourceV2Utils.commitMetrics(sparkContext, catalog) + override protected def run(): Seq[InternalRow] = { val columns = getV2Columns(query.schema, catalog.useNullableQuerySchema) if (catalog.tableExists(ident)) { @@ -474,7 +480,7 @@ trait WritingSparkTask[W <: DataWriter[InternalRow]] extends Logging with Serial log"(task ${MDC(LogKeys.TASK_ID, taskId)}, " + log"attempt ${MDC(LogKeys.TASK_ATTEMPT_ID, attemptId)}, " + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}." + - log"${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)})") + log"${MDC(LogKeys.STAGE_ATTEMPT_ID, stageAttempt)})") dataWriter.commit() } else { @@ -494,7 +500,8 @@ trait WritingSparkTask[W <: DataWriter[InternalRow]] extends Logging with Serial logInfo(log"Committed partition ${MDC(LogKeys.PARTITION_ID, partId)} " + log"(task ${MDC(LogKeys.TASK_ID, taskId)}, " + log"attempt ${MDC(LogKeys.TASK_ATTEMPT_ID, attemptId)}, " + - log"stage ${MDC(LogKeys.STAGE_ID, stageId)}.${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)})") + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}." + + log"${MDC(LogKeys.STAGE_ATTEMPT_ID, stageAttempt)})") DataWritingSparkTaskResult(iterWithMetrics.count, msg) @@ -503,12 +510,14 @@ trait WritingSparkTask[W <: DataWriter[InternalRow]] extends Logging with Serial logError(log"Aborting commit for partition ${MDC(LogKeys.PARTITION_ID, partId)} " + log"(task ${MDC(LogKeys.TASK_ID, taskId)}, " + log"attempt ${MDC(LogKeys.TASK_ATTEMPT_ID, attemptId)}, " + - log"stage ${MDC(LogKeys.STAGE_ID, stageId)}.${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)})") + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}." 
+ + log"${MDC(LogKeys.STAGE_ATTEMPT_ID, stageAttempt)})") dataWriter.abort() logError(log"Aborted commit for partition ${MDC(LogKeys.PARTITION_ID, partId)} " + log"(task ${MDC(LogKeys.TASK_ID, taskId)}, " + log"attempt ${MDC(LogKeys.TASK_ATTEMPT_ID, attemptId)}, " + - log"stage ${MDC(LogKeys.STAGE_ID, stageId)}.${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)})") + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}." + + log"${MDC(LogKeys.STAGE_ATTEMPT_ID, stageAttempt)})") }, finallyBlock = { dataWriter.close() }) @@ -630,10 +639,7 @@ private[v2] trait V2CreateTableAsSelectBaseExec extends LeafV2CommandExec { val qe = session.sessionState.executePlan(append) qe.assertCommandExecuted() - table match { - case st: StagedTable => st.commitStagedChanges() - case _ => - } + DataSourceV2Utils.commitStagedChanges(sparkContext, table, metrics) Nil })(catchBlock = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala index 4c201ca66cf6c..df8df37b711fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVTable.scala @@ -49,10 +49,12 @@ case class CSVTable( CSVDataSource(parsedOptions).inferSchema(sparkSession, files, parsedOptions) } - override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder { - override def build(): Write = CSVWrite(paths, formatName, supportsDataType, info) + override def build(): Write = + CSVWrite(paths, formatName, supportsDataType, mergedWriteInfo(info)) } + } override def supportsDataType(dataType: DataType): Boolean = dataType match { case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWrite.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWrite.scala index f38a1d385a39c..7011fea77d888 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVWrite.scala @@ -58,7 +58,7 @@ case class CSVWrite( } override def getFileExtension(context: TaskAttemptContext): String = { - ".csv" + CodecStreams.getCompressionExtension(context) + "." + csvOptions.extension + CodecStreams.getCompressionExtension(context) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala index 54244c4d95e77..1c1d3393b95a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/json/JsonTable.scala @@ -49,10 +49,12 @@ case class JsonTable( sparkSession, files, parsedOptions) } - override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder { - override def build(): Write = JsonWrite(paths, formatName, supportsDataType, info) + override def build(): Write = + JsonWrite(paths, formatName, supportsDataType, mergedWriteInfo(info)) } + } override def supportsDataType(dataType: DataType): Boolean = dataType match { case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala index 1037370967c87..81c347ae9c59c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcTable.scala @@ -43,10 +43,12 @@ 
case class OrcTable( override def inferSchema(files: Seq[FileStatus]): Option[StructType] = OrcUtils.inferSchema(sparkSession, files, options.asScala.toMap) - override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder { - override def build(): Write = OrcWrite(paths, formatName, supportsDataType, info) + override def build(): Write = + OrcWrite(paths, formatName, supportsDataType, mergedWriteInfo(info)) } + } override def supportsDataType(dataType: DataType): Boolean = dataType match { case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala index 8463a05569c05..28c5a62f91ecb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetTable.scala @@ -43,10 +43,12 @@ case class ParquetTable( override def inferSchema(files: Seq[FileStatus]): Option[StructType] = ParquetUtils.inferSchema(sparkSession, options.asScala.toMap, files) - override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder { - override def build(): Write = ParquetWrite(paths, formatName, supportsDataType, info) + override def build(): Write = + ParquetWrite(paths, formatName, supportsDataType, mergedWriteInfo(info)) } + } override def supportsDataType(dataType: DataType): Boolean = dataType match { case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonCustomMetric.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonCustomMetric.scala index bca1cbed7e70b..7551cd04f20f6 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonCustomMetric.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonCustomMetric.scala @@ -45,6 +45,8 @@ object PythonCustomMetric { // See also `UserDefinedPythonDataSource.createPythonMetrics`. PythonSQLMetrics.pythonSizeMetricsDesc.keys .map(_ -> new SQLMetric("size", -1)).toMap ++ + PythonSQLMetrics.pythonTimingMetricsDesc.keys + .map(_ -> new SQLMetric("timing", -1)).toMap ++ PythonSQLMetrics.pythonOtherMetricsDesc.keys .map(_ -> new SQLMetric("sum", -1)).toMap } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/UserDefinedPythonDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/UserDefinedPythonDataSource.scala index 241d8087fc3c2..b3fd8479bda0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/UserDefinedPythonDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/UserDefinedPythonDataSource.scala @@ -307,7 +307,7 @@ case class PythonDataSourceReadInfo( /** * Send information to a Python process to plan a Python data source read. * - * @param func an Python data source instance + * @param func a Python data source instance * @param inputSchema input schema to the data source read from its child plan * @param outputSchema output schema of the Python data source */ @@ -342,7 +342,7 @@ private class UserDefinedPythonDataSourceReadRunner( if (length == SpecialLengths.PYTHON_EXCEPTION_THROWN) { val msg = PythonWorkerUtils.readUTF(dataIn) throw QueryCompilationErrors.pythonDataSourceError( - action = "plan", tpe = "read", msg = msg) + action = "initialize", tpe = "reader", msg = msg) } // Receive the pickled 'read' function. 
@@ -354,7 +354,7 @@ private class UserDefinedPythonDataSourceReadRunner( if (numPartitions == SpecialLengths.PYTHON_EXCEPTION_THROWN) { val msg = PythonWorkerUtils.readUTF(dataIn) throw QueryCompilationErrors.pythonDataSourceError( - action = "plan", tpe = "read", msg = msg) + action = "generate", tpe = "read partitions", msg = msg) } for (_ <- 0 until numPartitions) { val pickledPartition: Array[Byte] = PythonWorkerUtils.readBytes(dataIn) @@ -420,7 +420,7 @@ private class UserDefinedPythonDataSourceWriteRunner( if (length == SpecialLengths.PYTHON_EXCEPTION_THROWN) { val msg = PythonWorkerUtils.readUTF(dataIn) throw QueryCompilationErrors.pythonDataSourceError( - action = "plan", tpe = "write", msg = msg) + action = "initialize", tpe = "writer", msg = msg) } // Receive the pickled data source write function. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala index 2a9abfa5d6a50..4eebc19acee89 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala @@ -53,6 +53,10 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging private lazy val serializedHadoopConf = new SerializableConfiguration(hadoopConf) + // Seq of operator names that use state schema v3 and TWS related options. + // This Seq is used in checks before reading state schema files.
+ private val twsShortNameSeq = Seq("transformWithStateExec", "transformWithStateInPandasExec") + override def shortName(): String = "statestore" override def getTable( @@ -132,12 +136,11 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging private def runStateVarChecks( sourceOptions: StateSourceOptions, stateStoreMetadata: Array[StateMetadataTableEntry]): Unit = { - val twsShortName = "transformWithStateExec" if (sourceOptions.stateVarName.isDefined || sourceOptions.readRegisteredTimers) { // Perform checks for transformWithState operator in case state variable name is provided require(stateStoreMetadata.size == 1) val opMetadata = stateStoreMetadata.head - if (opMetadata.operatorName != twsShortName) { + if (!twsShortNameSeq.contains(opMetadata.operatorName)) { // if we are trying to query state source with state variable name, then the operator // should be transformWithState val errorMsg = "Providing state variable names is only supported with the " + @@ -178,7 +181,7 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging } else { // if the operator is transformWithState, then a state variable argument is mandatory if (stateStoreMetadata.size == 1 && - stateStoreMetadata.head.operatorName == twsShortName) { + twsShortNameSeq.contains(stateStoreMetadata.head.operatorName)) { throw StateDataSourceErrors.requiredOptionUnspecified("stateVarName") } } @@ -212,7 +215,7 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging // Read the schema file path from operator metadata version v2 onwards // for the transformWithState operator val oldSchemaFilePath = if (storeMetadata.length > 0 && storeMetadata.head.version == 2 - && storeMetadata.head.operatorName.contains("transformWithStateExec")) { + && twsShortNameSeq.exists(storeMetadata.head.operatorName.contains)) { val storeMetadataEntry = storeMetadata.head val operatorProperties = TransformWithStateOperatorProperties.fromJson( 
storeMetadataEntry.operatorPropertiesJson) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala index 87ae34532f88a..d8880b84c6211 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextTable.scala @@ -39,10 +39,12 @@ case class TextTable( override def inferSchema(files: Seq[FileStatus]): Option[StructType] = Some(StructType(Array(StructField("value", StringType)))) - override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { new WriteBuilder { - override def build(): Write = TextWrite(paths, formatName, supportsDataType, info) + override def build(): Write = + TextWrite(paths, formatName, supportsDataType, mergedWriteInfo(info)) } + } override def supportsDataType(dataType: DataType): Boolean = dataType == StringType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlDataSource.scala index 71f285e381745..8a179afb0f357 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlDataSource.scala @@ -25,8 +25,10 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.security.AccessControlException import org.apache.spark.TaskContext import 
org.apache.spark.input.{PortableDataStream, StreamInputFormat} @@ -190,6 +192,7 @@ object MultiLineXmlDataSource extends XmlDataSource { Iterator.empty[String] case NonFatal(e) => ExceptionUtils.getRootCause(e) match { + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case _: RuntimeException | _: IOException if parsedOptions.ignoreCorruptFiles => logWarning("Skipped the rest of the content in the corrupted file", e) Iterator.empty[String] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala index 3a08b13be0134..6907061d67703 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.DYNAMIC_PRUNING_SUBQUERY import org.apache.spark.sql.execution.{InSubqueryExec, QueryExecution, SparkPlan, SubqueryBroadcastExec} import org.apache.spark.sql.execution.exchange.BroadcastExchangeExec import org.apache.spark.sql.execution.joins._ +import org.apache.spark.sql.internal.SQLConf /** * This planner rule aims at rewriting dynamic pruning predicates in order to reuse the @@ -36,6 +37,8 @@ import org.apache.spark.sql.execution.joins._ */ case class PlanDynamicPruningFilters(sparkSession: SparkSession) extends Rule[SparkPlan] { + override def conf: SQLConf = sparkSession.sessionState.conf + /** * Identify the shape in which keys of a given plan are broadcasted. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index 8ec903f8e61da..de5c3aaa4fe4d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -64,7 +64,7 @@ case class EnsureRequirements( // Ensure that the operator's children satisfy their output distribution requirements. var children = originalChildren.zip(requiredChildDistributions).map { case (child, distribution) if child.outputPartitioning.satisfies(distribution) => - child + ensureOrdering(child, distribution) case (child, BroadcastDistribution(mode)) => BroadcastExchangeExec(mode, child) case (child, distribution) => @@ -290,6 +290,23 @@ case class EnsureRequirements( } } + private def ensureOrdering(plan: SparkPlan, distribution: Distribution) = { + (plan.outputPartitioning, distribution) match { + case (p @ KeyGroupedPartitioning(expressions, _, partitionValues, _), + d @ OrderedDistribution(ordering)) if p.satisfies(d) => + val attrs = expressions.flatMap(_.collectLeaves()).map(_.asInstanceOf[Attribute]) + val partitionOrdering: Ordering[InternalRow] = { + RowOrdering.create(ordering, attrs) + } + // Sort 'commonPartitionValues' and use this mechanism to ensure BatchScan's + // output partitions are ordered + val sorted = partitionValues.sorted(partitionOrdering) + populateCommonPartitionInfo(plan, sorted.map((_, 1)), + None, None, applyPartialClustering = false, replicatePartitions = false) + case _ => plan + } + } + /** * Recursively reorders the join keys based on partitioning. It starts reordering the * join keys to match HashPartitioning on either side, followed by PartitioningCollection. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ApplyInPandasWithStatePythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ApplyInPandasWithStatePythonRunner.scala index ae982f2f87f2e..d704638b85e8a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ApplyInPandasWithStatePythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ApplyInPandasWithStatePythonRunner.scala @@ -65,7 +65,8 @@ class ApplyInPandasWithStatePythonRunner( stateValueSchema: StructType, override val pythonMetrics: Map[String, SQLMetric], jobArtifactUUID: Option[String]) - extends BasePythonRunner[InType, OutType](funcs.map(_._1), evalType, argOffsets, jobArtifactUUID) + extends BasePythonRunner[InType, OutType]( + funcs.map(_._1), evalType, argOffsets, jobArtifactUUID, pythonMetrics) with PythonArrowInput[InType] with PythonArrowOutput[OutType] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala index a555d660ea1ac..579b496046852 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonRunner.scala @@ -38,7 +38,7 @@ abstract class BaseArrowPythonRunner( override val pythonMetrics: Map[String, SQLMetric], jobArtifactUUID: Option[String]) extends BasePythonRunner[Iterator[InternalRow], ColumnarBatch]( - funcs.map(_._1), evalType, argOffsets, jobArtifactUUID) + funcs.map(_._1), evalType, argOffsets, jobArtifactUUID, pythonMetrics) with BasicPythonArrowInput with BasicPythonArrowOutput { @@ -116,6 +116,9 @@ object ArrowPythonRunner { conf.pandasGroupedMapAssignColumnsByName.toString) val arrowSafeTypeCheck = Seq(SQLConf.PANDAS_ARROW_SAFE_TYPE_CONVERSION.key -> conf.arrowSafeTypeConversion.toString) - Map(timeZoneConf ++ pandasColsByName 
++ arrowSafeTypeCheck: _*) + val arrowAsyncParallelism = conf.pythonUDFArrowConcurrencyLevel.map(v => + Seq(SQLConf.PYTHON_UDF_ARROW_CONCURRENCY_LEVEL.key -> v.toString) + ).getOrElse(Seq.empty) + Map(timeZoneConf ++ pandasColsByName ++ arrowSafeTypeCheck ++ arrowAsyncParallelism: _*) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonUDTFRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonUDTFRunner.scala index f52b01b6646ac..99a9e706c6620 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonUDTFRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ArrowPythonUDTFRunner.scala @@ -43,7 +43,7 @@ class ArrowPythonUDTFRunner( jobArtifactUUID: Option[String]) extends BasePythonRunner[Iterator[InternalRow], ColumnarBatch]( Seq(ChainedPythonFunctions(Seq(udtf.func))), evalType, Array(argMetas.map(_.offset)), - jobArtifactUUID) + jobArtifactUUID, pythonMetrics) with BasicPythonArrowInput with BasicPythonArrowOutput { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala index e6958392cad48..28318a319b088 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExec.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.python.EvalPythonExec.ArgumentMetadata +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StructField, StructType} /** @@ -39,10 +40,12 @@ case class BatchEvalPythonExec(udfs: Seq[PythonUDF], resultAttrs: Seq[Attribute] private[this] val jobArtifactUUID =
JobArtifactSet.getCurrentJobArtifactState.map(_.uuid) override protected def evaluatorFactory: EvalPythonEvaluatorFactory = { + val batchSize = conf.getConf(SQLConf.PYTHON_UDF_MAX_RECORDS_PER_BATCH) new BatchEvalPythonEvaluatorFactory( child.output, udfs, output, + batchSize, pythonMetrics, jobArtifactUUID, conf.pythonUDFProfiler) @@ -56,6 +59,7 @@ class BatchEvalPythonEvaluatorFactory( childOutput: Seq[Attribute], udfs: Seq[PythonUDF], output: Seq[Attribute], + batchSize: Int, pythonMetrics: Map[String, SQLMetric], jobArtifactUUID: Option[String], profiler: Option[String]) @@ -70,7 +74,7 @@ class BatchEvalPythonEvaluatorFactory( EvaluatePython.registerPicklers() // register pickler for Row // Input iterator to Python. - val inputIterator = BatchEvalPythonExec.getInputIterator(iter, schema) + val inputIterator = BatchEvalPythonExec.getInputIterator(iter, schema, batchSize) // Output iterator for results from Python. val outputIterator = @@ -107,7 +111,8 @@ class BatchEvalPythonEvaluatorFactory( object BatchEvalPythonExec { def getInputIterator( iter: Iterator[InternalRow], - schema: StructType): Iterator[Array[Byte]] = { + schema: StructType, + batchSize: Int): Iterator[Array[Byte]] = { val dataTypes = schema.map(_.dataType) val needConversion = dataTypes.exists(EvaluatePython.needConversionInPython) @@ -140,6 +145,6 @@ object BatchEvalPythonExec { } fields } - }.grouped(100).map(x => pickle.dumps(x.toArray)) + }.grouped(batchSize).map(x => pickle.dumps(x.toArray)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonUDTFExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonUDTFExec.scala index 9eebd4ea7e79c..c0dcb77817420 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonUDTFExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchEvalPythonUDTFExec.scala @@ -23,8 +23,9 @@ import scala.jdk.CollectionConverters._ import 
net.razorvine.pickle.Unpickler -import org.apache.spark.{JobArtifactSet, TaskContext} +import org.apache.spark.{JobArtifactSet, SparkEnv, TaskContext} import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType, PythonWorkerUtils} +import org.apache.spark.internal.config.BUFFER_SIZE import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.GenericArrayData @@ -63,7 +64,8 @@ case class BatchEvalPythonUDTFExec( EvaluatePython.registerPicklers() // register pickler for Row // Input iterator to Python. - val inputIterator = BatchEvalPythonExec.getInputIterator(iter, schema) + // For Python UDTF, we don't have a separate configuration for the batch size yet. + val inputIterator = BatchEvalPythonExec.getInputIterator(iter, schema, 100) // Output iterator for results from Python. val outputIterator = @@ -101,6 +103,9 @@ class PythonUDTFRunner( Seq((ChainedPythonFunctions(Seq(udtf.func)), udtf.resultId.id)), PythonEvalType.SQL_TABLE_UDF, Array(argMetas.map(_.offset)), pythonMetrics, jobArtifactUUID) { + // Overriding here to NOT use the same value of UDF config in UDTF. 
+ override val bufferSize: Int = SparkEnv.get.conf.get(BUFFER_SIZE) + override protected def writeUDF(dataOut: DataOutputStream): Unit = { PythonUDTFRunner.writeUDTF(dataOut, udtf, argMetas) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala index 5670cad67e7b0..c5e86d010938d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/CoGroupedArrowPythonRunner.scala @@ -51,7 +51,7 @@ class CoGroupedArrowPythonRunner( profiler: Option[String]) extends BasePythonRunner[ (Iterator[InternalRow], Iterator[InternalRow]), ColumnarBatch]( - funcs.map(_._1), evalType, argOffsets, jobArtifactUUID) + funcs.map(_._1), evalType, argOffsets, jobArtifactUUID, pythonMetrics) with BasicPythonArrowOutput { override val pythonExec: String = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonSQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonSQLMetrics.scala index 4df6d821c014f..bd22739613eef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonSQLMetrics.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonSQLMetrics.scala @@ -24,6 +24,8 @@ trait PythonSQLMetrics { self: SparkPlan => protected val pythonMetrics: Map[String, SQLMetric] = { PythonSQLMetrics.pythonSizeMetricsDesc.map { case (k, v) => k -> SQLMetrics.createSizeMetric(sparkContext, v) + } ++ PythonSQLMetrics.pythonTimingMetricsDesc.map { case (k, v) => + k -> SQLMetrics.createTimingMetric(sparkContext, v) } ++ PythonSQLMetrics.pythonOtherMetricsDesc.map { case (k, v) => k -> SQLMetrics.createMetric(sparkContext, v) } @@ -40,6 +42,14 @@ object PythonSQLMetrics { ) } + val pythonTimingMetricsDesc: Map[String, String] = { + Map( + 
"pythonBootTime" -> "total time to start Python workers", + "pythonInitTime" -> "total time to initialize Python workers", + "pythonTotalTime" -> "total time to run Python workers" + ) + } + val pythonOtherMetricsDesc: Map[String, String] = { Map("pythonNumRowsReceived" -> "number of output rows") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala index 87ff5a0ec4333..167e1fd8b0f01 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala @@ -36,7 +36,7 @@ abstract class BasePythonUDFRunner( pythonMetrics: Map[String, SQLMetric], jobArtifactUUID: Option[String]) extends BasePythonRunner[Array[Byte], Array[Byte]]( - funcs.map(_._1), evalType, argOffsets, jobArtifactUUID) { + funcs.map(_._1), evalType, argOffsets, jobArtifactUUID, pythonMetrics) { override val pythonExec: String = SQLConf.get.pysparkWorkerPythonExecutable.getOrElse( @@ -46,6 +46,8 @@ abstract class BasePythonUDFRunner( override val faultHandlerEnabled: Boolean = SQLConf.get.pythonUDFWorkerFaulthandlerEnabled + override val bufferSize: Int = SQLConf.get.getConf(SQLConf.PYTHON_UDF_BUFFER_SIZE) + protected def writeUDF(dataOut: DataOutputStream): Unit protected override def newWriter( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasExec.scala index 7dd4d4647eeba..9b51822679a91 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasExec.scala @@ -16,27 +16,32 @@ */ package org.apache.spark.sql.execution.python +import java.util.UUID + import 
scala.concurrent.duration.NANOSECONDS import org.apache.hadoop.conf.Configuration -import org.apache.spark.JobArtifactSet +import org.apache.spark.{JobArtifactSet, SparkException} import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType} +import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Expression, PythonUDF, SortOrder} +import org.apache.spark.sql.catalyst.plans.logical.ProcessingTime import org.apache.spark.sql.catalyst.plans.physical.Distribution import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.execution.{BinaryExecNode, CoGroupedIterator, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.python.PandasGroupUtils.{executePython, groupAndProject, resolveArgOffsets} -import org.apache.spark.sql.execution.streaming.{StatefulOperatorCustomMetric, StatefulOperatorCustomSumMetric, StatefulOperatorPartitioning, StatefulOperatorStateInfo, StatefulProcessorHandleImpl, StateStoreWriter, WatermarkSupport} +import org.apache.spark.sql.execution.streaming.{DriverStatefulProcessorHandleImpl, StatefulOperatorCustomMetric, StatefulOperatorCustomSumMetric, StatefulOperatorPartitioning, StatefulOperatorStateInfo, StatefulProcessorHandleImpl, StateStoreWriter, TransformWithStateMetadataUtils, TransformWithStateVariableInfo, WatermarkSupport} import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper.StateStoreAwareZipPartitionsHelper -import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, StateSchemaValidationResult, StateStore, StateStoreConf, StateStoreId, StateStoreOps, StateStoreProviderId} +import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, OperatorStateMetadata, 
RocksDBStateStoreProvider, StateSchemaValidationResult, StateStore, StateStoreColFamilySchema, StateStoreConf, StateStoreId, StateStoreOps, StateStoreProvider, StateStoreProviderId} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.{OutputMode, TimeMode} import org.apache.spark.sql.types.{BinaryType, StructField, StructType} -import org.apache.spark.util.{CompletionIterator, SerializableConfiguration} +import org.apache.spark.util.{CompletionIterator, SerializableConfiguration, Utils} /** * Physical operator for executing @@ -52,8 +57,11 @@ import org.apache.spark.util.{CompletionIterator, SerializableConfiguration} * @param eventTimeWatermarkForLateEvents event time watermark for filtering late events * @param eventTimeWatermarkForEviction event time watermark for state eviction * @param child the physical plan for the underlying data + * @param isStreaming defines whether the query is streaming or batch + * @param hasInitialState defines whether the query has initial state * @param initialState the physical plan for the input initial state * @param initialStateGroupingAttrs grouping attributes for initial state + * @param initialStateSchema schema for initial state */ case class TransformWithStateInPandasExec( functionExpr: Expression, @@ -66,12 +74,17 @@ case class TransformWithStateInPandasExec( eventTimeWatermarkForLateEvents: Option[Long], eventTimeWatermarkForEviction: Option[Long], child: SparkPlan, + isStreaming: Boolean = true, hasInitialState: Boolean, initialState: SparkPlan, initialStateGroupingAttrs: Seq[Attribute], initialStateSchema: StructType) - extends BinaryExecNode with StateStoreWriter with WatermarkSupport { + extends BinaryExecNode + with StateStoreWriter + with WatermarkSupport + with TransformWithStateMetadataUtils { + override def shortName: String = "transformWithStateInPandasExec" private val pythonUDF = functionExpr.asInstanceOf[PythonUDF] private val pythonFunction = pythonUDF.func private val 
chainedFunc = @@ -80,6 +93,7 @@ case class TransformWithStateInPandasExec( private val sessionLocalTimeZone = conf.sessionLocalTimeZone private val pythonRunnerConf = ArrowPythonRunner.getPythonRunnerConfMap(conf) private[this] val jobArtifactUUID = JobArtifactSet.getCurrentJobArtifactState.map(_.uuid) + private val (dedupAttributes, argOffsets) = resolveArgOffsets(child.output, groupingAttributes) private val groupingKeyStructFields = groupingAttributes .map(a => StructField(a.name, a.dataType, a.nullable)) @@ -98,6 +112,22 @@ case class TransformWithStateInPandasExec( // Each state variable has its own schema, this is a dummy one. protected val schemaForValueRow: StructType = new StructType().add("value", BinaryType) + override def operatorStateMetadataVersion: Int = 2 + + override def getColFamilySchemas(): Map[String, StateStoreColFamilySchema] = { + driverProcessorHandle.getColumnFamilySchemas + } + + override def getStateVariableInfos(): Map[String, TransformWithStateVariableInfo] = { + driverProcessorHandle.getStateVariableInfos + } + + /** Metadata of this stateful operator and its states stores. + * Written during IncrementalExecution. `validateAndMaybeEvolveStateSchema` will initialize + * `columnFamilySchemas` and `stateVariableInfos` during `init()` call on driver. */ + private val driverProcessorHandle: DriverStatefulProcessorHandleImpl = + new DriverStatefulProcessorHandleImpl(timeMode, groupingKeyExprEncoder) + /** * Distribute by grouping attributes - We need the underlying data and the initial state data * to have the same grouping so that the data are co-located on the same task. 
@@ -118,12 +148,74 @@ case class TransformWithStateInPandasExec( groupingAttributes.map(SortOrder(_, Ascending)), initialStateGroupingAttrs.map(SortOrder(_, Ascending))) + override def operatorStateMetadata( + stateSchemaPaths: List[String]): OperatorStateMetadata = { + getOperatorStateMetadata(stateSchemaPaths, getStateInfo, shortName, timeMode, outputMode) + } + + override def validateNewMetadata( + oldOperatorMetadata: OperatorStateMetadata, + newOperatorMetadata: OperatorStateMetadata): Unit = { + validateNewMetadataForTWS(oldOperatorMetadata, newOperatorMetadata) + } + override def validateAndMaybeEvolveStateSchema( hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): List[StateSchemaValidationResult] = { - // TODO(SPARK-49212): Implement schema evolution support - List.empty + // Start a python runner on driver, and execute pre-init UDF on the runner + val runner = new TransformWithStateInPandasPythonPreInitRunner( + pythonFunction, + "pyspark.sql.streaming.transform_with_state_driver_worker", + sessionLocalTimeZone, + groupingKeySchema, + driverProcessorHandle + ) + // runner initialization + runner.init() + try { + // execute UDF on the python runner + runner.process() + } catch { + case e: Throwable => + throw new SparkException("TransformWithStateInPandas driver worker " + + "exited unexpectedly (crashed)", e) + } + runner.stop() + + validateAndWriteStateSchema(hadoopConf, batchId, stateSchemaVersion, getStateInfo, + session, operatorStateMetadataVersion) + } + + override def shouldRunAnotherBatch(newInputWatermark: Long): Boolean = { + if (timeMode == ProcessingTime) { + // TODO SPARK-50180: check if we can return true only if actual timers are registered, + // or there is expired state + true + } else if (outputMode == OutputMode.Append || outputMode == OutputMode.Update) { + eventTimeWatermarkForEviction.isDefined && + newInputWatermark > eventTimeWatermarkForEviction.get + } else { + false + } + } + + /** + * Controls watermark 
propagation to downstream modes. If timeMode is + * ProcessingTime, the output rows cannot be interpreted in eventTime, hence + * this node will not propagate watermark in this timeMode. + * + * For timeMode EventTime, output watermark is same as input Watermark because + * transformWithState does not allow users to set the event time column to be + * earlier than the watermark. + */ + override def produceOutputWatermark(inputWatermarkMs: Long): Option[Long] = { + timeMode match { + case ProcessingTime => + None + case _ => + Some(inputWatermarkMs) + } } override def customStatefulOperatorMetrics: Seq[StatefulOperatorCustomMetric] = { @@ -156,18 +248,32 @@ case class TransformWithStateInPandasExec( metrics if (!hasInitialState) { - child.execute().mapPartitionsWithStateStore[InternalRow]( - getStateInfo, - schemaForKeyRow, - schemaForValueRow, - NoPrefixKeyStateEncoderSpec(schemaForKeyRow), - session.sqlContext.sessionState, - Some(session.sqlContext.streams.stateStoreCoordinator), - useColumnFamilies = true, - useMultipleValuesPerKey = true - ) { - case (store: StateStore, dataIterator: Iterator[InternalRow]) => - processDataWithPartition(store, dataIterator) + if (isStreaming) { + child.execute().mapPartitionsWithStateStore[InternalRow]( + getStateInfo, + schemaForKeyRow, + schemaForValueRow, + NoPrefixKeyStateEncoderSpec(schemaForKeyRow), + session.sqlContext.sessionState, + Some(session.sqlContext.streams.stateStoreCoordinator), + useColumnFamilies = true, + useMultipleValuesPerKey = true + ) { + case (store: StateStore, dataIterator: Iterator[InternalRow]) => + processDataWithPartition(store, dataIterator) + } + } else { + // If the query is running in batch mode, we need to create a new StateStore and instantiate + // a temp directory on the executors in mapPartitionsWithIndex. 
+ val hadoopConfBroadcast = sparkContext.broadcast( + new SerializableConfiguration(session.sessionState.newHadoopConf())) + child.execute().mapPartitionsWithIndex[InternalRow]( + (partitionId: Int, dataIterator: Iterator[InternalRow]) => { + initNewStateStoreAndProcessData(partitionId, hadoopConfBroadcast) { store => + processDataWithPartition(store, dataIterator) + } + } + ) } } else { val storeConf = new StateStoreConf(session.sqlContext.sessionState.conf) @@ -182,25 +288,71 @@ case class TransformWithStateInPandasExec( // The state store aware zip partitions will provide us with two iterators, // child data iterator and the initial state iterator per partition. case (partitionId, childDataIterator, initStateIterator) => - val stateStoreId = StateStoreId(stateInfo.get.checkpointLocation, - stateInfo.get.operatorId, partitionId) - val storeProviderId = StateStoreProviderId(stateStoreId, stateInfo.get.queryRunId) - val store = StateStore.get( - storeProviderId = storeProviderId, - keySchema = schemaForKeyRow, - valueSchema = schemaForValueRow, - NoPrefixKeyStateEncoderSpec(schemaForKeyRow), - version = stateInfo.get.storeVersion, - stateStoreCkptId = stateInfo.get.getStateStoreCkptId(partitionId).map(_.head), - useColumnFamilies = true, - storeConf = storeConf, - hadoopConf = hadoopConfBroadcast.value.value - ) - processDataWithPartition(store, childDataIterator, initStateIterator) + if (isStreaming) { + val stateStoreId = StateStoreId(stateInfo.get.checkpointLocation, + stateInfo.get.operatorId, partitionId) + val storeProviderId = StateStoreProviderId(stateStoreId, stateInfo.get.queryRunId) + val store = StateStore.get( + storeProviderId = storeProviderId, + keySchema = schemaForKeyRow, + valueSchema = schemaForValueRow, + NoPrefixKeyStateEncoderSpec(schemaForKeyRow), + version = stateInfo.get.storeVersion, + stateStoreCkptId = stateInfo.get.getStateStoreCkptId(partitionId).map(_.head), + useColumnFamilies = true, + storeConf = storeConf, + hadoopConf = 
hadoopConfBroadcast.value.value + ) + processDataWithPartition(store, childDataIterator, initStateIterator) + } else { + initNewStateStoreAndProcessData(partitionId, hadoopConfBroadcast) { store => + processDataWithPartition(store, childDataIterator, initStateIterator) + } + } } } } + /** + * Create a new StateStore for given partitionId and instantiate a temp directory + * on the executors. Process data and close the stateStore provider afterwards. + */ + private def initNewStateStoreAndProcessData( + partitionId: Int, + hadoopConfBroadcast: Broadcast[SerializableConfiguration]) + (f: StateStore => Iterator[InternalRow]): Iterator[InternalRow] = { + + val providerId = { + val tempDirPath = Utils.createTempDir().getAbsolutePath + new StateStoreProviderId( + StateStoreId(tempDirPath, 0, partitionId), getStateInfo.queryRunId) + } + + val sqlConf = new SQLConf() + sqlConf.setConfString(SQLConf.STATE_STORE_PROVIDER_CLASS.key, + classOf[RocksDBStateStoreProvider].getName) + val storeConf = new StateStoreConf(sqlConf) + + // Create StateStoreProvider for this partition + val stateStoreProvider = StateStoreProvider.createAndInit( + providerId, + schemaForKeyRow, + schemaForValueRow, + NoPrefixKeyStateEncoderSpec(schemaForKeyRow), + useColumnFamilies = true, + storeConf = storeConf, + hadoopConf = hadoopConfBroadcast.value.value, + useMultipleValuesPerKey = true) + + val store = stateStoreProvider.getStore(0, None) + val outputIterator = f(store) + CompletionIterator[InternalRow, Iterator[InternalRow]](outputIterator.iterator, { + stateStoreProvider.close() + }).map { row => + row + } + } + private def processDataWithPartition( store: StateStore, dataIterator: Iterator[InternalRow], @@ -213,12 +365,18 @@ case class TransformWithStateInPandasExec( val currentTimeNs = System.nanoTime val updatesStartTimeNs = currentTimeNs - val (dedupAttributes, argOffsets) = resolveArgOffsets(child.output, groupingAttributes) - val data = - groupAndProject(dataIterator, groupingAttributes, 
child.output, dedupAttributes) + // If timeout is based on event time, then filter late data based on watermark + val filteredIter = watermarkPredicateForDataForLateEvents match { + case Some(predicate) => + applyRemovingRowsOlderThanWatermark(dataIterator, predicate) + case _ => + dataIterator + } + + val data = groupAndProject(filteredIter, groupingAttributes, child.output, dedupAttributes) val processorHandle = new StatefulProcessorHandleImpl(store, getStateInfo.queryRunId, - groupingKeyExprEncoder, timeMode, isStreaming = true, batchTimestampMs, metrics) + groupingKeyExprEncoder, timeMode, isStreaming, batchTimestampMs, metrics) val outputIterator = if (!hasInitialState) { val runner = new TransformWithStateInPandasPythonRunner( @@ -270,8 +428,12 @@ case class TransformWithStateInPandasExec( // by the upstream (consumer) operators in addition to the processing in this operator. allUpdatesTimeMs += NANOSECONDS.toMillis(System.nanoTime - updatesStartTimeNs) commitTimeMs += timeTakenMs { - processorHandle.doTtlCleanup() - store.commit() + if (isStreaming) { + processorHandle.doTtlCleanup() + store.commit() + } else { + store.abort() + } } setStoreMetrics(store) setOperatorMetrics() @@ -293,3 +455,48 @@ case class TransformWithStateInPandasExec( override def right: SparkPlan = initialState } + +// scalastyle:off argcount +object TransformWithStateInPandasExec { + + // Plan logical transformWithStateInPandas for batch queries + def generateSparkPlanForBatchQueries( + functionExpr: Expression, + groupingAttributes: Seq[Attribute], + output: Seq[Attribute], + outputMode: OutputMode, + timeMode: TimeMode, + child: SparkPlan, + hasInitialState: Boolean = false, + initialState: SparkPlan, + initialStateGroupingAttrs: Seq[Attribute], + initialStateSchema: StructType): SparkPlan = { + val shufflePartitions = child.session.sessionState.conf.numShufflePartitions + val statefulOperatorStateInfo = StatefulOperatorStateInfo( + checkpointLocation = "", // empty 
checkpointLocation will be populated in doExecute + queryRunId = UUID.randomUUID(), + operatorId = 0, + storeVersion = 0, + numPartitions = shufflePartitions, + stateStoreCkptIds = None + ) + + new TransformWithStateInPandasExec( + functionExpr, + groupingAttributes, + output, + outputMode, + timeMode, + Some(statefulOperatorStateInfo), + Some(System.currentTimeMillis), + None, + None, + child, + isStreaming = false, + hasInitialState, + initialState, + initialStateGroupingAttrs, + initialStateSchema) + } +} +// scalastyle:on argcount diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasPythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasPythonRunner.scala index c5980012124fe..f415ae2543d34 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasPythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasPythonRunner.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.python -import java.io.DataOutputStream +import java.io.{DataInputStream, DataOutputStream} import java.net.ServerSocket import scala.concurrent.ExecutionContext @@ -25,13 +25,13 @@ import scala.concurrent.ExecutionContext import org.apache.arrow.vector.VectorSchemaRoot import org.apache.arrow.vector.ipc.ArrowStreamWriter -import org.apache.spark.TaskContext -import org.apache.spark.api.python.{BasePythonRunner, ChainedPythonFunctions, PythonRDD} +import org.apache.spark.{SparkException, TaskContext} +import org.apache.spark.api.python.{BasePythonRunner, ChainedPythonFunctions, PythonFunction, PythonRDD, PythonWorkerUtils, StreamingPythonRunner} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.python.TransformWithStateInPandasPythonRunner.{GroupedInType, 
InType} -import org.apache.spark.sql.execution.streaming.StatefulProcessorHandleImpl +import org.apache.spark.sql.execution.streaming.{DriverStatefulProcessorHandleImpl, StatefulProcessorHandleImpl} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch @@ -173,16 +173,16 @@ abstract class TransformWithStateInPandasPythonBaseRunner[I]( groupingKeySchema: StructType, batchTimestampMs: Option[Long], eventTimeWatermarkForEviction: Option[Long]) - extends BasePythonRunner[I, ColumnarBatch](funcs.map(_._1), evalType, argOffsets, jobArtifactUUID) + extends BasePythonRunner[I, ColumnarBatch]( + funcs.map(_._1), evalType, argOffsets, jobArtifactUUID, pythonMetrics) with PythonArrowInput[I] with BasicPythonArrowOutput + with TransformWithStateInPandasPythonRunnerUtils with Logging { protected val sqlConf = SQLConf.get protected val arrowMaxRecordsPerBatch = sqlConf.arrowMaxRecordsPerBatch - private var stateServerSocketPort: Int = 0 - override protected val workerConf: Map[String, String] = initialWorkerConf + (SQLConf.ARROW_EXECUTION_MAX_RECORDS_PER_BATCH.key -> arrowMaxRecordsPerBatch.toString) @@ -204,21 +204,7 @@ abstract class TransformWithStateInPandasPythonBaseRunner[I]( inputIterator: Iterator[I], partitionIndex: Int, context: TaskContext): Iterator[ColumnarBatch] = { - var stateServerSocket: ServerSocket = null - var failed = false - try { - stateServerSocket = new ServerSocket( /* port = */ 0, - /* backlog = */ 1) - stateServerSocketPort = stateServerSocket.getLocalPort - } catch { - case e: Throwable => - failed = true - throw e - } finally { - if (failed) { - closeServerSocketChannelSilently(stateServerSocket) - } - } + initStateServer() val executor = ThreadUtils.newDaemonSingleThreadExecutor("stateConnectionListenerThread") val executionContext = ExecutionContext.fromExecutor(executor) @@ -238,7 +224,108 @@ abstract class TransformWithStateInPandasPythonBaseRunner[I]( 
super.compute(inputIterator, partitionIndex, context) } - private def closeServerSocketChannelSilently(stateServerSocket: ServerSocket): Unit = { + override protected def writeUDF(dataOut: DataOutputStream): Unit = { + PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets, None) + } +} + +/** + * TransformWithStateInPandas driver side Python runner. Similar as executor side runner, + * will start a new daemon thread on the Python runner to run state server. + */ +class TransformWithStateInPandasPythonPreInitRunner( + func: PythonFunction, + workerModule: String, + timeZoneId: String, + groupingKeySchema: StructType, + processorHandleImpl: DriverStatefulProcessorHandleImpl) + extends StreamingPythonRunner(func, "", "", workerModule) + with TransformWithStateInPandasPythonRunnerUtils + with Logging { + protected val sqlConf = SQLConf.get + + private var dataOut: DataOutputStream = _ + private var dataIn: DataInputStream = _ + + private var daemonThread: Thread = _ + + override def init(): (DataOutputStream, DataInputStream) = { + val result = super.init() + dataOut = result._1 + dataIn = result._2 + + // start state server, update socket port + startStateServer() + (dataOut, dataIn) + } + + def process(): Unit = { + // Also write the port number for state server + dataOut.writeInt(stateServerSocketPort) + PythonWorkerUtils.writeUTF(groupingKeySchema.json, dataOut) + dataOut.flush() + + val resFromPython = dataIn.readInt() + if (resFromPython != 0) { + val errMessage = PythonWorkerUtils.readUTF(dataIn) + throw streamingPythonRunnerInitializationFailure(resFromPython, errMessage) + } + } + + override def stop(): Unit = { + super.stop() + closeServerSocketChannelSilently(stateServerSocket) + daemonThread.interrupt() + } + + private def startStateServer(): Unit = { + initStateServer() + + daemonThread = new Thread { + override def run(): Unit = { + try { + new TransformWithStateInPandasStateServer(stateServerSocket, processorHandleImpl, + groupingKeySchema, timeZoneId, 
errorOnDuplicatedFieldNames = true, + largeVarTypes = sqlConf.arrowUseLargeVarTypes, + sqlConf.arrowTransformWithStateInPandasMaxRecordsPerBatch).run() + } catch { + case e: Exception => + throw new SparkException("TransformWithStateInPandas state server " + + "daemon thread exited unexpectedly (crashed)", e) + } + } + } + daemonThread.setDaemon(true) + daemonThread.setName("stateConnectionListenerThread") + daemonThread.start() + } +} + +/** + * TransformWithStateInPandas Python runner utils functions for handling a state server + * in a new daemon thread. + */ +trait TransformWithStateInPandasPythonRunnerUtils extends Logging { + protected var stateServerSocketPort: Int = 0 + protected var stateServerSocket: ServerSocket = null + protected def initStateServer(): Unit = { + var failed = false + try { + stateServerSocket = new ServerSocket(/* port = */ 0, + /* backlog = */ 1) + stateServerSocketPort = stateServerSocket.getLocalPort + } catch { + case e: Throwable => + failed = true + throw e + } finally { + if (failed) { + closeServerSocketChannelSilently(stateServerSocket) + } + } + } + + protected def closeServerSocketChannelSilently(stateServerSocket: ServerSocket): Unit = { try { logInfo(log"closing the state server socket") stateServerSocket.close() @@ -247,10 +334,6 @@ abstract class TransformWithStateInPandasPythonBaseRunner[I]( logError(log"failed to close state server socket", e) } } - - override protected def writeUDF(dataOut: DataOutputStream): Unit = { - PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets, None) - } } object TransformWithStateInPandasPythonRunner { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasStateServer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasStateServer.scala index 0373c8607ff2c..e37e4266b46b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasStateServer.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasStateServer.scala @@ -33,8 +33,9 @@ import org.apache.spark.sql.api.python.PythonSQLUtils import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.GenericInternalRow -import org.apache.spark.sql.execution.streaming.{ImplicitGroupingKeyTracker, StatefulProcessorHandleImpl, StatefulProcessorHandleState, StateVariableType} -import org.apache.spark.sql.execution.streaming.state.StateMessage.{HandleState, ImplicitGroupingKeyRequest, ListStateCall, MapStateCall, StatefulProcessorCall, StateRequest, StateResponse, StateResponseWithLongTypeVal, StateVariableRequest, TimerRequest, TimerStateCallCommand, TimerValueRequest, ValueStateCall} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.execution.streaming.{ImplicitGroupingKeyTracker, StatefulProcessorHandleImpl, StatefulProcessorHandleImplBase, StatefulProcessorHandleState, StateVariableType} +import org.apache.spark.sql.execution.streaming.state.StateMessage.{HandleState, ImplicitGroupingKeyRequest, ListStateCall, MapStateCall, StatefulProcessorCall, StateRequest, StateResponse, StateResponseWithLongTypeVal, StateResponseWithStringTypeVal, StateVariableRequest, TimerRequest, TimerStateCallCommand, TimerValueRequest, UtilsRequest, ValueStateCall} import org.apache.spark.sql.streaming.{ListState, MapState, TTLConfig, ValueState} import org.apache.spark.sql.types.{BinaryType, LongType, StructField, StructType} import org.apache.spark.sql.util.ArrowUtils @@ -52,7 +53,7 @@ import org.apache.spark.util.Utils */ class TransformWithStateInPandasStateServer( stateServerSocket: ServerSocket, - statefulProcessorHandle: StatefulProcessorHandleImpl, + statefulProcessorHandle: StatefulProcessorHandleImplBase, groupingKeySchema: StructType, timeZoneId: String, errorOnDuplicatedFieldNames: Boolean, @@ 
-120,6 +121,8 @@ class TransformWithStateInPandasStateServer( } /** Timer related class variables */ + // An iterator to store all expired timer info. This is meant to be consumed only once per + // partition. This should be called after finishing handling all input rows. private var expiryTimestampIter: Option[Iterator[(Any, Long)]] = if (expiryTimerIterForTest != null) { Option(expiryTimerIterForTest) @@ -156,6 +159,11 @@ class TransformWithStateInPandasStateServer( logWarning(log"No more data to read from the socket") statefulProcessorHandle.setHandleState(StatefulProcessorHandleState.CLOSED) return + case _: InterruptedException => + logInfo(log"Thread interrupted, shutting down state server") + Thread.currentThread().interrupt() + statefulProcessorHandle.setHandleState(StatefulProcessorHandleState.CLOSED) + return case e: Exception => logError(log"Error reading message: ${MDC(LogKeys.ERROR, e.getMessage)}", e) sendResponse(1, e.getMessage) @@ -184,6 +192,19 @@ class TransformWithStateInPandasStateServer( handleStateVariableRequest(message.getStateVariableRequest) case StateRequest.MethodCase.TIMERREQUEST => handleTimerRequest(message.getTimerRequest) + case StateRequest.MethodCase.UTILSREQUEST => + handleUtilsRequest(message.getUtilsRequest) + case _ => + throw new IllegalArgumentException("Invalid method call") + } + } + + private[sql] def handleUtilsRequest(message: UtilsRequest): Unit = { + message.getMethodCase match { + case UtilsRequest.MethodCase.PARSESTRINGSCHEMA => + val stringSchema = message.getParseStringSchema.getSchema + val schema = CatalystSqlParser.parseTableSchema(stringSchema) + sendResponseWithStringVal(0, null, schema.json) case _ => throw new IllegalArgumentException("Invalid method call") } @@ -212,11 +233,13 @@ class TransformWithStateInPandasStateServer( // API and it will only be used by `group_ops` once per partition, we won't // need to worry about different function calls will interleaved and hence // this implementation is safe + 
assert(statefulProcessorHandle.isInstanceOf[StatefulProcessorHandleImpl]) val expiryRequest = message.getExpiryTimerRequest() val expiryTimestamp = expiryRequest.getExpiryTimestampMs if (!expiryTimestampIter.isDefined) { expiryTimestampIter = - Option(statefulProcessorHandle.getExpiredTimers(expiryTimestamp)) + Option(statefulProcessorHandle + .asInstanceOf[StatefulProcessorHandleImpl].getExpiredTimers(expiryTimestamp)) } // expiryTimestampIter could be None in the TWSPandasServerSuite if (!expiryTimestampIter.isDefined || !expiryTimestampIter.get.hasNext) { @@ -265,6 +288,9 @@ class TransformWithStateInPandasStateServer( case StatefulProcessorCall.MethodCase.SETHANDLESTATE => val requestedState = message.getSetHandleState.getState requestedState match { + case HandleState.PRE_INIT => + logInfo(log"set handle state to Pre-init") + statefulProcessorHandle.setHandleState(StatefulProcessorHandleState.PRE_INIT) case HandleState.CREATED => logInfo(log"set handle state to Created") statefulProcessorHandle.setHandleState(StatefulProcessorHandleState.CREATED) @@ -688,6 +714,22 @@ class TransformWithStateInPandasStateServer( outputStream.write(responseMessageBytes) } + def sendResponseWithStringVal( + status: Int, + errorMessage: String = null, + stringVal: String): Unit = { + val responseMessageBuilder = StateResponseWithStringTypeVal.newBuilder().setStatusCode(status) + if (status != 0 && errorMessage != null) { + responseMessageBuilder.setErrorMessage(errorMessage) + } + responseMessageBuilder.setValue(stringVal) + val responseMessage = responseMessageBuilder.build() + val responseMessageBytes = responseMessage.toByteArray + val byteLength = responseMessageBytes.length + outputStream.writeInt(byteLength) + outputStream.write(responseMessageBytes) + } + def sendIteratorAsArrowBatches[T]( iter: Iterator[T], outputSchema: StructType, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala index ea1f5e6ae1340..575e3d4072b8c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala @@ -24,12 +24,13 @@ import scala.collection.mutable.ArrayBuffer import net.razorvine.pickle.Pickler import org.apache.spark.api.python.{PythonEvalType, PythonFunction, PythonWorkerUtils, SpecialLengths} -import org.apache.spark.sql.{Column, DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.{Column, DataFrame, Dataset, SparkSession, TableArg, TableValuedFunctionArgument} import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, Descending, Expression, FunctionTableSubqueryArgumentExpression, NamedArgumentExpression, NullsFirst, NullsLast, PythonUDAF, PythonUDF, PythonUDTF, PythonUDTFAnalyzeResult, PythonUDTFSelectedExpression, SortOrder, UnresolvedPolymorphicPythonUDTF} import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.logical.{Generate, LogicalPlan, NamedParametersSupport, OneRowRelation} +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.errors.QueryCompilationErrors -import org.apache.spark.sql.internal.ExpressionUtils.{column, expression} +import org.apache.spark.sql.internal.ExpressionUtils.expression import org.apache.spark.sql.types.{DataType, StructType} /** @@ -75,10 +76,10 @@ case class UserDefinedPythonFunction( * Returns a [[Column]] that will evaluate the UDF expression with the given input. */ def fromUDFExpr(expr: Expression): Column = { - expr match { + Column(expr match { case udaf: PythonUDAF => udaf.toAggregateExpression() case _ => expr - } + }) } } @@ -159,8 +160,16 @@ case class UserDefinedPythonTableFunction( } /** Returns a [[DataFrame]] that will evaluate to calling this UDTF with the given input. 
*/ - def apply(session: SparkSession, exprs: Column*): DataFrame = { - val udtf = builder(exprs.map(session.expression), session.sessionState.sqlParser) + def apply(session: SparkSession, exprs: TableValuedFunctionArgument*): DataFrame = { + val parser = session.sessionState.sqlParser + val expressions = exprs.map { + case col: Column => session.expression(col) + case tableArg: TableArg => tableArg.expression + case other => throw new IllegalArgumentException( + s"Unsupported argument type: ${other.getClass.getName}" + ) + } + val udtf = builder(expressions, parser) Dataset.ofRows(session, udtf) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala index 148766f9d0026..221ca17ddf19d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala @@ -22,13 +22,13 @@ import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, Da import scala.collection.mutable import org.apache.spark.internal.Logging -import org.apache.spark.sql.{functions, DataFrame} +import org.apache.spark.sql.{functions, Column, DataFrame} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Expression, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} import org.apache.spark.sql.catalyst.trees.UnaryLike import org.apache.spark.sql.catalyst.util.GenericArrayData -import org.apache.spark.sql.internal.ExpressionUtils.{column, expression} +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -52,13 +52,15 @@ object FrequentItems extends Logging { df: DataFrame, cols: Seq[String], support: Double): DataFrame = { + import 
df.sparkSession.expression require(support >= 1e-4 && support <= 1.0, s"Support must be in [1e-4, 1], but got $support.") // number of max items to keep counts for val sizeOfMap = (1 / support).toInt val frequentItemCols = cols.map { col => - column(new CollectFrequentItems(functions.col(col), sizeOfMap)).as(s"${col}_freqItems") + Column(new CollectFrequentItems(expression(functions.col(col)), sizeOfMap)) + .as(s"${col}_freqItems") } df.select(frequentItemCols: _*) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala index 2a7e9818aedd9..511f4421e16ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala @@ -214,7 +214,7 @@ class IncrementalExecution( // filepath, and write this path out in the OperatorStateMetadata file case statefulOp: StatefulOperator if isFirstBatch => val stateSchemaVersion = statefulOp match { - case _: TransformWithStateExec => + case _: TransformWithStateExec | _: TransformWithStateInPandasExec => sparkSession.sessionState.conf. getConf(SQLConf.STREAMING_TRANSFORM_WITH_STATE_OP_STATE_SCHEMA_VERSION) case _ => STATE_SCHEMA_DEFAULT_VERSION @@ -439,6 +439,23 @@ class IncrementalExecution( eventTimeWatermarkForEviction = iwEviction) )) + // UpdateEventTimeColumnExec is used to tag the eventTime column, and validate + // emitted rows adhere to watermark in the output of transformWithStateInp. + // Hence, this node shares the same watermark value as TransformWithStateInPandasExec. + // This is the same as above in TransformWithStateExec. + // The only difference is TransformWithStateInPandasExec is analysed slightly different + // with no SerializeFromObjectExec wrapper. 
+ case UpdateEventTimeColumnExec(eventTime, delay, None, t: TransformWithStateInPandasExec) + if t.stateInfo.isDefined => + val stateInfo = t.stateInfo.get + val iwLateEvents = inputWatermarkForLateEvents(stateInfo) + val iwEviction = inputWatermarkForEviction(stateInfo) + + UpdateEventTimeColumnExec(eventTime, delay, iwLateEvents, + t.copy( + eventTimeWatermarkForLateEvents = iwLateEvents, + eventTimeWatermarkForEviction = iwEviction) + ) case t: TransformWithStateExec if t.stateInfo.isDefined => t.copy( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImplWithTTL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImplWithTTL.scala index 4c8dd6a193c25..4e32b80578155 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImplWithTTL.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImplWithTTL.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.streaming.TransformWithStateKeyValueRowSchemaUtils._ import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, StateStore, StateStoreErrors} import org.apache.spark.sql.streaming.{ListState, TTLConfig} -import org.apache.spark.sql.types.StructType import org.apache.spark.util.NextIterator /** @@ -45,21 +44,13 @@ class ListStateImplWithTTL[S]( valEncoder: ExpressionEncoder[Any], ttlConfig: TTLConfig, batchTimestampMs: Long, - metrics: Map[String, SQLMetric] = Map.empty) - extends SingleKeyTTLStateImpl(stateName, store, keyExprEnc, batchTimestampMs) - with ListStateMetricsImpl - with ListState[S] { - - override def stateStore: StateStore = store - override def baseStateName: String = stateName - override def exprEncSchema: StructType = keyExprEnc.schema + metrics: Map[String, SQLMetric]) + extends OneToManyTTLState( + stateName, store, keyExprEnc.schema, ttlConfig, batchTimestampMs, metrics) with 
ListState[S] { private lazy val stateTypesEncoder = StateTypesEncoder(keyExprEnc, valEncoder, stateName, hasTtl = true) - private lazy val ttlExpirationMs = - StateTTL.calculateExpirationTimeForDuration(ttlConfig.ttlDuration, batchTimestampMs) - initialize() private def initialize(): Unit = { @@ -106,35 +97,22 @@ class ListStateImplWithTTL[S]( validateNewState(newState) val encodedKey = stateTypesEncoder.encodeGroupingKey() - var isFirst = true - var entryCount = 0L - TWSMetricsUtils.resetMetric(metrics, "numUpdatedStateRows") - - newState.foreach { v => - val encodedValue = stateTypesEncoder.encodeValue(v, ttlExpirationMs) - if (isFirst) { - store.put(encodedKey, encodedValue, stateName) - isFirst = false - } else { - store.merge(encodedKey, encodedValue, stateName) - } - entryCount += 1 - TWSMetricsUtils.incrementMetric(metrics, "numUpdatedStateRows") + val newStateUnsafeRows = newState.iterator.map { v => + stateTypesEncoder.encodeValue(v, ttlExpirationMs) } - upsertTTLForStateKey(encodedKey) - updateEntryCount(encodedKey, entryCount) + + updatePrimaryAndSecondaryIndices(true, encodedKey, newStateUnsafeRows, ttlExpirationMs) } /** Append an entry to the list. */ override def appendValue(newState: S): Unit = { StateStoreErrors.requireNonNullStateValue(newState, stateName) + val encodedKey = stateTypesEncoder.encodeGroupingKey() - val entryCount = getEntryCount(encodedKey) - store.merge(encodedKey, - stateTypesEncoder.encodeValue(newState, ttlExpirationMs), stateName) - TWSMetricsUtils.incrementMetric(metrics, "numUpdatedStateRows") - upsertTTLForStateKey(encodedKey) - updateEntryCount(encodedKey, entryCount + 1) + val newStateUnsafeRow = stateTypesEncoder.encodeValue(newState, ttlExpirationMs) + + updatePrimaryAndSecondaryIndices(false, encodedKey, + Iterator.single(newStateUnsafeRow), ttlExpirationMs) } /** Append an entire list to the existing value. 
*/ @@ -142,25 +120,21 @@ class ListStateImplWithTTL[S]( validateNewState(newState) val encodedKey = stateTypesEncoder.encodeGroupingKey() - var entryCount = getEntryCount(encodedKey) - newState.foreach { v => - val encodedValue = stateTypesEncoder.encodeValue(v, ttlExpirationMs) - store.merge(encodedKey, encodedValue, stateName) - entryCount += 1 - TWSMetricsUtils.incrementMetric(metrics, "numUpdatedStateRows") + // The UnsafeRows created here are reused: we do NOT copy them. As a result, + // this iterator must only be used lazily, and it should never be materialized, + // unless you call newStateUnsafeRows.map(_.copy()). + val newStateUnsafeRows = newState.iterator.map { v => + stateTypesEncoder.encodeValue(v, ttlExpirationMs) } - upsertTTLForStateKey(encodedKey) - updateEntryCount(encodedKey, entryCount) + + updatePrimaryAndSecondaryIndices(false, encodedKey, + newStateUnsafeRows, ttlExpirationMs) } /** Remove this state. */ override def clear(): Unit = { - val encodedKey = stateTypesEncoder.encodeGroupingKey() - store.remove(encodedKey, stateName) - val entryCount = getEntryCount(encodedKey) - TWSMetricsUtils.incrementMetric(metrics, "numRemovedStateRows", entryCount) - removeEntryCount(encodedKey) - clearTTLState() + val groupingKey = stateTypesEncoder.encodeGroupingKey() + clearAllStateForElementKey(groupingKey) } private def validateNewState(newState: Array[S]): Unit = { @@ -175,36 +149,41 @@ class ListStateImplWithTTL[S]( /** * Loops through all the values associated with the grouping key, and removes * the expired elements from the list. - * @param groupingKey grouping key for which cleanup should be performed. + * @param elementKey grouping key for which cleanup should be performed. 
*/ - override def clearIfExpired(groupingKey: UnsafeRow): Long = { + override def clearExpiredValues(elementKey: UnsafeRow): ValueExpirationResult = { var numValuesExpired = 0L - val unsafeRowValuesIterator = store.valuesIterator(groupingKey, stateName) + val unsafeRowValuesIterator = store.valuesIterator(elementKey, stateName) // We clear the list, and use the iterator to put back all of the non-expired values - store.remove(groupingKey, stateName) - removeEntryCount(groupingKey) + store.remove(elementKey, stateName) + + var newMinExpirationMsOpt: Option[Long] = None var isFirst = true - var entryCount = 0L unsafeRowValuesIterator.foreach { encodedValue => if (!stateTypesEncoder.isExpired(encodedValue, batchTimestampMs)) { if (isFirst) { - store.put(groupingKey, encodedValue, stateName) isFirst = false + store.put(elementKey, encodedValue, stateName) } else { - store.merge(groupingKey, encodedValue, stateName) + store.merge(elementKey, encodedValue, stateName) + } + + // If it is not expired, it needs to be reinserted (either via put or merge), but + // it also has an expiration time that might be the new minimum. 
+ val currentExpirationMs = stateTypesEncoder.decodeTtlExpirationMs(encodedValue) + + newMinExpirationMsOpt = newMinExpirationMsOpt match { + case Some(minExpirationMs) => + Some(math.min(minExpirationMs, currentExpirationMs.get)) + case None => + Some(currentExpirationMs.get) } - entryCount += 1 } else { numValuesExpired += 1 } } - updateEntryCount(groupingKey, entryCount) - TWSMetricsUtils.incrementMetric(metrics, "numRemovedStateRows", numValuesExpired) - numValuesExpired - } - private def upsertTTLForStateKey(encodedGroupingKey: UnsafeRow): Unit = { - upsertTTLForStateKey(ttlExpirationMs, encodedGroupingKey) + ValueExpirationResult(numValuesExpired, newMinExpirationMsOpt) } /* @@ -238,11 +217,23 @@ class ListStateImplWithTTL[S]( } } + private[sql] def getMinValues(): Iterator[Long] = { + val groupingKey = stateTypesEncoder.encodeGroupingKey() + minIndexIterator() + .filter(_._1 == groupingKey) + .map(_._2) + } + /** - * Get all ttl values stored in ttl state for current implicit - * grouping key. + * Get the TTL value stored in TTL state for the current implicit grouping key, + * if it exists. */ - private[sql] def getValuesInTTLState(): Iterator[Long] = { - getValuesInTTLState(stateTypesEncoder.encodeGroupingKey()) + private[sql] def getValueInTTLState(): Option[Long] = { + val groupingKey = stateTypesEncoder.encodeGroupingKey() + val ttlRowsForGroupingKey = getTTLRows().filter(_.elementKey == groupingKey).toSeq + + assert(ttlRowsForGroupingKey.size <= 1, "Multiple TTLRows found for grouping key " + + s"$groupingKey. Expected at most 1. 
Found: ${ttlRowsForGroupingKey.mkString(", ")}.") + ttlRowsForGroupingKey.headOption.map(_.expirationMs) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MapStateImplWithTTL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MapStateImplWithTTL.scala index 19704b6d1bd59..64581006555e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MapStateImplWithTTL.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MapStateImplWithTTL.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.streaming import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.streaming.TransformWithStateKeyValueRowSchemaUtils._ import org.apache.spark.sql.execution.streaming.state.{PrefixKeyScanStateEncoderSpec, StateStore, StateStoreErrors} @@ -48,17 +47,14 @@ class MapStateImplWithTTL[K, V]( valEncoder: ExpressionEncoder[Any], ttlConfig: TTLConfig, batchTimestampMs: Long, - metrics: Map[String, SQLMetric] = Map.empty) - extends CompositeKeyTTLStateImpl[K](stateName, store, - keyExprEnc, userKeyEnc, batchTimestampMs) - with MapState[K, V] with Logging { +metrics: Map[String, SQLMetric]) + extends OneToOneTTLState( + stateName, store, getCompositeKeySchema(keyExprEnc.schema, userKeyEnc.schema), ttlConfig, + batchTimestampMs, metrics) with MapState[K, V] with Logging { private val stateTypesEncoder = new CompositeKeyStateEncoder( keyExprEnc, userKeyEnc, valEncoder, stateName, hasTtl = true) - private val ttlExpirationMs = - StateTTL.calculateExpirationTimeForDuration(ttlConfig.ttlDuration, batchTimestampMs) - initialize() private def initialize(): Unit = { @@ -102,15 +98,12 @@ class MapStateImplWithTTL[K, V]( StateStoreErrors.requireNonNullStateValue(key, stateName) 
StateStoreErrors.requireNonNullStateValue(value, stateName) - val encodedGroupingKey = stateTypesEncoder.encodeGroupingKey() - val encodedUserKey = stateTypesEncoder.encodeUserKey(key) - - val encodedValue = stateTypesEncoder.encodeValue(value, ttlExpirationMs) val encodedCompositeKey = stateTypesEncoder.encodeCompositeKey(key) - store.put(encodedCompositeKey, encodedValue, stateName) - TWSMetricsUtils.incrementMetric(metrics, "numUpdatedStateRows") + val ttlExpirationMs = StateTTL + .calculateExpirationTimeForDuration(ttlConfig.ttlDuration, batchTimestampMs) + val encodedValue = stateTypesEncoder.encodeValue(value, ttlExpirationMs) - upsertTTLForStateKey(ttlExpirationMs, encodedGroupingKey, encodedUserKey) + updatePrimaryAndSecondaryIndices(encodedCompositeKey, encodedValue, ttlExpirationMs) } /** Get the map associated with grouping key */ @@ -161,41 +154,12 @@ class MapStateImplWithTTL[K, V]( /** Remove this state. */ override def clear(): Unit = { - keys().foreach { itr => - removeKey(itr) - } - clearTTLState() - } - - /** - * Clears the user state associated with this grouping key - * if it has expired. This function is called by Spark to perform - * cleanup at the end of transformWithState processing. - * - * Spark uses a secondary index to determine if the user state for - * this grouping key has expired. However, its possible that the user - * has updated the TTL and secondary index is out of date. Implementations - * must validate that the user State has actually expired before cleanup based - * on their own State data. - * - * @param groupingKey grouping key for which cleanup should be performed. - * @param userKey user key for which cleanup should be performed. 
- */ - override def clearIfExpired( - groupingKeyRow: UnsafeRow, - userKeyRow: UnsafeRow): Long = { - val compositeKeyRow = stateTypesEncoder.encodeCompositeKey(groupingKeyRow, userKeyRow) + val encodedGroupingKey = stateTypesEncoder.encodeGroupingKey() + val unsafeRowPairIterator = store.prefixScan(encodedGroupingKey, stateName) - val retRow = store.get(compositeKeyRow, stateName) - var numRemovedElements = 0L - if (retRow != null) { - if (stateTypesEncoder.isExpired(retRow, batchTimestampMs)) { - store.remove(compositeKeyRow, stateName) - numRemovedElements += 1 - TWSMetricsUtils.incrementMetric(metrics, "numRemovedStateRows") - } + unsafeRowPairIterator.foreach { rowPair => + clearAllStateForElementKey(rowPair.key) } - numRemovedElements } /* @@ -243,30 +207,18 @@ class MapStateImplWithTTL[K, V]( * grouping key. */ private[sql] def getKeyValuesInTTLState(): Iterator[(K, Long)] = { - val ttlIterator = ttlIndexIterator() val implicitGroupingKey = stateTypesEncoder.encodeGroupingKey() - var nextValue: Option[(K, Long)] = None - - new Iterator[(K, Long)] { - override def hasNext: Boolean = { - while (nextValue.isEmpty && ttlIterator.hasNext) { - val nextTtlValue = ttlIterator.next() - val groupingKey = nextTtlValue.groupingKey - if (groupingKey equals implicitGroupingKey.getStruct( - 0, keyExprEnc.schema.length)) { - val userKey = stateTypesEncoder.decodeUserKey( - nextTtlValue.userKey) - nextValue = Some(userKey.asInstanceOf[K], nextTtlValue.expirationMs) - } - } - nextValue.isDefined - } - - override def next(): (K, Long) = { - val result = nextValue.get - nextValue = None - result - } + .getStruct(0, keyExprEnc.schema.length) + + // We're getting composite rows back + getTTLRows().filter { ttlRow => + val compositeKey = ttlRow.elementKey + val groupingKey = compositeKey.getStruct(0, keyExprEnc.schema.length) + groupingKey == implicitGroupingKey + }.map { ttlRow => + val compositeKey = ttlRow.elementKey + val userKey = 
stateTypesEncoder.decodeCompositeKey(compositeKey) + (userKey.asInstanceOf[K], ttlRow.expirationMs) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 40d58e5a402a1..23e72fc4e3e2f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -513,6 +513,7 @@ class MicroBatchExecution( execCtx.startOffsets ++= execCtx.endOffsets watermarkTracker.setWatermark( math.max(watermarkTracker.currentWatermark, commitMetadata.nextBatchWatermarkMs)) + currentStateStoreCkptId ++= commitMetadata.stateUniqueIds } else if (latestCommittedBatchId == latestBatchId - 1) { execCtx.endOffsets.foreach { case (source: Source, end: Offset) => @@ -965,7 +966,8 @@ class MicroBatchExecution( updateStateStoreCkptId(execCtx, latestExecPlan) } execCtx.reportTimeTaken("commitOffsets") { - if (!commitLog.add(execCtx.batchId, CommitMetadata(watermarkTracker.currentWatermark))) { + if (!commitLog.add(execCtx.batchId, + CommitMetadata(watermarkTracker.currentWatermark, currentStateStoreCkptId.toMap))) { throw QueryExecutionErrors.concurrentStreamLogUpdate(execCtx.batchId) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index e1e5b3a7ef88e..a599f3bc66118 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -102,7 +102,7 @@ object OffsetSeqMetadata extends Logging { FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, STREAMING_AGGREGATION_STATE_FORMAT_VERSION, STREAMING_JOIN_STATE_FORMAT_VERSION, STATE_STORE_COMPRESSION_CODEC, 
STATE_STORE_ROCKSDB_FORMAT_VERSION, STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION, - PRUNE_FILTERS_CAN_PRUNE_STREAMING_SUBPLAN + PRUNE_FILTERS_CAN_PRUNE_STREAMING_SUBPLAN, STREAMING_STATE_STORE_ENCODING_FORMAT ) /** @@ -125,7 +125,8 @@ object OffsetSeqMetadata extends Logging { SymmetricHashJoinStateManager.legacyVersion.toString, STATE_STORE_COMPRESSION_CODEC.key -> CompressionCodec.LZ4, STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION.key -> "false", - PRUNE_FILTERS_CAN_PRUNE_STREAMING_SUBPLAN.key -> "true" + PRUNE_FILTERS_CAN_PRUNE_STREAMING_SUBPLAN.key -> "true", + STREAMING_STATE_STORE_ENCODING_FORMAT.key -> "unsaferow" ) def apply(json: String): OffsetSeqMetadata = Serialization.read[OffsetSeqMetadata](json) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateStoreColumnFamilySchemaUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateStoreColumnFamilySchemaUtils.scala index 7da8408f98b0f..585298fa4c993 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateStoreColumnFamilySchemaUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateStoreColumnFamilySchemaUtils.scala @@ -20,10 +20,49 @@ import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.execution.streaming.TransformWithStateKeyValueRowSchemaUtils._ import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, PrefixKeyScanStateEncoderSpec, StateStoreColFamilySchema} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types._ object StateStoreColumnFamilySchemaUtils { + /** + * Avro uses zig-zag encoding for some fixed-length types, like Longs and Ints. For range scans + * we want to use big-endian encoding, so we need to convert the source schema to replace these + * types with BinaryType. 
+ * + * @param schema The schema to convert + * @param ordinals If non-empty, only convert fields at these ordinals. + * If empty, convert all fields. + */ + def convertForRangeScan(schema: StructType, ordinals: Seq[Int] = Seq.empty): StructType = { + val ordinalSet = ordinals.toSet + + StructType(schema.fields.zipWithIndex.flatMap { case (field, idx) => + if ((ordinals.isEmpty || ordinalSet.contains(idx)) && isFixedSize(field.dataType)) { + // For each numeric field, create two fields: + // 1. Byte marker for null, positive, or negative values + // 2. The original numeric value in big-endian format + // Byte type is converted to Int in Avro, which doesn't work for us as Avro + // uses zig-zag encoding as opposed to big-endian for Ints + Seq( + StructField(s"${field.name}_marker", BinaryType, nullable = false), + field.copy(name = s"${field.name}_value", BinaryType) + ) + } else { + Seq(field) + } + }) + } + + private def isFixedSize(dataType: DataType): Boolean = dataType match { + case _: ByteType | _: BooleanType | _: ShortType | _: IntegerType | _: LongType | + _: FloatType | _: DoubleType => true + case _ => false + } + + def getTtlColFamilyName(stateName: String): String = { + "$ttl_" + stateName + } + def getValueStateSchema[T]( stateName: String, keyEncoder: ExpressionEncoder[Any], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateTypesEncoderUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateTypesEncoderUtils.scala index d87de4c69c40a..a2b7ee4ba7916 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateTypesEncoderUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateTypesEncoderUtils.scala @@ -30,6 +30,11 @@ import org.apache.spark.sql.types._ * files and to be passed into `RocksDBStateKey(/Value)Encoder`. 
*/ object TransformWithStateKeyValueRowSchemaUtils { + /** + * Creates a schema that is the concatenation of the grouping key and a user-defined + * key. This is used by MapState to create a composite key that is then treated as + * an "elementKey" by OneToOneTTLState. + */ def getCompositeKeySchema( groupingKeySchema: StructType, userKeySchema: StructType): StructType = { @@ -38,24 +43,37 @@ object TransformWithStateKeyValueRowSchemaUtils { .add("userKey", new StructType(userKeySchema.fields)) } - def getSingleKeyTTLRowSchema(keySchema: StructType): StructType = + /** + * Represents the schema of keys in the TTL index, managed by TTLState implementations. + * There is no value associated with entries in the TTL index, so there is no method + * called, for example, getTTLValueSchema. + */ + def getTTLRowKeySchema(keySchema: StructType): StructType = new StructType() .add("expirationMs", LongType) - .add("groupingKey", keySchema) + .add("elementKey", keySchema) - def getCompositeKeyTTLRowSchema( - groupingKeySchema: StructType, - userKeySchema: StructType): StructType = + /** + * Represents the schema of a single long value, which is used to store the expiration + * timestamp of elements in the minimum index, managed by OneToManyTTLState. + */ + def getExpirationMsRowSchema(): StructType = new StructType() .add("expirationMs", LongType) - .add("groupingKey", new StructType(groupingKeySchema.fields)) - .add("userKey", new StructType(userKeySchema.fields)) + /** + * Represents the schema of an element with TTL in the primary index. We store the expiration + * of each value along with the value itself, since each value has its own TTL. It is used as + * the value schema of every value, for every stateful variable. 
+ */ def getValueSchemaWithTTL(schema: StructType, hasTTL: Boolean): StructType = { if (hasTTL) { - new StructType().add("value", schema) + new StructType() + .add("value", schema) .add("ttlExpirationMs", LongType) - } else schema + } else { + schema + } } } @@ -118,7 +136,9 @@ class StateTypesEncoder[V]( def decodeValue(row: UnsafeRow): V = { if (hasTtl) { rowToObjDeserializer.apply(row.getStruct(0, valEncoder.schema.length)) - } else rowToObjDeserializer.apply(row) + } else { + rowToObjDeserializer.apply(row) + } } /** @@ -225,10 +245,6 @@ class CompositeKeyStateEncoder[K, V]( compositeKeyProjection(InternalRow(groupingKey, userKey)) } - def decodeUserKey(row: UnsafeRow): K = { - userKeyRowToObjDeserializer.apply(row) - } - /** * The input row is of composite Key schema. * Only user key is returned though grouping key also exist in the row. @@ -239,37 +255,14 @@ class CompositeKeyStateEncoder[K, V]( } /** Class for TTL with single key serialization */ -class SingleKeyTTLEncoder( - keyExprEnc: ExpressionEncoder[Any]) { - - private val ttlKeyProjection = UnsafeProjection.create( - getSingleKeyTTLRowSchema(keyExprEnc.schema)) - - def encodeTTLRow(expirationMs: Long, groupingKey: UnsafeRow): UnsafeRow = { - ttlKeyProjection.apply( - InternalRow(expirationMs, groupingKey.asInstanceOf[InternalRow])) - } -} - -/** Class for TTL with composite key serialization */ -class CompositeKeyTTLEncoder[K]( - keyExprEnc: ExpressionEncoder[Any], - userKeyEnc: ExpressionEncoder[Any]) { +class TTLEncoder(schema: StructType) { - private val ttlKeyProjection = UnsafeProjection.create( - getCompositeKeyTTLRowSchema(keyExprEnc.schema, userKeyEnc.schema)) + private val ttlKeyProjection = UnsafeProjection.create(getTTLRowKeySchema(schema)) - def encodeTTLRow( - expirationMs: Long, - groupingKey: UnsafeRow, - userKey: UnsafeRow): UnsafeRow = { + // Take a groupingKey UnsafeRow and turn it into a (expirationMs, groupingKey) UnsafeRow. 
+ def encodeTTLRow(expirationMs: Long, elementKey: UnsafeRow): UnsafeRow = { ttlKeyProjection.apply( - InternalRow( - expirationMs, - groupingKey.getStruct(0, keyExprEnc.schema.length) - .asInstanceOf[InternalRow], - userKey.getStruct(0, userKeyEnc.schema.length) - .asInstanceOf[InternalRow])) + InternalRow(expirationMs, elementKey.asInstanceOf[InternalRow])) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala index 0f90fa8d9e490..5d13af0af7c43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala @@ -189,7 +189,7 @@ class StatefulProcessorHandleImpl( def doTtlCleanup(): Unit = { val numValuesRemovedDueToTTLExpiry = metrics.get("numValuesRemovedDueToTTLExpiry").get ttlStates.forEach { s => - numValuesRemovedDueToTTLExpiry += s.clearExpiredState() + numValuesRemovedDueToTTLExpiry += s.clearExpiredStateForAllKeys() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index bd501c9357234..44202bb0d2944 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -715,6 +715,7 @@ abstract class StreamExecution( object StreamExecution { val QUERY_ID_KEY = "sql.streaming.queryId" + val RUN_ID_KEY = "sql.streaming.runId" val IS_CONTINUOUS_PROCESSING = "__is_continuous_processing" val IO_EXCEPTION_NAMES = Seq( classOf[InterruptedException].getName, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TTLState.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TTLState.scala index 87d1a15dff1a9..b4449f99d6ba5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TTLState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TTLState.scala @@ -19,274 +19,529 @@ package org.apache.spark.sql.execution.streaming import java.time.Duration import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.streaming.TransformWithStateKeyValueRowSchemaUtils._ -import org.apache.spark.sql.execution.streaming.state.{RangeKeyScanStateEncoderSpec, StateStore} +import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, RangeKeyScanStateEncoderSpec, StateStore} +import org.apache.spark.sql.streaming.TTLConfig import org.apache.spark.sql.types._ -object StateTTLSchema { - val TTL_VALUE_ROW_SCHEMA: StructType = - StructType(Array(StructField("__dummy__", NullType))) -} - /** - * Encapsulates the ttl row information stored in [[SingleKeyTTLStateImpl]]. + * Any state variable that wants to support TTL must implement this trait, + * which they can do by extending [[OneToOneTTLState]] or [[OneToManyTTLState]]. * - * @param groupingKey grouping key for which ttl is set - * @param expirationMs expiration time for the grouping key - */ -case class SingleKeyTTLRow( - groupingKey: UnsafeRow, - expirationMs: Long) - -/** - * Encapsulates the ttl row information stored in [[CompositeKeyTTLStateImpl]]. + * The only required methods here are ones relating to evicting expired and all + * state, via clearExpiredStateForAllKeys and clearAllStateForElementKey, + * respectively. 
How classes do this is implementation detail, but the general + * pattern is to use secondary indexes to make sure cleanup scans + * theta(records to evict), not theta(all records). * - * @param groupingKey grouping key for which ttl is set - * @param userKey user key for which ttl is set - * @param expirationMs expiration time for the grouping key - */ -case class CompositeKeyTTLRow( - groupingKey: UnsafeRow, - userKey: UnsafeRow, - expirationMs: Long) - -/** - * Represents the underlying state for secondary TTL Index for a user defined - * state variable. + * There are two broad patterns of implementing stateful variables, and thus + * there are two broad patterns for implementing TTL. The first is when there + * is a one-to-one mapping between an element key [1] and a value; the primary + * and secondary index management for this case is implemented by + * [[OneToOneTTLState]]. When a single element key can have multiple values, + * all of which can expire at their own, unique times, then + * [[OneToManyTTLState]] should be used. + * + * In either case, implementations need to use some sort of secondary index + * that orders element keys by expiration time. This base functionality + * is provided by methods in this trait that read/write/delete to the + * so-called "TTL index". It is a secondary index with the layout of + * (expirationMs, elementKey) -> EMPTY_ROW. The expirationMs is big-endian + * encoded to allow for efficient range scans to find all expired keys. + * + * TTLState (or any abstract sub-classes) should never deal with encoding or + * decoding UnsafeRows to and from their user-facing types. The stateful variable + * themselves should be doing this; all other TTLState sub-classes should be concerned + * only with writing, reading, and deleting UnsafeRows and their associated + * expirations from the primary and secondary indexes. [2] + * + * [1]. You might ask, why call it "element key" instead of "grouping key"? 
+ * This is because a single grouping key might have multiple elements, as in + * the case of a map, which has composite keys of the form (groupingKey, mapKey). + * In the case of ValueState, though, the element key is the grouping key. + * To generalize to both cases, this class should always use the term elementKey.) * - * This state allows Spark to query ttl values based on expiration time - * allowing efficient ttl cleanup. + * [2]. You might also ask, why design it this way? We want the TTLState abstract + * sub-classes to write to both the primary and secondary indexes, since they + * both need to stay in sync; co-locating the logic is cleanest. */ trait TTLState { + // Name of the state variable, e.g. the string the user passes to get{Value/List/Map}State + // in the init() method of a StatefulProcessor. + private[sql] def stateName: String + + // The StateStore instance used to store the state. There is only one instance shared + // among the primary and secondary indexes, since it uses virtual column families + // to keep the indexes separate. + private[sql] def store: StateStore + + // The schema of the primary key for the state variable. For value and list state, this + // is the grouping key. For map state, this is the composite key of the grouping key and + // a map key. + private[sql] def elementKeySchema: StructType + + // The timestamp at which the batch is being processed. All state variables that have + // an expiration at or before this timestamp must be cleaned up. + private[sql] def batchTimestampMs: Long + + // The configuration for this run of the streaming query. It may change between runs + // (e.g. user sets ttlConfig1, stops their query, updates to ttlConfig2, and then + // resumes their query). + private[sql] def ttlConfig: TTLConfig + + // A map from metric name to the underlying SQLMetric. 
This should not be updated + // by the underlying state variable, as the TTL state implementation should be + // handling all reads/writes/deletes to the indexes. + private[sql] def metrics: Map[String, SQLMetric] = Map.empty + + private final val TTL_INDEX = "$ttl_" + stateName + private final val TTL_INDEX_KEY_SCHEMA = getTTLRowKeySchema(elementKeySchema) + private final val TTL_EMPTY_VALUE_ROW_SCHEMA: StructType = + StructType(Array(StructField("__empty__", NullType))) + + private final val TTL_ENCODER = new TTLEncoder(elementKeySchema) + + // Empty row used for values + private final val TTL_EMPTY_VALUE_ROW = + UnsafeProjection.create(Array[DataType](NullType)).apply(InternalRow.apply(null)) - /** - * Perform the user state clean up based on ttl values stored in - * this state. NOTE that its not safe to call this operation concurrently - * when the user can also modify the underlying State. Cleanup should be initiated - * after arbitrary state operations are completed by the user. - * - * @return number of values cleaned up. - */ - def clearExpiredState(): Long -} + private[sql] final def ttlExpirationMs = StateTTL + .calculateExpirationTimeForDuration(ttlConfig.ttlDuration, batchTimestampMs) -/** - * Manages the ttl information for user state keyed with a single key (grouping key). 
- */ -abstract class SingleKeyTTLStateImpl( - stateName: String, - store: StateStore, - keyExprEnc: ExpressionEncoder[Any], - ttlExpirationMs: Long) - extends TTLState { + store.createColFamilyIfAbsent( + TTL_INDEX, + TTL_INDEX_KEY_SCHEMA, + TTL_EMPTY_VALUE_ROW_SCHEMA, + RangeKeyScanStateEncoderSpec(TTL_INDEX_KEY_SCHEMA, Seq(0)), + isInternal = true + ) - import org.apache.spark.sql.execution.streaming.StateTTLSchema._ + private[sql] def insertIntoTTLIndex(expirationMs: Long, elementKey: UnsafeRow): Unit = { + val secondaryIndexKey = TTL_ENCODER.encodeTTLRow(expirationMs, elementKey) + store.put(secondaryIndexKey, TTL_EMPTY_VALUE_ROW, TTL_INDEX) + } - private val ttlColumnFamilyName = "$ttl_" + stateName - private val keySchema = getSingleKeyTTLRowSchema(keyExprEnc.schema) - private val keyTTLRowEncoder = new SingleKeyTTLEncoder(keyExprEnc) + // The deleteFromTTLIndex overload that takes an expiration time and elementKey as an + // argument is used when we need to _construct_ the key to delete from the TTL index. + // + // If we know the timestamp to delete and the elementKey, but don't have a pre-constructed + // UnsafeRow, then you should use this method to delete from the TTL index. + private[sql] def deleteFromTTLIndex(expirationMs: Long, elementKey: UnsafeRow): Unit = { + val secondaryIndexKey = TTL_ENCODER.encodeTTLRow(expirationMs, elementKey) + store.remove(secondaryIndexKey, TTL_INDEX) + } - // empty row used for values - private val EMPTY_ROW = - UnsafeProjection.create(Array[DataType](NullType)).apply(InternalRow.apply(null)) + // The deleteFromTTLIndex overload that takes an UnsafeRow as an argument is used when + // we're deleting elements from the TTL index that we are iterating over. + // + // If we were to use the other deleteFromTTLIndex method, we would have to re-encode the + // components into an UnsafeRow. It is more efficient to just pass the UnsafeRow that we + // read from the iterator. 
+ private[sql] def deleteFromTTLIndex(ttlKey: UnsafeRow): Unit = { + store.remove(ttlKey, TTL_INDEX) + } - store.createColFamilyIfAbsent(ttlColumnFamilyName, keySchema, TTL_VALUE_ROW_SCHEMA, - RangeKeyScanStateEncoderSpec(keySchema, Seq(0)), isInternal = true) + private[sql] def toTTLRow(ttlKey: UnsafeRow): TTLRow = { + val expirationMs = ttlKey.getLong(0) + val elementKey = ttlKey.getStruct(1, TTL_INDEX_KEY_SCHEMA.length) + TTLRow(elementKey, expirationMs) + } - /** - * This function will be called when clear() on State Variables - * with ttl enabled is called. This function should clear any - * associated ttlState, since we are clearing the user state. - */ - def clearTTLState(): Unit = { - val iterator = store.iterator(ttlColumnFamilyName) - iterator.foreach { kv => - store.remove(kv.key, ttlColumnFamilyName) - } + private[sql] def getTTLRows(): Iterator[TTLRow] = { + store.iterator(TTL_INDEX).map(kv => toTTLRow(kv.key)) } - def upsertTTLForStateKey( - expirationMs: Long, - groupingKey: UnsafeRow): Unit = { - val encodedTtlKey = keyTTLRowEncoder.encodeTTLRow( - expirationMs, groupingKey) - store.put(encodedTtlKey, EMPTY_ROW, ttlColumnFamilyName) + // Returns an Iterator over all the keys in the TTL index that have expired. This method + // does not delete the keys from the TTL index; it is the responsibility of the caller + // to do so. + // + // The schema of the UnsafeRow returned by this iterator is (expirationMs, elementKey). + private[sql] def ttlEvictionIterator(): Iterator[UnsafeRow] = { + val ttlIterator = store.iterator(TTL_INDEX) + + // Recall that the format is (expirationMs, elementKey) -> TTL_EMPTY_VALUE_ROW, so + // kv.value doesn't ever need to be used. + ttlIterator.takeWhile { kv => + val expirationMs = kv.key.getLong(0) + StateTTL.isExpired(expirationMs, batchTimestampMs) + }.map(_.key) } + // Encapsulates a row stored in a TTL index. Exposed for testing. 
+ private[sql] case class TTLRow(elementKey: UnsafeRow, expirationMs: Long) + /** - * Clears any state which has ttl older than [[ttlExpirationMs]]. + * Evicts the state associated with this stateful variable that has expired + * due to TTL. The eviction applies to all grouping keys, and to all indexes, + * primary or secondary. + * + * This method can be called at any time in the micro-batch execution, + * as long as it is allowed to complete before subsequent state operations are + * issued. Operations to the state variable should not be issued concurrently while + * this is running, since it may leave the state variable in an inconsistent state + * as it cleans up. + * + * @return number of values cleaned up. */ - override def clearExpiredState(): Long = { - val iterator = store.iterator(ttlColumnFamilyName) - var numValuesExpired = 0L + private[sql] def clearExpiredStateForAllKeys(): Long - iterator.takeWhile { kv => - val expirationMs = kv.key.getLong(0) - StateTTL.isExpired(expirationMs, ttlExpirationMs) - }.foreach { kv => - val groupingKey = kv.key.getStruct(1, keyExprEnc.schema.length) - numValuesExpired += clearIfExpired(groupingKey) - store.remove(kv.key, ttlColumnFamilyName) + /** + * When a user calls clear() on a stateful variable, this method is invoked to + * clear all of the state for the current (implicit) grouping key. It is responsible + * for deleting from the primary index as well as any secondary index(es). + * + * If a given state variable has to clean up multiple elementKeys (in MapState, for + * example, every key in the map is its own elementKey), then this method should + * be invoked for each of those keys. + */ + private[sql] def clearAllStateForElementKey(elementKey: UnsafeRow): Unit +} + +/** + * OneToOneTTLState is an implementation of [[TTLState]] that is used to manage + * TTL for state variables that need a single secondary index to efficiently manage + * records with an expiration. 
+ * + * The primary index for state variables that can use a [[OneToOneTTLState]] have + * the form of: [elementKey -> (value, elementExpiration)]. You'll notice that, given + * a timestamp, it would take linear time to probe the primary index for all of its + * expired values. + * + * As a result, this class uses helper methods from [[TTLState]] to maintain the secondary + * index from [(elementExpiration, elementKey) -> EMPTY_ROW]. + * + * For an explanation of why this structure is not always sufficient (e.g. why the class + * [[OneToManyTTLState]] is needed), please visit its class-doc comment. + */ +abstract class OneToOneTTLState( + stateNameArg: String, + storeArg: StateStore, + elementKeySchemaArg: StructType, + ttlConfigArg: TTLConfig, + batchTimestampMsArg: Long, + metricsArg: Map[String, SQLMetric]) extends TTLState { + override private[sql] def stateName: String = stateNameArg + override private[sql] def store: StateStore = storeArg + override private[sql] def elementKeySchema: StructType = elementKeySchemaArg + override private[sql] def ttlConfig: TTLConfig = ttlConfigArg + override private[sql] def batchTimestampMs: Long = batchTimestampMsArg + override private[sql] def metrics: Map[String, SQLMetric] = metricsArg + + /** + * This method updates the TTL for the given elementKey to be expirationMs, + * updating both the primary and secondary indices if needed. + * + * Note that an elementKey may be the state variable's grouping key, _or_ it + * could be a composite key. MapState is an example of a state variable that + * has composite keys, which has the structure of the groupingKey followed by + * the specific key in the map. This method doesn't need to know what type of + * key is being used, though, since in either case, it's just an UnsafeRow. + * + * @param elementKey the key for which the TTL should be updated, which may + * either be the encoded grouping key, or the grouping key + * and some user-defined key. 
+ * @param elementValue the value to update the primary index with. It is of the + * form (value, expirationMs). + * @param expirationMs the new expiration timestamp to use for elementKey. + */ + private[sql] def updatePrimaryAndSecondaryIndices( + elementKey: UnsafeRow, + elementValue: UnsafeRow, + expirationMs: Long): Unit = { + val existingPrimaryValue = store.get(elementKey, stateName) + + // Doesn't exist. Insert into the primary and TTL indexes. + if (existingPrimaryValue == null) { + store.put(elementKey, elementValue, stateName) + TWSMetricsUtils.incrementMetric(metrics, "numUpdatedStateRows") + insertIntoTTLIndex(expirationMs, elementKey) + } else { + // If the values are equal, then they must be equal in actual value and the expiration + // timestamp. We don't need to update any index in this case. + if (elementValue != existingPrimaryValue) { + store.put(elementKey, elementValue, stateName) + TWSMetricsUtils.incrementMetric(metrics, "numUpdatedStateRows") + + // Small optimization: the value could have changed, but the expirationMs could have + // stayed the same. We only put into the TTL index if the expirationMs has changed. 
+ val existingExpirationMs = existingPrimaryValue.getLong(1) + if (existingExpirationMs != expirationMs) { + deleteFromTTLIndex(existingExpirationMs, elementKey) + insertIntoTTLIndex(expirationMs, elementKey) + } + } } - numValuesExpired } - private[sql] def ttlIndexIterator(): Iterator[SingleKeyTTLRow] = { - val ttlIterator = store.iterator(ttlColumnFamilyName) + override private[sql] def clearExpiredStateForAllKeys(): Long = { + var numValuesExpired = 0L - new Iterator[SingleKeyTTLRow] { - override def hasNext: Boolean = ttlIterator.hasNext + ttlEvictionIterator().foreach { ttlKey => + // Delete from secondary index + deleteFromTTLIndex(ttlKey) + // Delete from primary index + store.remove(toTTLRow(ttlKey).elementKey, stateName) - override def next(): SingleKeyTTLRow = { - val kv = ttlIterator.next() - SingleKeyTTLRow( - expirationMs = kv.key.getLong(0), - groupingKey = kv.key.getStruct(1, keyExprEnc.schema.length) - ) - } + numValuesExpired += 1 } + + TWSMetricsUtils.incrementMetric(metrics, "numRemovedStateRows", numValuesExpired) + numValuesExpired } - private[sql] def getValuesInTTLState(groupingKey: UnsafeRow): Iterator[Long] = { - val ttlIterator = ttlIndexIterator() - var nextValue: Option[Long] = None - - new Iterator[Long] { - override def hasNext: Boolean = { - while (nextValue.isEmpty && ttlIterator.hasNext) { - val nextTtlValue = ttlIterator.next() - val valueGroupingKey = nextTtlValue.groupingKey - if (valueGroupingKey equals groupingKey) { - nextValue = Some(nextTtlValue.expirationMs) - } - } - nextValue.isDefined - } + override private[sql] def clearAllStateForElementKey(elementKey: UnsafeRow): Unit = { + val existingPrimaryValue = store.get(elementKey, stateName) + if (existingPrimaryValue != null) { + val existingExpirationMs = existingPrimaryValue.getLong(1) - override def next(): Long = { - val result = nextValue.get - nextValue = None - result - } + store.remove(elementKey, stateName) + TWSMetricsUtils.incrementMetric(metrics, 
"numRemovedStateRows") + + deleteFromTTLIndex(existingExpirationMs, elementKey) } } - - /** - * Clears the user state associated with this grouping key - * if it has expired. This function is called by Spark to perform - * cleanup at the end of transformWithState processing. - * - * Spark uses a secondary index to determine if the user state for - * this grouping key has expired. However, its possible that the user - * has updated the TTL and secondary index is out of date. Implementations - * must validate that the user State has actually expired before cleanup based - * on their own State data. - * - * @param groupingKey grouping key for which cleanup should be performed. - * - * @return true if the state was cleared, false otherwise. - */ - def clearIfExpired(groupingKey: UnsafeRow): Long } /** - * Manages the ttl information for user state keyed with a single key (grouping key). + * [[OneToManyTTLState]] is an implementation of [[TTLState]] for stateful variables + * that associate a single key with multiple values; every value has its own expiration + * timestamp. + * + * We need an efficient way to find all the values that have expired, but we cannot + * issue point-wise deletes to the elements, since they are merged together using the + * RocksDB StringAppendOperator for merging. As such, we cannot keep a secondary index + * on the key (expirationMs, groupingKey, indexInList), since we have no way to delete a + * specific indexInList from the RocksDB value. (In the future, we could write a custom + * merge operator that can handle tombstones for deleted indexes, but RocksDB doesn't + * support custom merge operators written in Java/Scala.) + * + * Instead, we manage expiration per grouping key instead. Our secondary index will look + * like (expirationMs, groupingKey) -> EMPTY_ROW. This way, we can quickly find all the + * grouping keys that contain at least one element that has expired. 
+ * + * To make sure that we aren't "late" in cleaning up expired values, this secondary index + * maps from the minimum expiration in a list and a grouping key to the EMPTY_VALUE. This + * index is called the "TTL index" in the code (to be consistent with [[OneToOneTTLState]]), + * though it behaves more like a work queue of lists that need to be cleaned up. + * + * Since a grouping key may have a large list and we need to quickly know what the + * minimum expiration is, we need to reverse this work queue index. This reversed index + * maps from key to the minimum expiration in the list, and it is called the "min-expiry" index. + * + * Note: currently, this is only used by ListState with TTL. */ -abstract class CompositeKeyTTLStateImpl[K]( - stateName: String, - store: StateStore, - keyExprEnc: ExpressionEncoder[Any], - userKeyEncoder: ExpressionEncoder[Any], - ttlExpirationMs: Long) - extends TTLState { - - import org.apache.spark.sql.execution.streaming.StateTTLSchema._ - - private val ttlColumnFamilyName = "$ttl_" + stateName - private val keySchema = getCompositeKeyTTLRowSchema( - keyExprEnc.schema, userKeyEncoder.schema +abstract class OneToManyTTLState( + stateNameArg: String, + storeArg: StateStore, + elementKeySchemaArg: StructType, + ttlConfigArg: TTLConfig, + batchTimestampMsArg: Long, + metricsArg: Map[String, SQLMetric]) extends TTLState { + override private[sql] def stateName: String = stateNameArg + override private[sql] def store: StateStore = storeArg + override private[sql] def elementKeySchema: StructType = elementKeySchemaArg + override private[sql] def ttlConfig: TTLConfig = ttlConfigArg + override private[sql] def batchTimestampMs: Long = batchTimestampMsArg + override private[sql] def metrics: Map[String, SQLMetric] = metricsArg + + // Schema of the min-expiry index: elementKey -> minExpirationMs + private val MIN_INDEX = "$min_" + stateName + private val MIN_INDEX_SCHEMA = elementKeySchema + private val MIN_INDEX_VALUE_SCHEMA = 
getExpirationMsRowSchema() + + // Projects a Long into an UnsafeRow + private val minIndexValueProjector = UnsafeProjection.create(MIN_INDEX_VALUE_SCHEMA) + + // Schema of the entry count index: elementKey -> count + private val COUNT_INDEX = "$count_" + stateName + private val COUNT_INDEX_VALUE_SCHEMA: StructType = + StructType(Seq(StructField("count", LongType, nullable = false))) + private val countIndexValueProjector = UnsafeProjection.create(COUNT_INDEX_VALUE_SCHEMA) + + // Reused internal row that we use to create an UnsafeRow with the schema of + // COUNT_INDEX_VALUE_SCHEMA and the desired value. It is not thread safe (although, anyway, + // this class is not thread safe). + private val reusedCountIndexValueRow = new GenericInternalRow(1) + + store.createColFamilyIfAbsent( + MIN_INDEX, + MIN_INDEX_SCHEMA, + MIN_INDEX_VALUE_SCHEMA, + NoPrefixKeyStateEncoderSpec(MIN_INDEX_SCHEMA), + isInternal = true ) - private val keyRowEncoder = new CompositeKeyTTLEncoder[K]( - keyExprEnc, userKeyEncoder) + store.createColFamilyIfAbsent( + COUNT_INDEX, + elementKeySchema, + COUNT_INDEX_VALUE_SCHEMA, + NoPrefixKeyStateEncoderSpec(elementKeySchema), + isInternal = true + ) - // empty row used for values - private val EMPTY_ROW = - UnsafeProjection.create(Array[DataType](NullType)).apply(InternalRow.apply(null)) + // Helper method to get the number of entries in the list state for a given element key + private def getEntryCount(elementKey: UnsafeRow): Long = { + val countRow = store.get(elementKey, COUNT_INDEX) + if (countRow != null) { + countRow.getLong(0) + } else { + 0L + } + } - store.createColFamilyIfAbsent(ttlColumnFamilyName, keySchema, - TTL_VALUE_ROW_SCHEMA, RangeKeyScanStateEncoderSpec(keySchema, - Seq(0)), isInternal = true) + // Helper function to update the number of entries in the list state for a given element key + private def updateEntryCount(elementKey: UnsafeRow, updatedCount: Long): Unit = { + reusedCountIndexValueRow.setLong(0, updatedCount) + 
store.put(elementKey, + countIndexValueProjector(reusedCountIndexValueRow.asInstanceOf[InternalRow]), + COUNT_INDEX + ) + } - def clearTTLState(): Unit = { - val iterator = store.iterator(ttlColumnFamilyName) - iterator.foreach { kv => - store.remove(kv.key, ttlColumnFamilyName) - } + // Helper function to remove the number of entries in the list state for a given element key + private def removeEntryCount(elementKey: UnsafeRow): Unit = { + store.remove(elementKey, COUNT_INDEX) } - def upsertTTLForStateKey( - expirationMs: Long, - groupingKey: UnsafeRow, - userKey: UnsafeRow): Unit = { - val encodedTtlKey = keyRowEncoder.encodeTTLRow( - expirationMs, groupingKey, userKey) - store.put(encodedTtlKey, EMPTY_ROW, ttlColumnFamilyName) + private def writePrimaryIndexEntries( + overwritePrimaryIndex: Boolean, + elementKey: UnsafeRow, + elementValues: Iterator[UnsafeRow]): Unit = { + val initialEntryCount = if (overwritePrimaryIndex) { + removeEntryCount(elementKey) + 0 + } else { + getEntryCount(elementKey) + } + + // Manually keep track of the count so that we can update the count index. We don't + // want to call elementValues.size since that will try to re-read the iterator. + var numNewElements = 0 + + // If we're overwriting the primary index, then we only need to put the first value, + // and then we can merge the rest. + var isFirst = true + elementValues.foreach { value => + numNewElements += 1 + if (isFirst && overwritePrimaryIndex) { + isFirst = false + store.put(elementKey, value, stateName) + } else { + store.merge(elementKey, value, stateName) + } + } + + TWSMetricsUtils.incrementMetric(metrics, "numUpdatedStateRows", numNewElements) + updateEntryCount(elementKey, initialEntryCount + numNewElements) } - /** - * Clears any state which has ttl older than [[ttlExpirationMs]]. 
- */ - override def clearExpiredState(): Long = { - val iterator = store.iterator(ttlColumnFamilyName) - var numRemovedElements = 0L - iterator.takeWhile { kv => - val expirationMs = kv.key.getLong(0) - StateTTL.isExpired(expirationMs, ttlExpirationMs) - }.foreach { kv => - numRemovedElements += clearIfExpired( - kv.key.getStruct(1, keyExprEnc.schema.length), - kv.key.getStruct(2, userKeyEncoder.schema.length)) - store.remove(kv.key, ttlColumnFamilyName) + private[sql] def updatePrimaryAndSecondaryIndices( + overwritePrimaryIndex: Boolean, + elementKey: UnsafeRow, + elementValues: Iterator[UnsafeRow], + expirationMs: Long): Unit = { + val existingMinExpirationUnsafeRow = store.get(elementKey, MIN_INDEX) + + writePrimaryIndexEntries(overwritePrimaryIndex, elementKey, elementValues) + + // If nothing exists in the minimum index, then we need to make sure to write + // the minimum and the TTL indices. There's nothing to clean-up from the + // secondary index, since it's empty. + if (existingMinExpirationUnsafeRow == null) { + // Insert into the min-expiry and TTL index, in no particular order. + store.put(elementKey, minIndexValueProjector(InternalRow(expirationMs)), MIN_INDEX) + insertIntoTTLIndex(expirationMs, elementKey) + } else { + val existingMinExpiration = existingMinExpirationUnsafeRow.getLong(0) + + if (overwritePrimaryIndex || expirationMs < existingMinExpiration) { + // We don't actually have to delete from the min-expiry index, since we're going + // to overwrite it on the next line. However, since the TTL index has the existing + // minimum expiration in it, we need to delete that. + deleteFromTTLIndex(existingMinExpiration, elementKey) + + // Insert into the min-expiry and TTL index, in no particular order. 
+ store.put(elementKey, minIndexValueProjector(InternalRow(expirationMs)), MIN_INDEX) + insertIntoTTLIndex(expirationMs, elementKey) + } } - numRemovedElements } - private[sql] def ttlIndexIterator(): Iterator[CompositeKeyTTLRow] = { - val ttlIterator = store.iterator(ttlColumnFamilyName) + // The return type of clearExpiredValues. For a one-to-many stateful variable, cleanup + // must go through all of the values. numValuesExpired represents the number of entries + // that were removed (for metrics), and newMinExpirationMs is the new minimum expiration + // for the values remaining in the state variable. + case class ValueExpirationResult( + numValuesExpired: Long, + newMinExpirationMs: Option[Long]) - new Iterator[CompositeKeyTTLRow] { - override def hasNext: Boolean = ttlIterator.hasNext + // Clears all the expired values for the given elementKey. + protected def clearExpiredValues(elementKey: UnsafeRow): ValueExpirationResult - override def next(): CompositeKeyTTLRow = { - val kv = ttlIterator.next() - CompositeKeyTTLRow( - expirationMs = kv.key.getLong(0), - groupingKey = kv.key.getStruct(1, keyExprEnc.schema.length), - userKey = kv.key.getStruct(2, userKeyEncoder.schema.length) - ) + override private[sql] def clearExpiredStateForAllKeys(): Long = { + var totalNumValuesExpired = 0L + + ttlEvictionIterator().foreach { ttlKey => + val ttlRow = toTTLRow(ttlKey) + val elementKey = ttlRow.elementKey + + // Delete from TTL index and minimum index + deleteFromTTLIndex(ttlKey) + store.remove(elementKey, MIN_INDEX) + + // Now, we need the specific implementation to remove all the values associated with + // elementKey. + val valueExpirationResult = clearExpiredValues(elementKey) + + valueExpirationResult.newMinExpirationMs.foreach { newExpirationMs => + // Insert into the min-expiry and TTL index, in no particular order. 
+ store.put(elementKey, minIndexValueProjector(InternalRow(newExpirationMs)), MIN_INDEX) + insertIntoTTLIndex(newExpirationMs, elementKey) } + + // If we have records [foo, bar, baz] and bar and baz are expiring, then, the + // entryCountBeforeExpirations would be 3. The numValuesExpired would be 2, and so the + // newEntryCount would be 3 - 2 = 1. + val entryCountBeforeExpirations = getEntryCount(elementKey) + val numValuesExpired = valueExpirationResult.numValuesExpired + val newEntryCount = entryCountBeforeExpirations - numValuesExpired + + TWSMetricsUtils.incrementMetric(metrics, "numRemovedStateRows", numValuesExpired) + + if (newEntryCount == 0) { + removeEntryCount(elementKey) + } else { + updateEntryCount(elementKey, newEntryCount) + } + + totalNumValuesExpired += numValuesExpired } + + totalNumValuesExpired } - /** - * Clears the user state associated with this grouping key - * if it has expired. This function is called by Spark to perform - * cleanup at the end of transformWithState processing. - * - * Spark uses a secondary index to determine if the user state for - * this grouping key has expired. However, its possible that the user - * has updated the TTL and secondary index is out of date. Implementations - * must validate that the user State has actually expired before cleanup based - * on their own State data. - * - * @param groupingKey grouping key for which cleanup should be performed. - * @param userKey user key for which cleanup should be performed. 
- */ - def clearIfExpired(groupingKeyRow: UnsafeRow, - userKeyRow: UnsafeRow): Long + override private[sql] def clearAllStateForElementKey(elementKey: UnsafeRow): Unit = { + val existingMinExpirationUnsafeRow = store.get(elementKey, MIN_INDEX) + if (existingMinExpirationUnsafeRow != null) { + val existingMinExpiration = existingMinExpirationUnsafeRow.getLong(0) + + store.remove(elementKey, stateName) + TWSMetricsUtils.incrementMetric(metrics, "numRemovedStateRows", getEntryCount(elementKey)) + removeEntryCount(elementKey) + + store.remove(elementKey, MIN_INDEX) + deleteFromTTLIndex(existingMinExpiration, elementKey) + } + } + + // Exposed for testing. + private[sql] def minIndexIterator(): Iterator[(UnsafeRow, Long)] = { + store + .iterator(MIN_INDEX) + .map(kv => (kv.key, kv.value.getLong(0))) + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TimerStateImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TimerStateImpl.scala index d0fbaf6600609..5d20f53449c59 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TimerStateImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TimerStateImpl.scala @@ -178,7 +178,7 @@ class TimerStateImpl( val rowPair = iter.next() val keyRow = rowPair.key val result = getTimerRowFromSecIndex(keyRow) - if (result._2 < expiryTimestampMs) { + if (result._2 <= expiryTimestampMs) { result } else { finished = true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala index f4705b89d5a87..aabbb5f8cacef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala @@ -20,7 +20,6 @@ import java.util.UUID import 
java.util.concurrent.TimeUnit.NANOSECONDS import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD @@ -76,17 +75,32 @@ case class TransformWithStateExec( initialStateDataAttrs: Seq[Attribute], initialStateDeserializer: Expression, initialState: SparkPlan) - extends BinaryExecNode with StateStoreWriter with WatermarkSupport with ObjectProducerExec { + extends BinaryExecNode + with StateStoreWriter + with WatermarkSupport + with ObjectProducerExec + with TransformWithStateMetadataUtils { override def shortName: String = "transformWithStateExec" // dummy value schema, the real schema will get during state variable init time private val DUMMY_VALUE_ROW_SCHEMA = new StructType().add("value", BinaryType) + // We need to just initialize key and value deserializer once per partition. + // The deserializers need to be lazily created on the executor since they + // are not serializable. + // Ideas for for improvement can be found here: + // https://issues.apache.org/jira/browse/SPARK-50437 + private lazy val getKeyObj = + ObjectOperator.deserializeRowToObject(keyDeserializer, groupingAttributes) + + private lazy val getValueObj = + ObjectOperator.deserializeRowToObject(valueDeserializer, dataAttributes) + override def shouldRunAnotherBatch(newInputWatermark: Long): Boolean = { if (timeMode == ProcessingTime) { - // TODO: check if we can return true only if actual timers are registered, or there is - // expired state + // TODO SPARK-50180: check if we can return true only if actual timers are registered, + // or there is expired state true } else if (outputMode == OutputMode.Append || outputMode == OutputMode.Update) { eventTimeWatermarkForEviction.isDefined && @@ -111,32 +125,32 @@ case class TransformWithStateExec( driverProcessorHandle } + /** + * This method is used for the driver-side stateful processor after we + * have collected all the necessary schemas. 
+ * This instance of the stateful processor won't be used again. + */ + private def closeProcessorHandle(): Unit = { + statefulProcessor.close() + statefulProcessor.setHandle(null) + } + /** * Fetching the columnFamilySchemas from the StatefulProcessorHandle * after init is called. */ - private def getColFamilySchemas(): Map[String, StateStoreColFamilySchema] = { + override def getColFamilySchemas(): Map[String, StateStoreColFamilySchema] = { val columnFamilySchemas = getDriverProcessorHandle().getColumnFamilySchemas closeProcessorHandle() columnFamilySchemas } - private def getStateVariableInfos(): Map[String, TransformWithStateVariableInfo] = { + override def getStateVariableInfos(): Map[String, TransformWithStateVariableInfo] = { val stateVariableInfos = getDriverProcessorHandle().getStateVariableInfos closeProcessorHandle() stateVariableInfos } - /** - * This method is used for the driver-side stateful processor after we - * have collected all the necessary schemas. - * This instance of the stateful processor won't be used again. - */ - private def closeProcessorHandle(): Unit = { - statefulProcessor.close() - statefulProcessor.setHandle(null) - } - /** * Controls watermark propagation to downstream modes. 
If timeMode is * ProcessingTime, the output rows cannot be interpreted in eventTime, hence @@ -230,11 +244,6 @@ case class TransformWithStateExec( private def handleInputRows(keyRow: UnsafeRow, valueRowIter: Iterator[InternalRow]): Iterator[InternalRow] = { - val getKeyObj = - ObjectOperator.deserializeRowToObject(keyDeserializer, groupingAttributes) - - val getValueObj = - ObjectOperator.deserializeRowToObject(valueDeserializer, dataAttributes) val getOutputRow = ObjectOperator.wrapObjectToRow(outputObjectType) @@ -261,8 +270,6 @@ case class TransformWithStateExec( private def processInitialStateRows( keyRow: UnsafeRow, initStateIter: Iterator[InternalRow]): Unit = { - val getKeyObj = - ObjectOperator.deserializeRowToObject(keyDeserializer, groupingAttributes) val getInitStateValueObj = ObjectOperator.deserializeRowToObject(initialStateDeserializer, initialStateDataAttrs) @@ -453,84 +460,22 @@ case class TransformWithStateExec( hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): List[StateSchemaValidationResult] = { - assert(stateSchemaVersion >= 3) - val newSchemas = getColFamilySchemas() - val stateSchemaDir = stateSchemaDirPath() - val newStateSchemaFilePath = - new Path(stateSchemaDir, s"${batchId}_${UUID.randomUUID().toString}") - val metadataPath = new Path(getStateInfo.checkpointLocation, s"${getStateInfo.operatorId}") - val metadataReader = OperatorStateMetadataReader.createReader( - metadataPath, hadoopConf, operatorStateMetadataVersion, batchId) - val operatorStateMetadata = try { - metadataReader.read() - } catch { - // If this is the first time we are running the query, there will be no metadata - // and this error is expected. In this case, we return None. 
- case ex: Exception if batchId == 0 => - None - } - - val oldStateSchemaFilePath: Option[Path] = operatorStateMetadata match { - case Some(metadata) => - metadata match { - case v2: OperatorStateMetadataV2 => - Some(new Path(v2.stateStoreInfo.head.stateSchemaFilePath)) - case _ => None - } - case None => None - } - List(StateSchemaCompatibilityChecker. - validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, - newSchemas.values.toList, session.sessionState, stateSchemaVersion, - storeName = StateStoreId.DEFAULT_STORE_NAME, - oldSchemaFilePath = oldStateSchemaFilePath, - newSchemaFilePath = Some(newStateSchemaFilePath))) + val info = getStateInfo + validateAndWriteStateSchema(hadoopConf, batchId, stateSchemaVersion, + info, session, operatorStateMetadataVersion) } /** Metadata of this stateful operator and its states stores. */ override def operatorStateMetadata( stateSchemaPaths: List[String]): OperatorStateMetadata = { val info = getStateInfo - val operatorInfo = OperatorInfoV1(info.operatorId, shortName) - // stateSchemaFilePath should be populated at this point - val stateStoreInfo = - Array(StateStoreMetadataV2( - StateStoreId.DEFAULT_STORE_NAME, 0, info.numPartitions, stateSchemaPaths.head)) - - val operatorProperties = TransformWithStateOperatorProperties( - timeMode.toString, - outputMode.toString, - getStateVariableInfos().values.toList - ) - OperatorStateMetadataV2(operatorInfo, stateStoreInfo, operatorProperties.json) - } - - private def stateSchemaDirPath(): Path = { - val storeName = StateStoreId.DEFAULT_STORE_NAME - val stateCheckpointPath = - new Path(getStateInfo.checkpointLocation, - s"${getStateInfo.operatorId.toString}") - - val stateSchemaPath = new Path(stateCheckpointPath, "_stateSchema") - val storeNamePath = new Path(stateSchemaPath, storeName) - storeNamePath + getOperatorStateMetadata(stateSchemaPaths, info, shortName, timeMode, outputMode) } override def validateNewMetadata( oldOperatorMetadata: OperatorStateMetadata, 
newOperatorMetadata: OperatorStateMetadata): Unit = { - (oldOperatorMetadata, newOperatorMetadata) match { - case ( - oldMetadataV2: OperatorStateMetadataV2, - newMetadataV2: OperatorStateMetadataV2) => - val oldOperatorProps = TransformWithStateOperatorProperties.fromJson( - oldMetadataV2.operatorPropertiesJson) - val newOperatorProps = TransformWithStateOperatorProperties.fromJson( - newMetadataV2.operatorPropertiesJson) - TransformWithStateOperatorProperties.validateOperatorProperties( - oldOperatorProps, newOperatorProps) - case (_, _) => - } + validateNewMetadataForTWS(oldOperatorMetadata, newOperatorMetadata) } override protected def doExecute(): RDD[InternalRow] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateVariableUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateVariableUtils.scala index bc67cee57fef8..34dddeab59d29 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateVariableUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateVariableUtils.scala @@ -16,6 +16,10 @@ */ package org.apache.spark.sql.execution.streaming +import java.util.UUID + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonAST._ import org.json4s.JsonDSL._ @@ -23,9 +27,10 @@ import org.json4s.jackson.JsonMethods import org.json4s.jackson.JsonMethods.{compact, render} import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.streaming.StateVariableType.StateVariableType -import org.apache.spark.sql.execution.streaming.state.StateStoreErrors -import org.apache.spark.sql.streaming.TimeMode +import org.apache.spark.sql.execution.streaming.state.{OperatorInfoV1, OperatorStateMetadata, OperatorStateMetadataReader, OperatorStateMetadataV2, 
StateSchemaCompatibilityChecker, StateSchemaValidationResult, StateStoreColFamilySchema, StateStoreErrors, StateStoreId, StateStoreMetadataV2} +import org.apache.spark.sql.streaming.{OutputMode, TimeMode} /** * This file contains utility classes and functions for managing state variables in @@ -158,3 +163,104 @@ object TransformWithStateOperatorProperties extends Logging { } } } + +/** + * This trait contains utils functions related to TransformWithState metadata. + * This is used both in Scala and Python side of TransformWithState metadata support when calling + * `init()` with DriverStatefulProcessorHandleImpl, and get the state schema and state metadata + * on driver during physical planning phase. + */ +trait TransformWithStateMetadataUtils extends Logging { + def getColFamilySchemas(): Map[String, StateStoreColFamilySchema] + + def getStateVariableInfos(): Map[String, TransformWithStateVariableInfo] + + def getOperatorStateMetadata( + stateSchemaPaths: List[String], + info: StatefulOperatorStateInfo, + shortName: String, + timeMode: TimeMode, + outputMode: OutputMode): OperatorStateMetadata = { + val operatorInfo = OperatorInfoV1(info.operatorId, shortName) + // stateSchemaFilePath should be populated at this point + val stateStoreInfo = + Array(StateStoreMetadataV2( + StateStoreId.DEFAULT_STORE_NAME, 0, info.numPartitions, stateSchemaPaths.head)) + + val operatorProperties = TransformWithStateOperatorProperties( + timeMode.toString, + outputMode.toString, + getStateVariableInfos().values.toList + ) + OperatorStateMetadataV2(operatorInfo, stateStoreInfo, operatorProperties.json) + } + + def validateAndWriteStateSchema( + hadoopConf: Configuration, + batchId: Long, + stateSchemaVersion: Int, + info: StatefulOperatorStateInfo, + session: SparkSession, + operatorStateMetadataVersion: Int = 2): List[StateSchemaValidationResult] = { + assert(stateSchemaVersion >= 3) + val newSchemas = getColFamilySchemas() + val stateSchemaDir = stateSchemaDirPath(info) + val 
newStateSchemaFilePath = + new Path(stateSchemaDir, s"${batchId}_${UUID.randomUUID().toString}") + val metadataPath = new Path(info.checkpointLocation, s"${info.operatorId}") + val metadataReader = OperatorStateMetadataReader.createReader( + metadataPath, hadoopConf, operatorStateMetadataVersion, batchId) + val operatorStateMetadata = try { + metadataReader.read() + } catch { + // If this is the first time we are running the query, there will be no metadata + // and this error is expected. In this case, we return None. + case _: Exception if batchId == 0 => + None + } + + val oldStateSchemaFilePath: Option[Path] = operatorStateMetadata match { + case Some(metadata) => + metadata match { + case v2: OperatorStateMetadataV2 => + Some(new Path(v2.stateStoreInfo.head.stateSchemaFilePath)) + case _ => None + } + case None => None + } + // state schema file written here, writing the new schema list we passed here + List(StateSchemaCompatibilityChecker. + validateAndMaybeEvolveStateSchema(info, hadoopConf, + newSchemas.values.toList, session.sessionState, stateSchemaVersion, + storeName = StateStoreId.DEFAULT_STORE_NAME, + oldSchemaFilePath = oldStateSchemaFilePath, + newSchemaFilePath = Some(newStateSchemaFilePath))) + } + + def validateNewMetadataForTWS( + oldOperatorMetadata: OperatorStateMetadata, + newOperatorMetadata: OperatorStateMetadata): Unit = { + (oldOperatorMetadata, newOperatorMetadata) match { + case ( + oldMetadataV2: OperatorStateMetadataV2, + newMetadataV2: OperatorStateMetadataV2) => + val oldOperatorProps = TransformWithStateOperatorProperties.fromJson( + oldMetadataV2.operatorPropertiesJson) + val newOperatorProps = TransformWithStateOperatorProperties.fromJson( + newMetadataV2.operatorPropertiesJson) + TransformWithStateOperatorProperties.validateOperatorProperties( + oldOperatorProps, newOperatorProps) + case (_, _) => + } + } + + private def stateSchemaDirPath(info: StatefulOperatorStateInfo): Path = { + val storeName = 
StateStoreId.DEFAULT_STORE_NAME + val stateCheckpointPath = + new Path(info.checkpointLocation, s"${info.operatorId.toString}") + + val stateSchemaPath = new Path(stateCheckpointPath, "_stateSchema") + val storeNamePath = new Path(stateSchemaPath, storeName) + storeNamePath + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ValueStateImplWithTTL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ValueStateImplWithTTL.scala index 60eea5842645e..87e4596f67309 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ValueStateImplWithTTL.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ValueStateImplWithTTL.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.streaming import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.streaming.TransformWithStateKeyValueRowSchemaUtils._ import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, StateStore} @@ -44,20 +43,20 @@ class ValueStateImplWithTTL[S]( ttlConfig: TTLConfig, batchTimestampMs: Long, metrics: Map[String, SQLMetric] = Map.empty) - extends SingleKeyTTLStateImpl( - stateName, store, keyExprEnc, batchTimestampMs) with ValueState[S] { + extends OneToOneTTLState( + stateName, store, keyExprEnc.schema, ttlConfig, batchTimestampMs, metrics) with ValueState[S] { - private val stateTypesEncoder = StateTypesEncoder(keyExprEnc, valEncoder, - stateName, hasTtl = true) - private val ttlExpirationMs = - StateTTL.calculateExpirationTimeForDuration(ttlConfig.ttlDuration, batchTimestampMs) + private val stateTypesEncoder = + StateTypesEncoder(keyExprEnc, valEncoder, stateName, hasTtl = true) initialize() private def initialize(): Unit = { store.createColFamilyIfAbsent(stateName, - keyExprEnc.schema, 
getValueSchemaWithTTL(valEncoder.schema, true), - NoPrefixKeyStateEncoderSpec(keyExprEnc.schema)) + keyExprEnc.schema, + getValueSchemaWithTTL(valEncoder.schema, true), + NoPrefixKeyStateEncoderSpec(keyExprEnc.schema) + ) } /** Function to check if state exists. Returns true if present and false otherwise */ @@ -76,6 +75,7 @@ class ValueStateImplWithTTL[S]( val retRow = store.get(encodedGroupingKey, stateName) if (retRow != null) { + // Getting the 0th ordinal of the struct using valEncoder val resState = stateTypesEncoder.decodeValue(retRow) if (!stateTypesEncoder.isExpired(retRow, batchTimestampMs)) { @@ -90,33 +90,19 @@ class ValueStateImplWithTTL[S]( /** Function to update and overwrite state associated with given key */ override def update(newState: S): Unit = { + val encodedKey = stateTypesEncoder.encodeGroupingKey() + + val ttlExpirationMs = StateTTL + .calculateExpirationTimeForDuration(ttlConfig.ttlDuration, batchTimestampMs) val encodedValue = stateTypesEncoder.encodeValue(newState, ttlExpirationMs) - val serializedGroupingKey = stateTypesEncoder.encodeGroupingKey() - store.put(serializedGroupingKey, - encodedValue, stateName) - TWSMetricsUtils.incrementMetric(metrics, "numUpdatedStateRows") - upsertTTLForStateKey(ttlExpirationMs, serializedGroupingKey) + + updatePrimaryAndSecondaryIndices(encodedKey, encodedValue, ttlExpirationMs) } /** Function to remove state for given key */ override def clear(): Unit = { - store.remove(stateTypesEncoder.encodeGroupingKey(), stateName) - TWSMetricsUtils.incrementMetric(metrics, "numRemovedStateRows") - clearTTLState() - } - - def clearIfExpired(groupingKey: UnsafeRow): Long = { - val retRow = store.get(groupingKey, stateName) - - var result = 0L - if (retRow != null) { - if (stateTypesEncoder.isExpired(retRow, batchTimestampMs)) { - store.remove(groupingKey, stateName) - TWSMetricsUtils.incrementMetric(metrics, "numRemovedStateRows") - result = 1L - } - } - result + val groupingKey = 
stateTypesEncoder.encodeGroupingKey() + clearAllStateForElementKey(groupingKey) } /* @@ -161,11 +147,16 @@ class ValueStateImplWithTTL[S]( } /** - * Get all ttl values stored in ttl state for current implicit - * grouping key. + * Get the TTL value stored in TTL state for the current implicit grouping key, + * if it exists. */ - private[sql] def getValuesInTTLState(): Iterator[Long] = { - getValuesInTTLState(stateTypesEncoder.encodeGroupingKey()) + private[sql] def getValueInTTLState(): Option[Long] = { + val groupingKey = stateTypesEncoder.encodeGroupingKey() + val ttlRowsForGroupingKey = getTTLRows().filter(_.elementKey == groupingKey).toSeq + + assert(ttlRowsForGroupingKey.size <= 1, "Multiple TTLRows found for grouping key " + + s"$groupingKey. Expected at most 1. Found: ${ttlRowsForGroupingKey.mkString(", ")}.") + ttlRowsForGroupingKey.headOption.map(_.expirationMs) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala index f0950063b1613..aaf8cbd69ea20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkPropagator.scala @@ -124,12 +124,14 @@ class UseSingleWatermarkPropagator extends WatermarkPropagator { /** * This implementation simulates propagation of watermark among operators. * - * The simulation algorithm traverses the physical plan tree via post-order (children first) to - * calculate (input watermark, output watermark) for all nodes. + * It is considered a "simulation" because watermarks are not being physically sent between + * operators, but rather propagated up the tree via post-order (children first) traversal of + * the query plan. This allows Structured Streaming to determine the new (input watermark, output + * watermark) for all nodes. 
* * For each node, below logic is applied: * - * - Input watermark for specific node is decided by `min(input watermarks from all children)`. + * - Input watermark for specific node is decided by `min(output watermarks from all children)`. * -- Children providing no input watermark (DEFAULT_WATERMARK_MS) are excluded. * -- If there is no valid input watermark from children, input watermark = DEFAULT_WATERMARK_MS. * - Output watermark for specific node is decided as following: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index ae06e82335b12..2deccb845fea2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -291,7 +291,8 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with newMap } catch { - case e: SparkException if e.getCondition.contains("CANNOT_LOAD_STATE_STORE") => + case e: SparkException + if Option(e.getCondition).exists(_.contains("CANNOT_LOAD_STATE_STORE")) => throw e case e: OutOfMemoryError => throw QueryExecutionErrors.notEnoughMemoryToLoadStore( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala index f8e9885cef14e..56f253b523358 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala @@ -77,19 +77,6 @@ class RocksDB( import RocksDB._ - case class RocksDBSnapshot( - checkpointDir: File, - version: Long, - numKeys: Long, - columnFamilyMapping: Map[String, Short], - maxColumnFamilyId: Short, - 
dfsFileSuffix: String, - fileMapping: Map[String, RocksDBSnapshotFile]) { - def close(): Unit = { - silentDeleteRecursively(checkpointDir, s"Free up local checkpoint of snapshot $version") - } - } - @volatile private var lastSnapshotVersion = 0L RocksDBLoader.loadLibrary() @@ -158,7 +145,7 @@ class RocksDB( @volatile private var db: NativeRocksDB = _ @volatile private var changelogWriter: Option[StateStoreChangelogWriter] = None private val enableChangelogCheckpointing: Boolean = conf.enableChangelogCheckpointing - @volatile private var loadedVersion = -1L // -1 = nothing valid is loaded + @volatile protected var loadedVersion: Long = -1L // -1 = nothing valid is loaded // variables to manage checkpoint ID. Once a checkpointing finishes, it needs to return // `lastCommittedStateStoreCkptId` as the committed checkpointID, as well as @@ -170,10 +157,11 @@ class RocksDB( // we have to use a new one. We have to update `sessionStateStoreCkptId` if we reload a previous // batch version, as we would have to use a new checkpointID for re-committing a version. // The reusing is to help debugging but is not required for the algorithm to work. 
- private var lastCommitBasedStateStoreCkptId: Option[String] = None - private var lastCommittedStateStoreCkptId: Option[String] = None - private var loadedStateStoreCkptId: Option[String] = None - private var sessionStateStoreCkptId: Option[String] = None + protected var lastCommitBasedStateStoreCkptId: Option[String] = None + protected var lastCommittedStateStoreCkptId: Option[String] = None + protected var loadedStateStoreCkptId: Option[String] = None + protected var sessionStateStoreCkptId: Option[String] = None + protected[sql] val lineageManager: RocksDBLineageManager = new RocksDBLineageManager @volatile private var numKeysOnLoadedVersion = 0L @volatile private var numKeysOnWritingVersion = 0L @@ -278,70 +266,129 @@ class RocksDB( // We send snapshots that needs to be uploaded by the maintenance thread to this queue private val snapshotsToUploadQueue = new ConcurrentLinkedQueue[RocksDBSnapshot]() + /** + * Read the lineage from the changelog files. It first gets the changelog reader + * of the correct changelog version and then reads the lineage information from the file. + * The changelog file is named as version_stateStoreCkptId.changelog + * @param version version of the changelog file, used to load changelog file. + * @param stateStoreCkptId uniqueId of the changelog file, used to load changelog file.
+ * @return the lineage stored in the changelog file + */ + private def getLineageFromChangelogFile( + version: Long, + stateStoreCkptId: Option[String]): Array[LineageItem] = { + var changelogReader: StateStoreChangelogReader = null + var currLineage: Array[LineageItem] = Array.empty + try { + changelogReader = fileManager.getChangelogReader(version, stateStoreCkptId) + currLineage = changelogReader.lineage + logInfo(log"Loading lineage: " + + log"${MDC(LogKeys.LINEAGE, lineageManager)} from " + + log"changelog version: ${MDC(LogKeys.VERSION_NUM, version)} " + + log"uniqueId: ${MDC(LogKeys.UUID, stateStoreCkptId.getOrElse(""))}.") + } finally { + if (changelogReader != null) { + changelogReader.closeIfNeeded() + } + } + currLineage + } + + /** * Load the given version of data in a native RocksDB instance. * Note that this will copy all the necessary file from DFS to local disk as needed, * and possibly restart the native RocksDB instance. */ - def load( + private def loadWithCheckpointId( version: Long, - stateStoreCkptId: Option[String] = None, + stateStoreCkptId: Option[String], readOnly: Boolean = false): RocksDB = { - assert(version >= 0) - acquire(LoadStore) - recordedMetrics = None - logInfo(log"Loading ${MDC(LogKeys.VERSION_NUM, version)}") + // An array contains lineage information from [snapShotVersion, version] + // (inclusive in both ends) + var currVersionLineage: Array[LineageItem] = lineageManager.getLineageForCurrVersion() try { - if (loadedVersion != version || - (enableStateStoreCheckpointIds && stateStoreCkptId.isDefined && - (loadedStateStoreCkptId.isEmpty || stateStoreCkptId.get != loadedStateStoreCkptId.get))) { + if (loadedVersion != version || (loadedStateStoreCkptId.isEmpty || + stateStoreCkptId.get != loadedStateStoreCkptId.get)) { closeDB(ignoreException = false) - val latestSnapshotVersion = fileManager.getLatestSnapshotVersion(version) + + val (latestSnapshotVersion, latestSnapshotUniqueId) = { + // Special handling when version is 0. 
+ // When loading the very first version (0), stateStoreCkptId does not need to be defined + // because there won't be 0.changelog / 0.zip file created in RocksDB under v2. + if (version == 0) { + assert(stateStoreCkptId.isEmpty, + "stateStoreCkptId should be empty when version is zero") + (0L, None) + // When there is a snapshot file, it is the ground truth, we can skip + // reconstructing the lineage from changelog file. + } else if (fileManager.existsSnapshotFile(version, stateStoreCkptId)) { + currVersionLineage = Array(LineageItem(version, stateStoreCkptId.get)) + (version, stateStoreCkptId) + } else { + currVersionLineage = getLineageFromChangelogFile(version, stateStoreCkptId) :+ + LineageItem(version, stateStoreCkptId.get) + currVersionLineage = currVersionLineage.sortBy(_.version) + + val latestSnapshotVersionsAndUniqueId = + fileManager.getLatestSnapshotVersionAndUniqueIdFromLineage(currVersionLineage) + latestSnapshotVersionsAndUniqueId match { + case Some(pair) => (pair._1, Option(pair._2)) + case None if currVersionLineage.head.version == 1L => + logDebug(log"Cannot find latest snapshot based on lineage but first version " + + log"is 1, use 0 as default. 
Lineage: ${MDC(LogKeys.LINEAGE, lineageManager)}") + (0L, None) + case _ => + throw QueryExecutionErrors.cannotFindBaseSnapshotCheckpoint( + printLineageItems(currVersionLineage)) + } + } + } + + logInfo(log"Loaded latestSnapshotVersion: ${ + MDC(LogKeys.SNAPSHOT_VERSION, latestSnapshotVersion)}, latestSnapshotUniqueId: ${ + MDC(LogKeys.UUID, latestSnapshotUniqueId)}") + val metadata = fileManager.loadCheckpointFromDfs(latestSnapshotVersion, - workingDir, rocksDBFileMapping) + workingDir, rocksDBFileMapping, latestSnapshotUniqueId) + loadedVersion = latestSnapshotVersion // reset the last snapshot version to the latest available snapshot version lastSnapshotVersion = latestSnapshotVersion + lineageManager.resetLineage(currVersionLineage) // Initialize maxVersion upon successful load from DFS fileManager.setMaxSeenVersion(version) - setInitialCFInfo() - metadata.columnFamilyMapping.foreach { mapping => - colFamilyNameToIdMap.putAll(mapping.asJava) - } + openLocalRocksDB(metadata) - metadata.maxColumnFamilyId.foreach { maxId => - maxColumnFamilyId.set(maxId) + if (loadedVersion != version) { + val versionsAndUniqueIds = currVersionLineage.collect { + case i if i.version > loadedVersion && i.version <= version => + (i.version, Option(i.checkpointUniqueId)) + } + replayChangelog(versionsAndUniqueIds) + loadedVersion = version + lineageManager.resetLineage(currVersionLineage) } - openDB() - numKeysOnWritingVersion = if (!conf.trackTotalNumberOfRows) { - // we don't track the total number of rows - discard the number being track - -1L - } else if (metadata.numKeys < 0) { - // we track the total number of rows, but the snapshot doesn't have tracking number - // need to count keys now - countKeys() - } else { - metadata.numKeys - } - if (loadedVersion != version) replayChangelog(version) // After changelog replay the numKeysOnWritingVersion will be updated to // the correct number of keys in the loaded version. 
numKeysOnLoadedVersion = numKeysOnWritingVersion fileManagerMetrics = fileManager.latestLoadCheckpointMetrics } - if (enableStateStoreCheckpointIds) { - lastCommitBasedStateStoreCkptId = None - loadedStateStoreCkptId = stateStoreCkptId - sessionStateStoreCkptId = Some(java.util.UUID.randomUUID.toString) - } + + lastCommitBasedStateStoreCkptId = None + loadedStateStoreCkptId = stateStoreCkptId + sessionStateStoreCkptId = Some(java.util.UUID.randomUUID.toString) lastCommittedStateStoreCkptId = None + if (conf.resetStatsOnLoad) { nativeStats.reset } - logInfo(log"Loaded ${MDC(LogKeys.VERSION_NUM, version)}") + + logInfo(log"Loaded ${MDC(LogKeys.VERSION_NUM, version)} " + + log"with uniqueId ${MDC(LogKeys.UUID, stateStoreCkptId)}") } catch { case t: Throwable => loadedVersion = -1 // invalidate loaded data @@ -349,6 +396,67 @@ class RocksDB( lastCommittedStateStoreCkptId = None loadedStateStoreCkptId = None sessionStateStoreCkptId = None + lineageManager.clear() + throw t + } + if (enableChangelogCheckpointing && !readOnly) { + // Make sure we don't leak resource. + changelogWriter.foreach(_.abort()) + // Initialize the changelog writer with lineage info + // The lineage stored in changelog files should normally start with + // the version of a snapshot, except for the first few versions. + // Because they are solely loaded from changelog file. + // (e.g. with default minDeltasForSnapshot, there is only 1_uuid1.changelog, no 1_uuid1.zip) + // It should end with exactly one version before the change log's version. 
+ changelogWriter = Some(fileManager.getChangeLogWriter( + version + 1, + useColumnFamilies, + sessionStateStoreCkptId, + Some(currVersionLineage))) + } + this + } + + private def loadWithoutCheckpointId( + version: Long, + readOnly: Boolean = false): RocksDB = { + try { + if (loadedVersion != version) { + closeDB(ignoreException = false) + val latestSnapshotVersion = fileManager.getLatestSnapshotVersion(version) + val metadata = fileManager.loadCheckpointFromDfs( + latestSnapshotVersion, + workingDir, + rocksDBFileMapping) + + loadedVersion = latestSnapshotVersion + + // reset the last snapshot version to the latest available snapshot version + lastSnapshotVersion = latestSnapshotVersion + + // Initialize maxVersion upon successful load from DFS + fileManager.setMaxSeenVersion(version) + + openLocalRocksDB(metadata) + + if (loadedVersion != version) { + val versionsAndUniqueIds: Array[(Long, Option[String])] = + (loadedVersion + 1 to version).map((_, None)).toArray + replayChangelog(versionsAndUniqueIds) + loadedVersion = version + } + // After changelog replay the numKeysOnWritingVersion will be updated to + // the correct number of keys in the loaded version. + numKeysOnLoadedVersion = numKeysOnWritingVersion + fileManagerMetrics = fileManager.latestLoadCheckpointMetrics + } + if (conf.resetStatsOnLoad) { + nativeStats.reset + } + logInfo(log"Loaded ${MDC(LogKeys.VERSION_NUM, version)}") + } catch { + case t: Throwable => + loadedVersion = -1 // invalidate loaded data throw t } if (enableChangelogCheckpointing && !readOnly) { @@ -359,6 +467,48 @@ class RocksDB( this } + /** + * Initialize key metrics based on the metadata loaded from DFS and open local RocksDB. 
+ */ + private def openLocalRocksDB(metadata: RocksDBCheckpointMetadata): Unit = { + setInitialCFInfo() + metadata.columnFamilyMapping.foreach { mapping => + colFamilyNameToIdMap.putAll(mapping.asJava) + } + + metadata.maxColumnFamilyId.foreach { maxId => + maxColumnFamilyId.set(maxId) + } + openDB() + numKeysOnWritingVersion = if (!conf.trackTotalNumberOfRows) { + // we don't track the total number of rows - discard the number being track + -1L + } else if (metadata.numKeys < 0) { + // we track the total number of rows, but the snapshot doesn't have tracking number + // need to count keys now + countKeys() + } else { + metadata.numKeys + } + } + + def load( + version: Long, + stateStoreCkptId: Option[String] = None, + readOnly: Boolean = false): RocksDB = { + assert(version >= 0) + acquire(LoadStore) + recordedMetrics = None + logInfo(log"Loading ${MDC(LogKeys.VERSION_NUM, version)} with stateStoreCkptId: ${ + MDC(LogKeys.UUID, stateStoreCkptId.getOrElse(""))}") + if (stateStoreCkptId.isDefined || enableStateStoreCheckpointIds && version == 0) { + loadWithCheckpointId(version, stateStoreCkptId, readOnly) + } else { + loadWithoutCheckpointId(version, readOnly) + } + this + } + /** * Load from the start snapshot version and apply all the changelog records to reach the * end version. Note that this will copy all the necessary files from DFS to local disk as needed, @@ -417,7 +567,12 @@ class RocksDB( } else { metadata.numKeys } - if (loadedVersion != endVersion) replayChangelog(endVersion) + if (loadedVersion != endVersion) { + val versionsAndUniqueIds: Array[(Long, Option[String])] = + (loadedVersion + 1 to endVersion).map((_, None)).toArray + replayChangelog(versionsAndUniqueIds) + loadedVersion = endVersion + } // After changelog replay the numKeysOnWritingVersion will be updated to // the correct number of keys in the loaded version. 
numKeysOnLoadedVersion = numKeysOnWritingVersion @@ -431,16 +586,23 @@ class RocksDB( /** * Replay change log from the loaded version to the target version. */ - private def replayChangelog(endVersion: Long): Unit = { + private def replayChangelog(versionsAndUniqueIds: Array[(Long, Option[String])]): Unit = { + assert(!versionsAndUniqueIds.isEmpty && versionsAndUniqueIds.head._1 == loadedVersion + 1, + s"Replay changelog should start from one version after loadedVersion: $loadedVersion," + + s" but it is not." + ) + logInfo(log"Replaying changelog from version " + log"${MDC(LogKeys.LOADED_VERSION, loadedVersion)} -> " + - log"${MDC(LogKeys.END_VERSION, endVersion)}") - for (v <- loadedVersion + 1 to endVersion) { - logInfo(log"Replaying changelog on version " + - log"${MDC(LogKeys.VERSION_NUM, v)}") + log"${MDC(LogKeys.END_VERSION, versionsAndUniqueIds.lastOption.map(_._1))}") + + versionsAndUniqueIds.foreach { case (v, uniqueId) => + logInfo(log"replaying changelog from version ${MDC(LogKeys.VERSION_NUM, v)} with " + + log"unique Id: ${MDC(LogKeys.UUID, uniqueId)}") + var changelogReader: StateStoreChangelogReader = null try { - changelogReader = fileManager.getChangelogReader(v, useColumnFamilies) + changelogReader = fileManager.getChangelogReader(v, uniqueId) changelogReader.foreach { case (recordType, key, value) => recordType match { case RecordType.PUT_RECORD => @@ -457,7 +619,6 @@ class RocksDB( if (changelogReader != null) changelogReader.closeIfNeeded() } } - loadedVersion = endVersion } /** @@ -496,7 +657,6 @@ class RocksDB( * @note This update is not committed to disk until commit() is called. */ def merge(key: Array[Byte], value: Array[Byte]): Unit = { - if (conf.trackTotalNumberOfRows) { val oldValue = db.get(readOptions, key) if (oldValue == null) { @@ -527,7 +687,6 @@ class RocksDB( * Get an iterator of all committed and uncommitted key-value pairs. 
*/ def iterator(): Iterator[ByteArrayPair] = { - val iter = db.newIterator() logInfo(log"Getting iterator from version ${MDC(LogKeys.LOADED_VERSION, loadedVersion)}") iter.seekToFirst() @@ -612,46 +771,11 @@ class RocksDB( try { logInfo(log"Flushing updates for ${MDC(LogKeys.VERSION_NUM, newVersion)}") - var compactTimeMs = 0L - var flushTimeMs = 0L - var checkpointTimeMs = 0L var snapshot: Option[RocksDBSnapshot] = None - if (shouldCreateSnapshot() || shouldForceSnapshot.get()) { - // Need to flush the change to disk before creating a checkpoint - // because rocksdb wal is disabled. - logInfo(log"Flushing updates for ${MDC(LogKeys.VERSION_NUM, newVersion)}") - flushTimeMs = timeTakenMs { - db.flush(flushOptions) - } - - if (conf.compactOnCommit) { - logInfo("Compacting") - compactTimeMs = timeTakenMs { - db.compactRange() - } - } - - checkpointTimeMs = timeTakenMs { - val checkpointDir = createTempDir("checkpoint") - logInfo(log"Creating checkpoint for ${MDC(LogKeys.VERSION_NUM, newVersion)} " + - log"in ${MDC(LogKeys.PATH, checkpointDir)}") - // Make sure the directory does not exist. Native RocksDB fails if the directory to - // checkpoint exists. - Utils.deleteRecursively(checkpointDir) - // We no longer pause background operation before creating a RocksDB checkpoint because - // it is unnecessary. The captured snapshot will still be consistent with ongoing - // background operations. - val cp = Checkpoint.create(db) - cp.createCheckpoint(checkpointDir.toString) - // if changelog checkpointing is disabled, the snapshot is uploaded synchronously - // inside the uploadSnapshot() called below. - // If changelog checkpointing is enabled, snapshot will be uploaded asynchronously - // during state store maintenance. 
- snapshot = Some(createSnapshot(checkpointDir, newVersion, - colFamilyNameToIdMap.asScala.toMap, maxColumnFamilyId.get().toShort)) - lastSnapshotVersion = newVersion - } + val (newSnapshot, snapshotLatency) = createSnapshot(newVersion, sessionStateStoreCkptId) + snapshot = newSnapshot + commitLatencyMs ++= snapshotLatency } logInfo(log"Syncing checkpoint for ${MDC(LogKeys.VERSION_NUM, newVersion)} to DFS") @@ -663,12 +787,7 @@ class RocksDB( var isUploaded = false if (shouldForceSnapshot.get()) { assert(snapshot.isDefined) - fileManagerMetrics = uploadSnapshot( - snapshot.get, - fileManager, - rocksDBFileMapping.snapshotsPendingUpload, - loggingId - ) + uploadSnapshot(snapshot.get) isUploaded = true shouldForceSnapshot.set(false) } @@ -686,15 +805,22 @@ class RocksDB( } else { assert(changelogWriter.isEmpty) assert(snapshot.isDefined) - fileManagerMetrics = uploadSnapshot( - snapshot.get, - fileManager, - rocksDBFileMapping.snapshotsPendingUpload, - loggingId - ) + uploadSnapshot(snapshot.get) } } + if (enableStateStoreCheckpointIds) { + lastCommitBasedStateStoreCkptId = loadedStateStoreCkptId + lastCommittedStateStoreCkptId = sessionStateStoreCkptId + loadedStateStoreCkptId = sessionStateStoreCkptId + lineageManager.appendLineageItem(LineageItem(newVersion, sessionStateStoreCkptId.get)) + logInfo(log"Update checkpoint IDs and lineage: ${MDC( + LogKeys.LOADED_CHECKPOINT_ID, loadedStateStoreCkptId)}," + + log" ${MDC(LogKeys.LAST_COMMITTED_CHECKPOINT_ID, lastCommittedStateStoreCkptId)}," + + log" ${MDC(LogKeys.LAST_COMMIT_BASED_CHECKPOINT_ID, lastCommitBasedStateStoreCkptId)}," + + log" ${MDC(LogKeys.LINEAGE, lineageManager)}") + } + // Set maxVersion when checkpoint files are synced to DFS successfully // We need to handle this explicitly in RocksDB as we could use different // changeLogWriter instances in fileManager instance when committing @@ -702,15 +828,7 @@ class RocksDB( numKeysOnLoadedVersion = numKeysOnWritingVersion loadedVersion = newVersion - if 
(enableStateStoreCheckpointIds) { - lastCommitBasedStateStoreCkptId = loadedStateStoreCkptId - lastCommittedStateStoreCkptId = sessionStateStoreCkptId - loadedStateStoreCkptId = sessionStateStoreCkptId - } commitLatencyMs ++= Map( - "flush" -> flushTimeMs, - "compact" -> compactTimeMs, - "checkpoint" -> checkpointTimeMs, "fileSync" -> fileSyncTimeMs ) recordedMetrics = Some(metrics) @@ -736,6 +854,69 @@ class RocksDB( } else true } + private def createSnapshot( + version: Long, + checkpointUniqueId: Option[String]): (Option[RocksDBSnapshot], Map[String, Long]) = { + // Need to flush the change to disk before creating a checkpoint + // because rocksdb wal is disabled. + logInfo(log"Flushing updates for ${MDC(LogKeys.VERSION_NUM, version)}") + val flushTimeMs = timeTakenMs { + db.flush(flushOptions) + } + val compactTimeMs = if (conf.compactOnCommit) { + logInfo(log"Compacting") + timeTakenMs { db.compactRange() } + } else 0L + + var snapshot: Option[RocksDBSnapshot] = None + + val checkpointTimeMs = timeTakenMs { + val checkpointDir = createTempDir("checkpoint") + logInfo(log"Creating checkpoint for ${MDC(LogKeys.VERSION_NUM, version)} in " + + log"${MDC(LogKeys.CHECKPOINT_PATH, checkpointDir)}") + // Make sure the directory does not exist. Native RocksDB fails if the directory to + // checkpoint exists. + Utils.deleteRecursively(checkpointDir) + // We no longer pause background operation before creating a RocksDB checkpoint because + // it is unnecessary. The captured snapshot will still be consistent with ongoing + // background operations. 
+ val cp = Checkpoint.create(db) + cp.createCheckpoint(checkpointDir.toString) + + val (dfsFileSuffix, immutableFileMapping) = rocksDBFileMapping.createSnapshotFileMapping( + fileManager, checkpointDir, version) + val newSnapshot = Some(RocksDBSnapshot( + checkpointDir, + version, + numKeysOnWritingVersion, + colFamilyNameToIdMap.asScala.toMap, + maxColumnFamilyId.get().toShort, + dfsFileSuffix, + immutableFileMapping, + checkpointUniqueId)) + + snapshot = newSnapshot + lastSnapshotVersion = version + } + + (snapshot, + Map( + "flush" -> flushTimeMs, + "compact" -> compactTimeMs, + "checkpoint" -> checkpointTimeMs + ) + ) + } + + private[sql] def uploadSnapshot(snapshot: RocksDBSnapshot): Unit = { + fileManagerMetrics = uploadSnapshot( + snapshot, + fileManager, + rocksDBFileMapping.snapshotsPendingUpload, + loggingId + ) + } + /** * Drop uncommitted changes, and roll back to previous version. */ @@ -748,6 +929,7 @@ class RocksDB( lastCommittedStateStoreCkptId = None loadedStateStoreCkptId = None sessionStateStoreCkptId = None + lineageManager.clear() changelogWriter.foreach(_.abort()) // Make sure changelogWriter gets recreated next time. 
changelogWriter = None @@ -772,16 +954,13 @@ class RocksDB( } if (mostRecentSnapshot.isDefined) { - fileManagerMetrics = uploadSnapshot( - mostRecentSnapshot.get, - fileManager, - rocksDBFileMapping.snapshotsPendingUpload, - loggingId - ) + uploadSnapshot(mostRecentSnapshot.get) } } val cleanupTime = timeTakenMs { - fileManager.deleteOldVersions(conf.minVersionsToRetain, conf.minVersionsToDelete) + fileManager.deleteOldVersions( + numVersionsToRetain = conf.minVersionsToRetain, + minVersionsToDelete = conf.minVersionsToDelete) } logInfo(log"Cleaned old data, time taken: ${MDC(LogKeys.TIME_UNITS, cleanupTime)} ms") } @@ -916,18 +1095,6 @@ class RocksDB( rocksDBMetricsOpt } - private def createSnapshot( - checkpointDir: File, - version: Long, - columnFamilyMapping: Map[String, Short], - maxColumnFamilyId: Short): RocksDBSnapshot = { - val (dfsFileSuffix, immutableFileMapping) = rocksDBFileMapping.createSnapshotFileMapping( - fileManager, checkpointDir, version) - - RocksDBSnapshot(checkpointDir, version, numKeysOnWritingVersion, - columnFamilyMapping, maxColumnFamilyId, dfsFileSuffix, immutableFileMapping) - } - /** * Function to acquire RocksDB instance lock that allows for synchronized access to the state * store instance @@ -1036,6 +1203,49 @@ class RocksDB( Option(acquiredThreadInfo).map(_.copy()) } + /** Upload the snapshot to DFS and remove it from snapshots pending */ + private def uploadSnapshot( + snapshot: RocksDBSnapshot, + fileManager: RocksDBFileManager, + snapshotsPendingUpload: Set[RocksDBVersionSnapshotInfo], + loggingId: String): RocksDBFileManagerMetrics = { + var fileManagerMetrics: RocksDBFileManagerMetrics = null + try { + val uploadTime = timeTakenMs { + fileManager.saveCheckpointToDfs( + snapshot.checkpointDir, + snapshot.version, + snapshot.numKeys, + snapshot.fileMapping, + Some(snapshot.columnFamilyMapping), + Some(snapshot.maxColumnFamilyId), + snapshot.uniqueId + ) + fileManagerMetrics = fileManager.latestSaveCheckpointMetrics + + val 
snapshotInfo = RocksDBVersionSnapshotInfo(snapshot.version, snapshot.dfsFileSuffix) + // We are only removing the uploaded snapshot info from the pending set, + // to let the file mapping (i.e. query threads) know that the snapshot (i.e. and its files) + // have been uploaded to DFS. We don't touch the file mapping here to avoid corrupting it. + snapshotsPendingUpload.remove(snapshotInfo) + } + // This is relatively aggressive because even if the uploading succeeds, + // it is not necessarily the one written to the commit log. But we can always load lineage + // from commit log so it is fine. + lineageManager.resetLineage(lineageManager.getLineageForCurrVersion() + .filter(i => i.version >= snapshot.version)) + logInfo(log"${MDC(LogKeys.LOG_ID, loggingId)}: " + + log"Upload snapshot of version ${MDC(LogKeys.VERSION_NUM, snapshot.version)}, " + + log"with uniqueId: ${MDC(LogKeys.UUID, snapshot.uniqueId)} " + + log"time taken: ${MDC(LogKeys.TIME_UNITS, uploadTime)} ms. " + + log"Current lineage: ${MDC(LogKeys.LINEAGE, lineageManager)}") + } finally { + snapshot.close() + } + + fileManagerMetrics + } + + /** Create a native RocksDB logger that forwards native logs to log4j with correct log levels.
*/ private def createLogger(): Logger = { val dbLogger = new Logger(rocksDbOptions.infoLogLevel()) { @@ -1072,6 +1282,24 @@ class RocksDB( Utils.createDirectory(localRootDir.getAbsolutePath, prefix) } + override protected def logName: String = s"${super.logName} $loggingId" +} + +object RocksDB extends Logging { + case class RocksDBSnapshot( + checkpointDir: File, + version: Long, + numKeys: Long, + columnFamilyMapping: Map[String, Short], + maxColumnFamilyId: Short, + dfsFileSuffix: String, + fileMapping: Map[String, RocksDBSnapshotFile], + uniqueId: Option[String] = None) { + def close(): Unit = { + silentDeleteRecursively(checkpointDir, s"Free up local checkpoint of snapshot $version") + } + } + /** Attempt to delete recursively, and log the error if any */ private def silentDeleteRecursively(file: File, msg: String): Unit = { try { @@ -1083,40 +1311,9 @@ class RocksDB( } } - override protected def logName: String = s"${super.logName} $loggingId" -} - -object RocksDB extends Logging { - - /** Upload the snapshot to DFS and remove it from snapshots pending */ - private def uploadSnapshot( - snapshot: RocksDB#RocksDBSnapshot, - fileManager: RocksDBFileManager, - snapshotsPendingUpload: Set[RocksDBVersionSnapshotInfo], - loggingId: String): RocksDBFileManagerMetrics = { - var fileManagerMetrics: RocksDBFileManagerMetrics = null - try { - val uploadTime = timeTakenMs { - fileManager.saveCheckpointToDfs(snapshot.checkpointDir, - snapshot.version, snapshot.numKeys, snapshot.fileMapping, - Some(snapshot.columnFamilyMapping), Some(snapshot.maxColumnFamilyId)) - fileManagerMetrics = fileManager.latestSaveCheckpointMetrics - - val snapshotInfo = RocksDBVersionSnapshotInfo(snapshot.version, snapshot.dfsFileSuffix) - // We are only removing the uploaded snapshot info from the pending set, - // to let the file mapping (i.e. query threads) know that the snapshot (i.e. and its files) - // have been uploaded to DFS. We don't touch the file mapping here to avoid corrupting it. 
- snapshotsPendingUpload.remove(snapshotInfo) - } - logInfo(log"${MDC(LogKeys.LOG_ID, loggingId)}: Upload snapshot of version " + - log"${MDC(LogKeys.VERSION_NUM, snapshot.version)}," + - log" time taken: ${MDC(LogKeys.TIME_UNITS, uploadTime)} ms") - } finally { - snapshot.close() - } - - fileManagerMetrics - } + private def printLineageItems(lineage: Array[LineageItem]): String = lineage.map { + case LineageItem(l, optStr) => s"$l:$optStr" + }.mkString(" ") /** Records the duration of running `body` for the next query progress update. */ private def timeTakenMs(body: => Unit): Long = Utils.timeTakenMs(body)._2 @@ -1533,3 +1730,40 @@ case class AcquiredThreadInfo( } } +/** + * A helper class to manage the lineage information when checkpoint unique id is enabled. + * "lineage" is an array of LineageItem (version, uniqueId) pair. + * + * The first item of "lineage" should normally be the version of a snapshot, except + * for the first few versions. Because they are solely loaded from changelog file. + * (i.e. with default minDeltasForSnapshot, there is only 1_uuid1.changelog, no 1_uuid1.zip) + * + * The last item of "lineage" corresponds to one version before the to-be-committed version. 
+ */ +private[sql] class RocksDBLineageManager { + @volatile private var lineage: Array[LineageItem] = Array.empty + + override def toString: String = lineage.map { + case LineageItem(version, uuid) => s"$version: $uuid" + }.mkString(" ") + + def appendLineageItem(item: LineageItem): Unit = { + lineage = lineage :+ item + } + + def resetLineage(newLineage: Array[LineageItem]): Unit = { + lineage = newLineage + } + + def getLineageForCurrVersion(): Array[LineageItem] = { + lineage.clone() + } + + def contains(item: LineageItem): Boolean = { + lineage.contains(item) + } + + def clear(): Unit = { + lineage = Array.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala index 6b13ff31c9d50..e42a46dfbe15a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala @@ -41,6 +41,7 @@ import org.apache.spark.internal.{Logging, LogKeys, MDC, MessageWithContext} import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.streaming.CheckpointFileManager +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils @@ -151,60 +152,79 @@ class RocksDBFileManager( private var minSeenVersion = 1L @volatile private var rootDirChecked: Boolean = false - private val versionToRocksDBFiles = new ConcurrentHashMap[Long, Seq[RocksDBImmutableFile]] - private def getChangelogVersion(useColumnFamilies: Boolean): Short = { - val changelogVersion: Short = if (useColumnFamilies) { - 2 - } else { - 1 + // (version, checkpointUniqueId) -> immutable files + private val versionToRocksDBFiles = + new ConcurrentHashMap[(Long, Option[String]), 
Seq[RocksDBImmutableFile]]() + + /** + * Get the changelog version based on rocksDB features. + * @return the version of changelog + */ + private def getChangelogWriterVersion( + useColumnFamilies: Boolean, + stateStoreCheckpointIdEnabled: Boolean): Short = { + (useColumnFamilies, stateStoreCheckpointIdEnabled) match { + case (false, false) => 1 + case (true, false) => 2 + case (false, true) => 3 + case _ => 4 } - changelogVersion } def getChangeLogWriter( version: Long, - useColumnFamilies: Boolean = false): StateStoreChangelogWriter = { - val changelogFile = dfsChangelogFile(version) + useColumnFamilies: Boolean = false, + checkpointUniqueId: Option[String] = None, + stateStoreCheckpointIdLineage: Option[Array[LineageItem]] = None + ): StateStoreChangelogWriter = { + val changelogFile = dfsChangelogFile(version, checkpointUniqueId) if (!rootDirChecked) { val rootDir = new Path(dfsRootDir) if (!fm.exists(rootDir)) fm.mkdirs(rootDir) rootDirChecked = true } - val changelogVersion = getChangelogVersion(useColumnFamilies) + val enableStateStoreCheckpointIds = checkpointUniqueId.isDefined + val changelogVersion = getChangelogWriterVersion( + useColumnFamilies, enableStateStoreCheckpointIds) + val changelogWriter = changelogVersion match { case 1 => new StateStoreChangelogWriterV1(fm, changelogFile, codec) case 2 => new StateStoreChangelogWriterV2(fm, changelogFile, codec) + case 3 => + assert(enableStateStoreCheckpointIds && stateStoreCheckpointIdLineage.isDefined, + "StateStoreChangelogWriterV3 should only be initialized when " + + "state store checkpoint unique id is enabled") + new StateStoreChangelogWriterV3(fm, changelogFile, codec, stateStoreCheckpointIdLineage.get) + case 4 => + assert(enableStateStoreCheckpointIds && stateStoreCheckpointIdLineage.isDefined, + "StateStoreChangelogWriterV4 should only be initialized when " + + "state store checkpoint unique id is enabled") + new StateStoreChangelogWriterV4(fm, changelogFile, codec, 
stateStoreCheckpointIdLineage.get) case _ => throw QueryExecutionErrors.invalidChangeLogWriterVersion(changelogVersion) } + + logInfo(log"Loaded change log reader version " + + log"${MDC(LogKeys.FILE_VERSION, changelogWriter.version)}") + changelogWriter } // Get the changelog file at version def getChangelogReader( version: Long, - useColumnFamilies: Boolean = false): StateStoreChangelogReader = { - val changelogFile = dfsChangelogFile(version) - - // Note that ideally we should get the version for the reader from the - // changelog itself. However, since we don't record this for v1, we need to - // rely on external arguments to make this call today. Within the reader, we verify - // for the correctness of the decided/expected version. We might revisit this pattern - // as we add more changelog versions in the future. - val changelogVersion = getChangelogVersion(useColumnFamilies) - val changelogReader = changelogVersion match { - case 1 => - new StateStoreChangelogReaderV1(fm, changelogFile, codec) - case 2 => - new StateStoreChangelogReaderV2(fm, changelogFile, codec) - case _ => - throw QueryExecutionErrors.invalidChangeLogReaderVersion(changelogVersion) - } - changelogReader + checkpointUniqueId: Option[String] = None): StateStoreChangelogReader = { + val changelogFile = dfsChangelogFile(version, checkpointUniqueId) + val reader = new StateStoreChangelogReaderFactory(fm, changelogFile, codec) + .constructChangelogReader() + + logInfo(log"Loaded change log reader version ${MDC(LogKeys.FILE_VERSION, reader.version)}") + + reader } /** @@ -230,13 +250,15 @@ class RocksDBFileManager( numKeys: Long, fileMapping: Map[String, RocksDBSnapshotFile], columnFamilyMapping: Option[Map[String, Short]] = None, - maxColumnFamilyId: Option[Short] = None): Unit = { + maxColumnFamilyId: Option[Short] = None, + checkpointUniqueId: Option[String] = None): Unit = { logFilesInDir(checkpointDir, log"Saving checkpoint files " + log"for version ${MDC(LogKeys.VERSION_NUM, version)}") 
val (localImmutableFiles, localOtherFiles) = listRocksDBFiles(checkpointDir) - val rocksDBFiles = saveImmutableFilesToDfs(version, localImmutableFiles, fileMapping) - val metadata = RocksDBCheckpointMetadata( - rocksDBFiles, numKeys, columnFamilyMapping, maxColumnFamilyId) + val rocksDBFiles = saveImmutableFilesToDfs( + version, localImmutableFiles, fileMapping, checkpointUniqueId) + val metadata = RocksDBCheckpointMetadata(rocksDBFiles, numKeys, columnFamilyMapping, + maxColumnFamilyId) val metadataFile = localMetadataFile(checkpointDir) metadata.writeToFile(metadataFile) logInfo(log"Written metadata for version ${MDC(LogKeys.VERSION_NUM, version)}:\n" + @@ -255,8 +277,9 @@ class RocksDBFileManager( rootDirChecked = true } } - zipToDfsFile(localOtherFiles :+ metadataFile, dfsBatchZipFile(version)) - logInfo(log"Saved checkpoint file for version ${MDC(LogKeys.VERSION_NUM, version)}") + zipToDfsFile(localOtherFiles :+ metadataFile, dfsBatchZipFile(version, checkpointUniqueId)) + logInfo(log"Saved checkpoint file for version ${MDC(LogKeys.VERSION_NUM, version)} " + + log"checkpointUniqueId: ${MDC(LogKeys.UUID, checkpointUniqueId.getOrElse(""))}") } /** @@ -268,12 +291,14 @@ class RocksDBFileManager( def loadCheckpointFromDfs( version: Long, localDir: File, - rocksDBFileMapping: RocksDBFileMapping): RocksDBCheckpointMetadata = { - logInfo(log"Loading checkpoint files for version ${MDC(LogKeys.VERSION_NUM, version)}") + rocksDBFileMapping: RocksDBFileMapping, + checkpointUniqueId: Option[String] = None): RocksDBCheckpointMetadata = { + logInfo(log"Loading checkpoint files for version ${MDC(LogKeys.VERSION_NUM, version)} " + + log"checkpointUniqueId: ${MDC(LogKeys.UUID, checkpointUniqueId.getOrElse(""))}") // The unique ids of SST files are checked when opening a rocksdb instance. The SST files // in larger versions can't be reused even if they have the same size and name because // they belong to another rocksdb instance. 
- versionToRocksDBFiles.keySet().removeIf(_ >= version) + versionToRocksDBFiles.keySet().removeIf(_._1 >= version) val metadata = if (version == 0) { if (localDir.exists) Utils.deleteRecursively(localDir) localDir.mkdirs() @@ -281,7 +306,7 @@ class RocksDBFileManager( } else { // Delete all non-immutable files in local dir, and unzip new ones from DFS commit file listRocksDBFiles(localDir)._2.foreach(_.delete()) - Utils.unzipFilesFromFile(fs, dfsBatchZipFile(version), localDir) + Utils.unzipFilesFromFile(fs, dfsBatchZipFile(version, checkpointUniqueId), localDir) // Copy the necessary immutable files val metadataFile = localMetadataFile(localDir) @@ -289,7 +314,7 @@ class RocksDBFileManager( logInfo(log"Read metadata for version ${MDC(LogKeys.VERSION_NUM, version)}:\n" + log"${MDC(LogKeys.METADATA_JSON, metadata.prettyJson)}") loadImmutableFilesFromDfs(metadata.immutableFiles, localDir, rocksDBFileMapping, version) - versionToRocksDBFiles.put(version, metadata.immutableFiles) + versionToRocksDBFiles.put((version, checkpointUniqueId), metadata.immutableFiles) metadataFile.delete() metadata } @@ -298,6 +323,17 @@ class RocksDBFileManager( metadata } + // Return if there is a snapshot file at the corresponding version + // and optionally with checkpointunique id, e.g. 
version.zip or version_uniqueId.zip + def existsSnapshotFile(version: Long, checkpointUniqueId: Option[String] = None): Boolean = { + if (!rootDirChecked) { + val path = new Path(dfsRootDir) + if (!fm.exists(path)) fm.mkdirs(path) + rootDirChecked = true + } + fm.exists(dfsBatchZipFile(version, checkpointUniqueId)) + } + // Get latest snapshot version <= version def getLatestSnapshotVersion(version: Long): Long = { val path = new Path(dfsRootDir) @@ -316,20 +352,52 @@ class RocksDBFileManager( } } + /** + * Based on the ground truth lineage loaded from changelog file (lineage), this function + * does file listing to find all snapshot (version, uniqueId) pairs, and finds + * the ground truth latest snapshot (version, uniqueId) the db instance needs to load. + * + * @param lineage The ground truth lineage loaded from changelog file, sorted by id + * @return The ground truth latest snapshot (version, uniqueId) the db instance needs to load, + * when the return value is None it means ther is no such snapshot found. + */ + def getLatestSnapshotVersionAndUniqueIdFromLineage( + lineage: Array[LineageItem]): Option[(Long, String)] = { + val path = new Path(dfsRootDir) + if (fm.exists(path)) { + fm.list(path, onlyZipFiles) + .map(_.getPath.getName.stripSuffix(".zip").split("_")) + .collect { + case Array(ver, id) if lineage.contains(LineageItem(ver.toLong, id)) => + (ver.toLong, id) + } + .sortBy(_._1) + .reverse + .headOption + } else { + None + } + } /** Get the latest version available in the DFS directory. If no data present, it returns 0. 
*/ def getLatestVersion(): Long = { val path = new Path(dfsRootDir) if (fm.exists(path)) { val files = fm.list(path).map(_.getPath) - val changelogFileVersions = files - .filter(onlyChangelogFiles.accept) - .map(_.getName.stripSuffix(".changelog")) - .map(_.toLong) - val snapshotFileVersions = files - .filter(onlyZipFiles.accept) - .map(_.getName.stripSuffix(".zip")) - .map(_.toLong) + val changelogFileVersions = files.filter(onlyChangelogFiles.accept) + .map { fileName => + fileName.getName.stripSuffix(".changelog").split("_") match { + case Array(version, _) => version.toLong + case Array(version) => version.toLong + } + } + val snapshotFileVersions = files.filter(onlyZipFiles.accept) + .map { fileName => + fileName.getName.stripSuffix(".zip").split("_") match { + case Array(version, _) => version.toLong + case Array(version) => version.toLong + } + } val versions = changelogFileVersions ++ snapshotFileVersions versions.foldLeft(0L)(math.max) } else { @@ -370,15 +438,18 @@ class RocksDBFileManager( } } - private def deleteChangelogFiles(versionsToDelete: Array[Long]): Unit = { - versionsToDelete.foreach { version => + private def deleteChangelogFiles( + versionsAndUniqueIdsToDelete: Array[(Long, Option[String])]): Unit = { + versionsAndUniqueIdsToDelete.foreach { case (version, uniqueId) => try { - fm.delete(dfsChangelogFile(version)) - logInfo(log"Deleted changelog file ${MDC(LogKeys.VERSION_NUM, version)}") + fm.delete(dfsChangelogFile(version, uniqueId)) + logInfo(log"Deleted changelog file ${MDC(LogKeys.VERSION_NUM, version)} uniqueId: " + + log"${MDC(LogKeys.UUID, uniqueId.getOrElse(""))}") } catch { case e: Exception => logWarning( - log"Error deleting changelog file for version ${MDC(LogKeys.FILE_VERSION, version)}", e) + log"Error deleting changelog file for version ${MDC(LogKeys.FILE_VERSION, version)} " + + log"uniqueId: ${MDC(LogKeys.UUID, uniqueId.getOrElse(""))}", e) } } } @@ -468,38 +539,43 @@ class RocksDBFileManager( val snapshotFiles = 
allFiles.filter(file => onlyZipFiles.accept(file)) val changelogFiles = allFiles.filter(file => onlyChangelogFiles.accept(file)) // All versions present in DFS, sorted - val sortedSnapshotVersions = snapshotFiles - .map(_.getName.stripSuffix(".zip")) - .map(_.toLong) - .sorted + val sortedSnapshotVersionsAndUniqueIds = snapshotFiles + .map(_.getName.stripSuffix(".zip").split("_")) + .map { + case Array(version, uniqueId) => (version.toLong, Some(uniqueId)) + case Array(version) => (version.toLong, None) + } + .sortBy(_._1) // Return if no versions generated yet - if (sortedSnapshotVersions.isEmpty) return + if (sortedSnapshotVersionsAndUniqueIds.isEmpty) return // Find the versions to delete - val maxSnapshotVersionPresent = sortedSnapshotVersions.last + val maxSnapshotVersionPresent = sortedSnapshotVersionsAndUniqueIds.last._1 // In order to reconstruct numVersionsToRetain version, retain the latest snapshot // that satisfies (version <= maxSnapshotVersionPresent - numVersionsToRetain + 1). // If none of the snapshots satisfy the condition, minVersionToRetain will be 0 and // no version gets deleted. - val minVersionToRetain = sortedSnapshotVersions + val minVersionToRetain = sortedSnapshotVersionsAndUniqueIds + .map(_._1) .filter(_ <= maxSnapshotVersionPresent - numVersionsToRetain + 1) .foldLeft(0L)(math.max) // When snapshotVersionToDelete is non-empty, there are at least 2 snapshot versions. // We only delete orphan files when there are at least 2 versions, // which avoid deleting files for running tasks. 
- val snapshotVersionsToDelete = sortedSnapshotVersions.filter(_ < minVersionToRetain) - if (snapshotVersionsToDelete.isEmpty) return - + val snapshotVersionsAndUniqueIdsToDelete = sortedSnapshotVersionsAndUniqueIds + .filter(_._1 < minVersionToRetain) + val snapshotVersionsToDelete = snapshotVersionsAndUniqueIdsToDelete.map(_._1) + if (snapshotVersionsAndUniqueIdsToDelete.isEmpty) return // Resolve RocksDB files for all the versions and find the max version each file is used val fileToMaxUsedVersion = new mutable.HashMap[String, Long] - sortedSnapshotVersions.foreach { version => - val files = Option(versionToRocksDBFiles.get(version)).getOrElse { - val newResolvedFiles = getImmutableFilesFromVersionZip(version) - versionToRocksDBFiles.put(version, newResolvedFiles) + sortedSnapshotVersionsAndUniqueIds.foreach { case (version, uniqueId) => + val files = Option(versionToRocksDBFiles.get((version, uniqueId))).getOrElse { + val newResolvedFiles = getImmutableFilesFromVersionZip(version, uniqueId) + versionToRocksDBFiles.put((version, uniqueId), newResolvedFiles) newResolvedFiles } files.foreach(f => fileToMaxUsedVersion(f.dfsFileName) = @@ -542,11 +618,11 @@ class RocksDBFileManager( } // Delete the version files and forget about them - snapshotVersionsToDelete.foreach { version => - val versionFile = dfsBatchZipFile(version) + snapshotVersionsAndUniqueIdsToDelete.foreach { case (version, uniqueId) => + val versionFile = dfsBatchZipFile(version, uniqueId) try { fm.delete(versionFile) - versionToRocksDBFiles.remove(version) + versionToRocksDBFiles.remove((version, uniqueId)) logDebug(s"Deleted version $version") } catch { case e: Exception => @@ -558,10 +634,16 @@ class RocksDBFileManager( log"(failed to delete" + log"${MDC(LogKeys.NUM_FILES_FAILED_TO_DELETE, failedToDelete)} files) " + log"not used in versions >= ${MDC(LogKeys.MIN_VERSION_NUM, minVersionToRetain)}") - val changelogVersionsToDelete = changelogFiles - 
.map(_.getName.stripSuffix(".changelog")).map(_.toLong) - .filter(_ < minVersionToRetain) - deleteChangelogFiles(changelogVersionsToDelete) + + val changelogVersionsAndUniqueIdsToDelete: Array[(Long, Option[String])] = changelogFiles + .map(_.getName.stripSuffix(".changelog").split("_")) + .map { + case Array(version, uniqueId) => (version.toLong, Option(uniqueId)) + case Array(version) => (version.toLong, None) + } + .filter(_._1 < minVersionToRetain) + + deleteChangelogFiles(changelogVersionsAndUniqueIdsToDelete) // Always set minSeenVersion for regular deletion frequency even if deletion fails. // This is safe because subsequent calls retry deleting old version files @@ -572,10 +654,12 @@ class RocksDBFileManager( private def saveImmutableFilesToDfs( version: Long, localFiles: Seq[File], - fileMappings: Map[String, RocksDBSnapshotFile]): Seq[RocksDBImmutableFile] = { + fileMappings: Map[String, RocksDBSnapshotFile], + checkpointUniqueId: Option[String] = None): Seq[RocksDBImmutableFile] = { // Get the immutable files used in previous versions, as some of those uploaded files can be // reused for this version - logInfo(log"Saving RocksDB files to DFS for ${MDC(LogKeys.VERSION_NUM, version)}") + logInfo(log"Saving RocksDB files to DFS for version ${MDC(LogKeys.VERSION_NUM, version)} " + + log"uniqueId: ${MDC(LogKeys.UUID, checkpointUniqueId.getOrElse(""))}") var bytesCopied = 0L var filesCopied = 0L @@ -611,7 +695,7 @@ class RocksDBFileManager( log"(${MDC(LogKeys.NUM_BYTES, bytesCopied)} bytes) from local to" + log" DFS for version ${MDC(LogKeys.VERSION_NUM, version)}. 
" + log"${MDC(LogKeys.NUM_FILES_REUSED, filesReused)} files reused without copying.") - versionToRocksDBFiles.put(version, immutableFiles) + versionToRocksDBFiles.put((version, checkpointUniqueId), immutableFiles) saveCheckpointMetrics = RocksDBFileManagerMetrics( bytesCopied = bytesCopied, filesCopied = filesCopied, @@ -699,10 +783,11 @@ class RocksDBFileManager( } /** Get the SST files required for a version from the version zip file in DFS */ - private def getImmutableFilesFromVersionZip(version: Long): Seq[RocksDBImmutableFile] = { + private def getImmutableFilesFromVersionZip( + version: Long, checkpointUniqueId: Option[String] = None): Seq[RocksDBImmutableFile] = { Utils.deleteRecursively(localTempDir) localTempDir.mkdirs() - Utils.unzipFilesFromFile(fs, dfsBatchZipFile(version), localTempDir) + Utils.unzipFilesFromFile(fs, dfsBatchZipFile(version, checkpointUniqueId), localTempDir) val metadataFile = localMetadataFile(localTempDir) val metadata = RocksDBCheckpointMetadata.readFromFile(metadataFile) metadata.immutableFiles @@ -774,10 +859,14 @@ class RocksDBFileManager( immutableFile.dfsFileName.substring(suffixStart + 1, suffixEnd) } - private def dfsBatchZipFile(version: Long): Path = new Path(s"$dfsRootDir/$version.zip") + private def dfsBatchZipFile(version: Long, checkpointUniqueId: Option[String] = None): Path = + checkpointUniqueId.map(id => new Path(s"$dfsRootDir/${version}_$id.zip")) + .getOrElse(new Path(s"$dfsRootDir/$version.zip")) // We use changelog suffix intentionally so that we can tell the difference from changelog file of // HDFSBackedStateStore which is named version.delta. 
- private def dfsChangelogFile(version: Long): Path = new Path(s"$dfsRootDir/$version.changelog") + private def dfsChangelogFile(version: Long, checkpointUniqueId: Option[String] = None): Path = + checkpointUniqueId.map(id => new Path(s"$dfsRootDir/${version}_$id.changelog")) + .getOrElse(new Path(s"$dfsRootDir/$version.changelog")) private def localMetadataFile(parentDir: File): File = new File(parentDir, "metadata") @@ -873,7 +962,7 @@ case class RocksDBCheckpointMetadata( /** Helper class for [[RocksDBCheckpointMetadata]] */ object RocksDBCheckpointMetadata { - val VERSION = 1 + val VERSION = SQLConf.get.stateStoreCheckpointFormatVersion implicit val format: Formats = Serialization.formats(NoTypeHints) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateEncoder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateEncoder.scala index 4c7a226e0973f..46b4ad205c2fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateEncoder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateEncoder.scala @@ -17,14 +17,22 @@ package org.apache.spark.sql.execution.streaming.state +import java.io.ByteArrayOutputStream import java.lang.Double.{doubleToRawLongBits, longBitsToDouble} import java.lang.Float.{floatToRawIntBits, intBitsToFloat} import java.nio.{ByteBuffer, ByteOrder} +import org.apache.avro.Schema +import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord} +import org.apache.avro.io.{DecoderFactory, EncoderFactory} + import org.apache.spark.internal.Logging +import org.apache.spark.sql.avro.{AvroDeserializer, AvroOptions, AvroSerializer, SchemaConverters} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{BoundReference, JoinedRow, UnsafeProjection, UnsafeRow} import 
org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter -import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider.{STATE_ENCODING_NUM_VERSION_BYTES, STATE_ENCODING_VERSION, VIRTUAL_COL_FAMILY_PREFIX_BYTES} +import org.apache.spark.sql.execution.streaming.StateStoreColumnFamilySchemaUtils +import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider.{SCHEMA_ID_PREFIX_BYTES, STATE_ENCODING_NUM_VERSION_BYTES, STATE_ENCODING_VERSION, VIRTUAL_COL_FAMILY_PREFIX_BYTES} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform @@ -43,90 +51,208 @@ sealed trait RocksDBValueStateEncoder { def decodeValues(valueBytes: Array[Byte]): Iterator[UnsafeRow] } -abstract class RocksDBKeyStateEncoderBase( - useColumnFamilies: Boolean, - virtualColFamilyId: Option[Short] = None) extends RocksDBKeyStateEncoder { - def offsetForColFamilyPrefix: Int = - if (useColumnFamilies) VIRTUAL_COL_FAMILY_PREFIX_BYTES else 0 +/** + * Contains schema version information for both key and value schemas in a state store. + * This information is used to support schema evolution, allowing state schemas to be + * modified over time while maintaining compatibility with existing state data. + * + * @param keySchemaId A unique identifier for the version of the key schema. + * Used to track and handle changes to the key schema structure. + * @param valueSchemaId A unique identifier for the version of the value schema. + * Used to track and handle changes to the value schema structure. + */ +case class StateSchemaInfo( + keySchemaId: Short, + valueSchemaId: Short +) + +/** + * Represents a row of state data along with its schema version. + * Used during state storage operations to track which schema version was used + * to encode the data, enabling proper decoding even when schemas have evolved. + * + * @param schemaId The version identifier for the schema that was used to encode this row. 
+ * This could be either a key schema ID or value schema ID depending on context. + * @param bytes The actual encoded data bytes for this row. When using Avro encoding, + * these bytes contain the Avro-serialized data. For UnsafeRow encoding, + * these contain the binary-encoded row data. + */ +case class StateSchemaIdRow( + schemaId: Short, + bytes: Array[Byte] +) +/** + * The DataEncoder can encode UnsafeRows into raw bytes in two ways: + * - Using the direct byte layout of the UnsafeRow + * - Converting the UnsafeRow into an Avro row, and encoding that + * In both of these cases, the raw bytes that are written into RockDB have + * headers, footers and other metadata, but they also have data that is provided + * by the callers. The metadata in each row does not need to be written as Avro or UnsafeRow, + * but the actual data provided by the caller does. + * The classes that use this trait require specialized partial encoding which makes them much + * easier to cache and use, which is why each DataEncoder deals with multiple schemas. + */ +trait DataEncoder { /** - * Get Byte Array for the virtual column family id that is used as prefix for - * key state rows. + * Encodes a complete key row into bytes. Used as the primary key for state lookups. + * + * @param row An UnsafeRow containing all key columns as defined in the keySchema + * @return Serialized byte array representation of the key */ - override def getColumnFamilyIdBytes(): Array[Byte] = { - assert(useColumnFamilies, "Cannot return virtual Column Family Id Bytes" + - " because multiple Column is not supported for this encoder") - val encodedBytes = new Array[Byte](VIRTUAL_COL_FAMILY_PREFIX_BYTES) - Platform.putShort(encodedBytes, Platform.BYTE_ARRAY_OFFSET, virtualColFamilyId.get) - encodedBytes - } + def encodeKey(row: UnsafeRow): Array[Byte] /** - * Encode and put column family Id as a prefix to a pre-allocated byte array. + * Encodes the non-prefix portion of a key row. 
Used with prefix scan and + * range scan state lookups where the key is split into prefix and remaining portions. * - * @param numBytes - size of byte array to be created for storing key row (without - * column family prefix) - * @return Array[Byte] for an array byte to put encoded key bytes - * Int for a starting offset to put the encoded key bytes + * For prefix scans: Encodes columns after the prefix columns + * For range scans: Encodes columns not included in the ordering columns + * + * @param row An UnsafeRow containing only the remaining key columns + * @return Serialized byte array of the remaining key portion + * @throws UnsupportedOperationException if called on an encoder that doesn't support split keys */ - protected def encodeColumnFamilyPrefix(numBytes: Int): (Array[Byte], Int) = { - val encodedBytes = new Array[Byte](numBytes + offsetForColFamilyPrefix) - var offset = Platform.BYTE_ARRAY_OFFSET - if (useColumnFamilies) { - Platform.putShort(encodedBytes, Platform.BYTE_ARRAY_OFFSET, virtualColFamilyId.get) - offset = Platform.BYTE_ARRAY_OFFSET + offsetForColFamilyPrefix - } - (encodedBytes, offset) - } + def encodeRemainingKey(row: UnsafeRow): Array[Byte] /** - * Get starting offset for decoding an encoded key byte array. + * Encodes key columns used for range scanning, ensuring proper sort order in RocksDB. 
+ * + * This method handles special encoding for numeric types to maintain correct sort order: + * - Adds sign byte markers for numeric types + * - Flips bits for negative floating point values + * - Preserves null ordering + * + * @param row An UnsafeRow containing the columns needed for range scan + * (specified by orderingOrdinals) + * @return Serialized bytes that will maintain correct sort order in RocksDB + * @throws UnsupportedOperationException if called on an encoder that doesn't support range scans */ - protected def decodeKeyStartOffset: Int = { - if (useColumnFamilies) { - Platform.BYTE_ARRAY_OFFSET + VIRTUAL_COL_FAMILY_PREFIX_BYTES - } else Platform.BYTE_ARRAY_OFFSET - } + def encodePrefixKeyForRangeScan(row: UnsafeRow): Array[Byte] + + /** + * Encodes a value row into bytes. + * + * @param row An UnsafeRow containing the value columns as defined in the valueSchema + * @return Serialized byte array representation of the value + */ + def encodeValue(row: UnsafeRow): Array[Byte] + + /** + * Decodes a complete key from its serialized byte form. + * + * For NoPrefixKeyStateEncoder: Decodes the entire key + * For PrefixKeyScanStateEncoder: Decodes only the prefix portion + * + * @param bytes Serialized byte array containing the encoded key + * @return UnsafeRow containing the decoded key columns + * @throws UnsupportedOperationException for unsupported encoder types + */ + def decodeKey(bytes: Array[Byte]): UnsafeRow + + /** + * Decodes the remaining portion of a split key from its serialized form. 
+ * + * For PrefixKeyScanStateEncoder: Decodes columns after the prefix + * For RangeKeyScanStateEncoder: Decodes non-ordering columns + * + * @param bytes Serialized byte array containing the encoded remaining key portion + * @return UnsafeRow containing the decoded remaining key columns + * @throws UnsupportedOperationException if called on an encoder that doesn't support split keys + */ + def decodeRemainingKey(bytes: Array[Byte]): UnsafeRow + + /** + * Decodes range scan key bytes back into an UnsafeRow, preserving proper ordering. + * + * This method reverses the special encoding done by encodePrefixKeyForRangeScan: + * - Interprets sign byte markers + * - Reverses bit flipping for negative floating point values + * - Handles null values + * + * @param bytes Serialized byte array containing the encoded range scan key + * @return UnsafeRow containing the decoded range scan columns + * @throws UnsupportedOperationException if called on an encoder that doesn't support range scans + */ + def decodePrefixKeyForRangeScan(bytes: Array[Byte]): UnsafeRow + + /** + * Decodes a value from its serialized byte form. 
+ * + * @param bytes Serialized byte array containing the encoded value + * @return UnsafeRow containing the decoded value columns + */ + def decodeValue(bytes: Array[Byte]): UnsafeRow + + def supportsSchemaEvolution: Boolean } -object RocksDBStateEncoder { - def getKeyEncoder( - keyStateEncoderSpec: KeyStateEncoderSpec, - useColumnFamilies: Boolean, - virtualColFamilyId: Option[Short] = None): RocksDBKeyStateEncoder = { - // Return the key state encoder based on the requested type - keyStateEncoderSpec match { - case NoPrefixKeyStateEncoderSpec(keySchema) => - new NoPrefixKeyStateEncoder(keySchema, useColumnFamilies, virtualColFamilyId) +abstract class RocksDBDataEncoder( + keyStateEncoderSpec: KeyStateEncoderSpec, + valueSchema: StructType) extends DataEncoder { - case PrefixKeyScanStateEncoderSpec(keySchema, numColsPrefixKey) => - new PrefixKeyScanStateEncoder(keySchema, numColsPrefixKey, - useColumnFamilies, virtualColFamilyId) + val keySchema = keyStateEncoderSpec.keySchema + val reusedKeyRow = new UnsafeRow(keyStateEncoderSpec.keySchema.length) + val reusedValueRow = new UnsafeRow(valueSchema.length) - case RangeKeyScanStateEncoderSpec(keySchema, orderingOrdinals) => - new RangeKeyScanStateEncoder(keySchema, orderingOrdinals, - useColumnFamilies, virtualColFamilyId) + // bit masks used for checking sign or flipping all bits for negative float/double values + val floatFlipBitMask = 0xFFFFFFFF + val floatSignBitMask = 0x80000000 - case _ => - throw new IllegalArgumentException(s"Unsupported key state encoder spec: " + - s"$keyStateEncoderSpec") - } + val doubleFlipBitMask = 0xFFFFFFFFFFFFFFFFL + val doubleSignBitMask = 0x8000000000000000L + + // Byte markers used to identify whether the value is null, negative or positive + // To ensure sorted ordering, we use the lowest byte value for negative numbers followed by + // positive numbers and then null values. 
+ val negativeValMarker: Byte = 0x00.toByte + val positiveValMarker: Byte = 0x01.toByte + val nullValMarker: Byte = 0x02.toByte + + def encodeWithStateSchemaId(schemaIdRow: StateSchemaIdRow): Array[Byte] = { + // Create result array big enough for all prefixes plus data + val data = schemaIdRow.bytes + val schemaId = schemaIdRow.schemaId + val result = new Array[Byte](SCHEMA_ID_PREFIX_BYTES + data.length) + var offset = Platform.BYTE_ARRAY_OFFSET + + Platform.putShort(result, offset, schemaId) + offset += SCHEMA_ID_PREFIX_BYTES + + // Write the actual data + Platform.copyMemory( + data, Platform.BYTE_ARRAY_OFFSET, + result, offset, + data.length + ) + result } - def getValueEncoder( - valueSchema: StructType, - useMultipleValuesPerKey: Boolean): RocksDBValueStateEncoder = { - if (useMultipleValuesPerKey) { - new MultiValuedStateEncoder(valueSchema) - } else { - new SingleValueStateEncoder(valueSchema) - } + def decodeStateSchemaIdRow(bytes: Array[Byte]): StateSchemaIdRow = { + var offset = Platform.BYTE_ARRAY_OFFSET + + // Read column family ID if present + val schemaId = Platform.getShort(bytes, offset) + offset += SCHEMA_ID_PREFIX_BYTES + + // Extract the actual data + val dataLength = bytes.length - SCHEMA_ID_PREFIX_BYTES + val data = new Array[Byte](dataLength) + Platform.copyMemory( + bytes, offset, + data, Platform.BYTE_ARRAY_OFFSET, + dataLength + ) + + StateSchemaIdRow(schemaId, data) } - def getColumnFamilyIdBytes(virtualColFamilyId: Short): Array[Byte] = { - val encodedBytes = new Array[Byte](VIRTUAL_COL_FAMILY_PREFIX_BYTES) - Platform.putShort(encodedBytes, Platform.BYTE_ARRAY_OFFSET, virtualColFamilyId) - encodedBytes + def unsupportedOperationForKeyStateEncoder( + operation: String + ): UnsupportedOperationException = { + new UnsupportedOperationException( + s"Method $operation not supported for encoder spec type " + + s"${keyStateEncoderSpec.getClass.getSimpleName}") } /** @@ -150,39 +276,937 @@ object RocksDBStateEncoder { val row = new 
UnsafeRow(numFields) decodeToUnsafeRow(bytes, row) } else { - null + null + } + } + + def decodeToUnsafeRow(bytes: Array[Byte], reusedRow: UnsafeRow): UnsafeRow = { + if (bytes != null) { + // Platform.BYTE_ARRAY_OFFSET is the recommended way refer to the 1st offset. See Platform. + reusedRow.pointTo( + bytes, + Platform.BYTE_ARRAY_OFFSET + STATE_ENCODING_NUM_VERSION_BYTES, + bytes.length - STATE_ENCODING_NUM_VERSION_BYTES) + reusedRow + } else { + null + } + } +} + +class UnsafeRowDataEncoder( + keyStateEncoderSpec: KeyStateEncoderSpec, + valueSchema: StructType, + stateSchemaInfo: Option[StateSchemaInfo] +) extends RocksDBDataEncoder(keyStateEncoderSpec, valueSchema) { + + override def supportsSchemaEvolution: Boolean = false + + override def encodeKey(row: UnsafeRow): Array[Byte] = { + encodeUnsafeRow(row) + } + + override def encodeRemainingKey(row: UnsafeRow): Array[Byte] = { + encodeUnsafeRow(row) + } + + override def encodePrefixKeyForRangeScan(row: UnsafeRow): Array[Byte] = { + assert(keyStateEncoderSpec.isInstanceOf[RangeKeyScanStateEncoderSpec]) + val rsk = keyStateEncoderSpec.asInstanceOf[RangeKeyScanStateEncoderSpec] + val rangeScanKeyFieldsWithOrdinal = rsk.orderingOrdinals.map { ordinal => + val field = rsk.keySchema(ordinal) + (field, ordinal) + } + val writer = new UnsafeRowWriter(rsk.orderingOrdinals.length) + writer.resetRowWriter() + rangeScanKeyFieldsWithOrdinal.zipWithIndex.foreach { case (fieldWithOrdinal, idx) => + val field = fieldWithOrdinal._1 + val value = row.get(idx, field.dataType) + // Note that we cannot allocate a smaller buffer here even if the value is null + // because the effective byte array is considered variable size and needs to have + // the same size across all rows for the ordering to work as expected. 
+ val bbuf = ByteBuffer.allocate(field.dataType.defaultSize + 1) + bbuf.order(ByteOrder.BIG_ENDIAN) + if (value == null) { + bbuf.put(nullValMarker) + writer.write(idx, bbuf.array()) + } else { + field.dataType match { + case BooleanType => + case ByteType => + val byteVal = value.asInstanceOf[Byte] + val signCol = if (byteVal < 0) { + negativeValMarker + } else { + positiveValMarker + } + bbuf.put(signCol) + bbuf.put(byteVal) + writer.write(idx, bbuf.array()) + + case ShortType => + val shortVal = value.asInstanceOf[Short] + val signCol = if (shortVal < 0) { + negativeValMarker + } else { + positiveValMarker + } + bbuf.put(signCol) + bbuf.putShort(shortVal) + writer.write(idx, bbuf.array()) + + case IntegerType => + val intVal = value.asInstanceOf[Int] + val signCol = if (intVal < 0) { + negativeValMarker + } else { + positiveValMarker + } + bbuf.put(signCol) + bbuf.putInt(intVal) + writer.write(idx, bbuf.array()) + + case LongType => + val longVal = value.asInstanceOf[Long] + val signCol = if (longVal < 0) { + negativeValMarker + } else { + positiveValMarker + } + bbuf.put(signCol) + bbuf.putLong(longVal) + writer.write(idx, bbuf.array()) + + case FloatType => + val floatVal = value.asInstanceOf[Float] + val rawBits = floatToRawIntBits(floatVal) + // perform sign comparison using bit manipulation to ensure NaN values are handled + // correctly + if ((rawBits & floatSignBitMask) != 0) { + // for negative values, we need to flip all the bits to ensure correct ordering + val updatedVal = rawBits ^ floatFlipBitMask + bbuf.put(negativeValMarker) + // convert the bits back to float + bbuf.putFloat(intBitsToFloat(updatedVal)) + } else { + bbuf.put(positiveValMarker) + bbuf.putFloat(floatVal) + } + writer.write(idx, bbuf.array()) + + case DoubleType => + val doubleVal = value.asInstanceOf[Double] + val rawBits = doubleToRawLongBits(doubleVal) + // perform sign comparison using bit manipulation to ensure NaN values are handled + // correctly + if ((rawBits & 
doubleSignBitMask) != 0) { + // for negative values, we need to flip all the bits to ensure correct ordering + val updatedVal = rawBits ^ doubleFlipBitMask + bbuf.put(negativeValMarker) + // convert the bits back to double + bbuf.putDouble(longBitsToDouble(updatedVal)) + } else { + bbuf.put(positiveValMarker) + bbuf.putDouble(doubleVal) + } + writer.write(idx, bbuf.array()) + } + } + } + encodeUnsafeRow(writer.getRow()) + } + + override def encodeValue(row: UnsafeRow): Array[Byte] = encodeUnsafeRow(row) + + override def decodeKey(bytes: Array[Byte]): UnsafeRow = { + keyStateEncoderSpec match { + case NoPrefixKeyStateEncoderSpec(_) => + decodeToUnsafeRow(bytes, reusedKeyRow) + case PrefixKeyScanStateEncoderSpec(_, numColsPrefixKey) => + decodeToUnsafeRow(bytes, numFields = numColsPrefixKey) + case _ => throw unsupportedOperationForKeyStateEncoder("decodeKey") + } + } + + override def decodeRemainingKey(bytes: Array[Byte]): UnsafeRow = { + keyStateEncoderSpec match { + case PrefixKeyScanStateEncoderSpec(_, numColsPrefixKey) => + decodeToUnsafeRow(bytes, numFields = numColsPrefixKey) + case RangeKeyScanStateEncoderSpec(_, orderingOrdinals) => + decodeToUnsafeRow(bytes, keySchema.length - orderingOrdinals.length) + case _ => throw unsupportedOperationForKeyStateEncoder("decodeRemainingKey") + } + } + + override def decodePrefixKeyForRangeScan(bytes: Array[Byte]): UnsafeRow = { + assert(keyStateEncoderSpec.isInstanceOf[RangeKeyScanStateEncoderSpec]) + val rsk = keyStateEncoderSpec.asInstanceOf[RangeKeyScanStateEncoderSpec] + val writer = new UnsafeRowWriter(rsk.orderingOrdinals.length) + val rangeScanKeyFieldsWithOrdinal = rsk.orderingOrdinals.map { ordinal => + val field = rsk.keySchema(ordinal) + (field, ordinal) + } + writer.resetRowWriter() + val row = decodeToUnsafeRow(bytes, numFields = rsk.orderingOrdinals.length) + rangeScanKeyFieldsWithOrdinal.zipWithIndex.foreach { case (fieldWithOrdinal, idx) => + val field = fieldWithOrdinal._1 + + val value = 
row.getBinary(idx) + val bbuf = ByteBuffer.wrap(value.asInstanceOf[Array[Byte]]) + bbuf.order(ByteOrder.BIG_ENDIAN) + val isNullOrSignCol = bbuf.get() + if (isNullOrSignCol == nullValMarker) { + // set the column to null and skip reading the next byte(s) + writer.setNullAt(idx) + } else { + field.dataType match { + case BooleanType => + case ByteType => + writer.write(idx, bbuf.get) + + case ShortType => + writer.write(idx, bbuf.getShort) + + case IntegerType => + writer.write(idx, bbuf.getInt) + + case LongType => + writer.write(idx, bbuf.getLong) + + case FloatType => + if (isNullOrSignCol == negativeValMarker) { + // if the number is negative, get the raw binary bits for the float + // and flip the bits back + val updatedVal = floatToRawIntBits(bbuf.getFloat) ^ floatFlipBitMask + writer.write(idx, intBitsToFloat(updatedVal)) + } else { + writer.write(idx, bbuf.getFloat) + } + + case DoubleType => + if (isNullOrSignCol == negativeValMarker) { + // if the number is negative, get the raw binary bits for the double + // and flip the bits back + val updatedVal = doubleToRawLongBits(bbuf.getDouble) ^ doubleFlipBitMask + writer.write(idx, longBitsToDouble(updatedVal)) + } else { + writer.write(idx, bbuf.getDouble) + } + } + } + } + writer.getRow() + } + + override def decodeValue(bytes: Array[Byte]): UnsafeRow = decodeToUnsafeRow(bytes, reusedValueRow) +} + +/** + * Encoder that uses Avro for serializing state store data with schema evolution support. 
+ * The encoded format varies depending on the key type and whether it's a key or value: + * + * For prefix and range scan keys: + * |--prefix---|--schemaId (2 bytes)--|--remainingKeyBytes (avro-encoded)--| + * where: + * - prefix: Variable length prefix for scan operations + * - schemaId: 2 byte short integer identifying the schema version + * - remainingKeyBytes: Avro-encoded remaining key data + * + * For no-prefix keys and values: + * |--schemaId (2 bytes)--|--avroEncodedBytes--| + * where: + * - schemaId: 2 byte short integer identifying the schema version + * - avroEncodedBytes: Variable length Avro-encoded data + * + * The schema ID allows the state store to identify which schema version was used + * to encode the data, enabling proper decoding even when schemas have evolved over time. + * + * @param keyStateEncoderSpec Specification for how to encode keys (prefix/range scan) + * @param valueSchema Schema for the values to be encoded + * @param stateSchemaInfo Schema version information for both keys and values + */ +class AvroStateEncoder( + keyStateEncoderSpec: KeyStateEncoderSpec, + valueSchema: StructType, + stateSchemaInfo: Option[StateSchemaInfo] +) extends RocksDBDataEncoder(keyStateEncoderSpec, valueSchema) with Logging { + + private val avroEncoder = createAvroEnc(keyStateEncoderSpec, valueSchema) + // Avro schema used by the avro encoders + private lazy val keyAvroType: Schema = SchemaConverters.toAvroType(keySchema) + private lazy val keyProj = UnsafeProjection.create(keySchema) + + private lazy val valueAvroType: Schema = SchemaConverters.toAvroType(valueSchema) + private lazy val valueProj = UnsafeProjection.create(valueSchema) + + // Prefix Key schema and projection definitions used by the Avro Serializers + // and Deserializers + private lazy val prefixKeySchema = keyStateEncoderSpec match { + case PrefixKeyScanStateEncoderSpec(keySchema, numColsPrefixKey) => + StructType(keySchema.take (numColsPrefixKey)) + case _ => throw 
unsupportedOperationForKeyStateEncoder("prefixKeySchema") + } + private lazy val prefixKeyAvroType = SchemaConverters.toAvroType(prefixKeySchema) + private lazy val prefixKeyProj = UnsafeProjection.create(prefixKeySchema) + + // Range Key schema nd projection definitions used by the Avro Serializers and + // Deserializers + private lazy val rangeScanKeyFieldsWithOrdinal = keyStateEncoderSpec match { + case RangeKeyScanStateEncoderSpec(keySchema, orderingOrdinals) => + orderingOrdinals.map { ordinal => + val field = keySchema(ordinal) + (field, ordinal) + } + case _ => + throw unsupportedOperationForKeyStateEncoder("rangeScanKey") + } + + private lazy val rangeScanAvroSchema = StateStoreColumnFamilySchemaUtils.convertForRangeScan( + StructType(rangeScanKeyFieldsWithOrdinal.map(_._1).toArray)) + + private lazy val rangeScanAvroType = SchemaConverters.toAvroType(rangeScanAvroSchema) + + private lazy val rangeScanAvroProjection = UnsafeProjection.create(rangeScanAvroSchema) + + // Existing remainder key schema definitions + // Remaining Key schema and projection definitions used by the Avro Serializers + // and Deserializers + private lazy val remainingKeySchema = keyStateEncoderSpec match { + case PrefixKeyScanStateEncoderSpec(keySchema, numColsPrefixKey) => + StructType(keySchema.drop(numColsPrefixKey)) + case RangeKeyScanStateEncoderSpec(keySchema, orderingOrdinals) => + StructType(0.until(keySchema.length).diff(orderingOrdinals).map(keySchema(_))) + case _ => throw unsupportedOperationForKeyStateEncoder("remainingKeySchema") + } + + private lazy val remainingKeyAvroType = SchemaConverters.toAvroType(remainingKeySchema) + + private lazy val remainingKeyAvroProjection = UnsafeProjection.create(remainingKeySchema) + + private def getAvroSerializer(schema: StructType): AvroSerializer = { + val avroType = SchemaConverters.toAvroType(schema) + new AvroSerializer(schema, avroType, nullable = false) + } + + private def getAvroDeserializer(schema: StructType): 
AvroDeserializer = { + val avroType = SchemaConverters.toAvroType(schema) + val avroOptions = AvroOptions(Map.empty) + new AvroDeserializer(avroType, schema, + avroOptions.datetimeRebaseModeInRead, avroOptions.useStableIdForUnionType, + avroOptions.stableIdPrefixForUnionType, avroOptions.recursiveFieldMaxDepth) + } + + /** + * Creates an AvroEncoder that handles both key and value serialization/deserialization. + * This method sets up the complete encoding infrastructure needed for state store operations. + * + * The encoder handles different key encoding specifications: + * - NoPrefixKeyStateEncoderSpec: Simple key encoding without prefix + * - PrefixKeyScanStateEncoderSpec: Keys with prefix for efficient scanning + * - RangeKeyScanStateEncoderSpec: Keys with ordering requirements for range scans + * + * For prefix scan cases, it also creates separate encoders for the suffix portion of keys. + * + * @param keyStateEncoderSpec Specification for how to encode keys + * @param valueSchema Schema for the values to be encoded + * @return An AvroEncoder containing all necessary serializers and deserializers + */ + private def createAvroEnc( + keyStateEncoderSpec: KeyStateEncoderSpec, + valueSchema: StructType): AvroEncoder = { + val valueSerializer = getAvroSerializer(valueSchema) + val valueDeserializer = getAvroDeserializer(valueSchema) + + // Get key schema based on encoder spec type + val keySchema = keyStateEncoderSpec match { + case NoPrefixKeyStateEncoderSpec(schema) => + schema + case PrefixKeyScanStateEncoderSpec(schema, numColsPrefixKey) => + StructType(schema.take(numColsPrefixKey)) + case RangeKeyScanStateEncoderSpec(schema, orderingOrdinals) => + val remainingSchema = { + 0.until(schema.length).diff(orderingOrdinals).map { ordinal => + schema(ordinal) + } + } + StructType(remainingSchema) + } + + // Handle suffix key schema for prefix scan case + val suffixKeySchema = keyStateEncoderSpec match { + case PrefixKeyScanStateEncoderSpec(schema, numColsPrefixKey) 
=> + Some(StructType(schema.drop(numColsPrefixKey))) + case _ => + None + } + + val keySerializer = getAvroSerializer(keySchema) + val keyDeserializer = getAvroDeserializer(keySchema) + + // Create the AvroEncoder with all components + AvroEncoder( + keySerializer, + keyDeserializer, + valueSerializer, + valueDeserializer, + suffixKeySchema.map(getAvroSerializer), + suffixKeySchema.map(getAvroDeserializer) + ) + } + + override def supportsSchemaEvolution: Boolean = true + + /** + * This method takes an UnsafeRow, and serializes to a byte array using Avro encoding. + */ + def encodeUnsafeRowToAvro( + row: UnsafeRow, + avroSerializer: AvroSerializer, + valueAvroType: Schema, + out: ByteArrayOutputStream): Array[Byte] = { + // InternalRow -> Avro.GenericDataRecord + val avroData = + avroSerializer.serialize(row) + out.reset() + val encoder = EncoderFactory.get().directBinaryEncoder(out, null) + val writer = new GenericDatumWriter[Any]( + valueAvroType) // Defining Avro writer for this struct type + writer.write(avroData, encoder) // Avro.GenericDataRecord -> byte array + encoder.flush() + out.toByteArray + } + + /** + * This method takes a byte array written using Avro encoding, and + * deserializes to an UnsafeRow using the Avro deserializer + */ + def decodeFromAvroToUnsafeRow( + valueBytes: Array[Byte], + avroDeserializer: AvroDeserializer, + valueAvroType: Schema, + valueProj: UnsafeProjection): UnsafeRow = { + if (valueBytes != null) { + val reader = new GenericDatumReader[Any](valueAvroType) + val decoder = DecoderFactory.get().binaryDecoder( + valueBytes, 0, valueBytes.length, null) + // bytes -> Avro.GenericDataRecord + val genericData = reader.read(null, decoder) + // Avro.GenericDataRecord -> InternalRow + val internalRow = avroDeserializer.deserialize( + genericData).orNull.asInstanceOf[InternalRow] + // InternalRow -> UnsafeRow + valueProj.apply(internalRow) + } else { + null + } + } + + private val out = new ByteArrayOutputStream + + override def 
encodeKey(row: UnsafeRow): Array[Byte] = { + keyStateEncoderSpec match { + case NoPrefixKeyStateEncoderSpec(_) => + val avroRow = + encodeUnsafeRowToAvro(row, avroEncoder.keySerializer, keyAvroType, out) + // prepend stateSchemaId to the Avro-encoded key portion for NoPrefixKeys + encodeWithStateSchemaId( + StateSchemaIdRow(stateSchemaInfo.get.keySchemaId, avroRow)) + case PrefixKeyScanStateEncoderSpec(_, _) => + encodeUnsafeRowToAvro(row, avroEncoder.keySerializer, prefixKeyAvroType, out) + case _ => throw unsupportedOperationForKeyStateEncoder("encodeKey") + } + } + + override def encodeRemainingKey(row: UnsafeRow): Array[Byte] = { + val avroRow = keyStateEncoderSpec match { + case PrefixKeyScanStateEncoderSpec(_, _) => + encodeUnsafeRowToAvro(row, avroEncoder.suffixKeySerializer.get, remainingKeyAvroType, out) + case RangeKeyScanStateEncoderSpec(_, _) => + encodeUnsafeRowToAvro(row, avroEncoder.keySerializer, remainingKeyAvroType, out) + case _ => throw unsupportedOperationForKeyStateEncoder("encodeRemainingKey") + } + // prepend stateSchemaId to the remaining key portion + encodeWithStateSchemaId( + StateSchemaIdRow(stateSchemaInfo.get.keySchemaId, avroRow)) + } + + /** + * Encodes an UnsafeRow into an Avro-compatible byte array format for range scan operations. + * + * This method transforms row data into a binary format that preserves ordering when + * used in range scans. 
+ * For each field in the row: + * - A marker byte is written to indicate null status or sign (for numeric types) + * - The value is written in big-endian format + * + * Special handling is implemented for: + * - Null values: marked with nullValMarker followed by zero bytes + * - Negative numbers: marked with negativeValMarker + * - Floating point numbers: bit manipulation to handle sign and NaN values correctly + * + * @param row The UnsafeRow to encode + * @param avroType The Avro schema defining the structure for encoding + * @return Array[Byte] containing the Avro-encoded data that preserves ordering for range scans + * @throws UnsupportedOperationException if a field's data type is not supported for range + * scan encoding + */ + override def encodePrefixKeyForRangeScan(row: UnsafeRow): Array[Byte] = { + val record = new GenericData.Record(rangeScanAvroType) + var fieldIdx = 0 + rangeScanKeyFieldsWithOrdinal.zipWithIndex.foreach { case (fieldWithOrdinal, idx) => + val field = fieldWithOrdinal._1 + val value = row.get(idx, field.dataType) + + // Create marker byte buffer + val markerBuffer = ByteBuffer.allocate(1) + markerBuffer.order(ByteOrder.BIG_ENDIAN) + + if (value == null) { + markerBuffer.put(nullValMarker) + record.put(fieldIdx, ByteBuffer.wrap(markerBuffer.array())) + record.put(fieldIdx + 1, ByteBuffer.wrap(new Array[Byte](field.dataType.defaultSize))) + } else { + field.dataType match { + case BooleanType => + markerBuffer.put(positiveValMarker) + record.put(fieldIdx, ByteBuffer.wrap(markerBuffer.array())) + val valueBuffer = ByteBuffer.allocate(1) + valueBuffer.put(if (value.asInstanceOf[Boolean]) 1.toByte else 0.toByte) + record.put(fieldIdx + 1, ByteBuffer.wrap(valueBuffer.array())) + + case ByteType => + val byteVal = value.asInstanceOf[Byte] + markerBuffer.put(if (byteVal < 0) negativeValMarker else positiveValMarker) + record.put(fieldIdx, ByteBuffer.wrap(markerBuffer.array())) + + val valueBuffer = ByteBuffer.allocate(1) + 
valueBuffer.order(ByteOrder.BIG_ENDIAN) + valueBuffer.put(byteVal) + record.put(fieldIdx + 1, ByteBuffer.wrap(valueBuffer.array())) + + case ShortType => + val shortVal = value.asInstanceOf[Short] + markerBuffer.put(if (shortVal < 0) negativeValMarker else positiveValMarker) + record.put(fieldIdx, ByteBuffer.wrap(markerBuffer.array())) + + val valueBuffer = ByteBuffer.allocate(2) + valueBuffer.order(ByteOrder.BIG_ENDIAN) + valueBuffer.putShort(shortVal) + record.put(fieldIdx + 1, ByteBuffer.wrap(valueBuffer.array())) + + case IntegerType => + val intVal = value.asInstanceOf[Int] + markerBuffer.put(if (intVal < 0) negativeValMarker else positiveValMarker) + record.put(fieldIdx, ByteBuffer.wrap(markerBuffer.array())) + + val valueBuffer = ByteBuffer.allocate(4) + valueBuffer.order(ByteOrder.BIG_ENDIAN) + valueBuffer.putInt(intVal) + record.put(fieldIdx + 1, ByteBuffer.wrap(valueBuffer.array())) + + case LongType => + val longVal = value.asInstanceOf[Long] + markerBuffer.put(if (longVal < 0) negativeValMarker else positiveValMarker) + record.put(fieldIdx, ByteBuffer.wrap(markerBuffer.array())) + + val valueBuffer = ByteBuffer.allocate(8) + valueBuffer.order(ByteOrder.BIG_ENDIAN) + valueBuffer.putLong(longVal) + record.put(fieldIdx + 1, ByteBuffer.wrap(valueBuffer.array())) + + case FloatType => + val floatVal = value.asInstanceOf[Float] + val rawBits = floatToRawIntBits(floatVal) + markerBuffer.put(if ((rawBits & floatSignBitMask) != 0) { + negativeValMarker + } else { + positiveValMarker + }) + record.put(fieldIdx, ByteBuffer.wrap(markerBuffer.array())) + + val valueBuffer = ByteBuffer.allocate(4) + valueBuffer.order(ByteOrder.BIG_ENDIAN) + if ((rawBits & floatSignBitMask) != 0) { + val updatedVal = rawBits ^ floatFlipBitMask + valueBuffer.putFloat(intBitsToFloat(updatedVal)) + } else { + valueBuffer.putFloat(floatVal) + } + record.put(fieldIdx + 1, ByteBuffer.wrap(valueBuffer.array())) + + case DoubleType => + val doubleVal = value.asInstanceOf[Double] + val rawBits 
= doubleToRawLongBits(doubleVal) + markerBuffer.put(if ((rawBits & doubleSignBitMask) != 0) { + negativeValMarker + } else { + positiveValMarker + }) + record.put(fieldIdx, ByteBuffer.wrap(markerBuffer.array())) + + val valueBuffer = ByteBuffer.allocate(8) + valueBuffer.order(ByteOrder.BIG_ENDIAN) + if ((rawBits & doubleSignBitMask) != 0) { + val updatedVal = rawBits ^ doubleFlipBitMask + valueBuffer.putDouble(longBitsToDouble(updatedVal)) + } else { + valueBuffer.putDouble(doubleVal) + } + record.put(fieldIdx + 1, ByteBuffer.wrap(valueBuffer.array())) + + case _ => throw new UnsupportedOperationException( + s"Range scan encoding not supported for data type: ${field.dataType}") + } + } + fieldIdx += 2 + } + + out.reset() + val writer = new GenericDatumWriter[GenericRecord](rangeScanAvroType) + val encoder = EncoderFactory.get().binaryEncoder(out, null) + writer.write(record, encoder) + encoder.flush() + out.toByteArray + } + + override def encodeValue(row: UnsafeRow): Array[Byte] = { + val avroRow = encodeUnsafeRowToAvro(row, avroEncoder.valueSerializer, valueAvroType, out) + // prepend stateSchemaId to the Avro-encoded value portion + encodeWithStateSchemaId(StateSchemaIdRow(stateSchemaInfo.get.valueSchemaId, avroRow)) + } + + override def decodeKey(bytes: Array[Byte]): UnsafeRow = { + keyStateEncoderSpec match { + case NoPrefixKeyStateEncoderSpec(_) => + val schemaIdRow = decodeStateSchemaIdRow(bytes) + decodeFromAvroToUnsafeRow( + schemaIdRow.bytes, avroEncoder.keyDeserializer, keyAvroType, keyProj) + case PrefixKeyScanStateEncoderSpec(_, _) => + decodeFromAvroToUnsafeRow( + bytes, avroEncoder.keyDeserializer, prefixKeyAvroType, prefixKeyProj) + case _ => throw unsupportedOperationForKeyStateEncoder("decodeKey") + } + } + + + override def decodeRemainingKey(bytes: Array[Byte]): UnsafeRow = { + val schemaIdRow = decodeStateSchemaIdRow(bytes) + keyStateEncoderSpec match { + case PrefixKeyScanStateEncoderSpec(_, _) => + decodeFromAvroToUnsafeRow(schemaIdRow.bytes, 
+ avroEncoder.suffixKeyDeserializer.get, remainingKeyAvroType, remainingKeyAvroProjection) + case RangeKeyScanStateEncoderSpec(_, _) => + decodeFromAvroToUnsafeRow( + schemaIdRow.bytes, + avroEncoder.keyDeserializer, remainingKeyAvroType, remainingKeyAvroProjection) + case _ => throw unsupportedOperationForKeyStateEncoder("decodeRemainingKey") + } + } + + /** + * Decodes an Avro-encoded byte array back into an UnsafeRow for range scan operations. + * + * This method reverses the encoding process performed by encodePrefixKeyForRangeScan: + * - Reads the marker byte to determine null status or sign + * - Reconstructs the original values from big-endian format + * - Handles special cases for floating point numbers by reversing bit manipulations + * + * The decoding process preserves the original data types and values, including: + * - Null values marked by nullValMarker + * - Sign information for numeric types + * - Proper restoration of negative floating point values + * + * @param bytes The Avro-encoded byte array to decode + * @param avroType The Avro schema defining the structure for decoding + * @return UnsafeRow containing the decoded data + * @throws UnsupportedOperationException if a field's data type is not supported for range + * scan decoding + */ + override def decodePrefixKeyForRangeScan(bytes: Array[Byte]): UnsafeRow = { + val reader = new GenericDatumReader[GenericRecord](rangeScanAvroType) + val decoder = DecoderFactory.get().binaryDecoder(bytes, 0, bytes.length, null) + val record = reader.read(null, decoder) + + val rowWriter = new UnsafeRowWriter(rangeScanKeyFieldsWithOrdinal.length) + rowWriter.resetRowWriter() + + var fieldIdx = 0 + rangeScanKeyFieldsWithOrdinal.zipWithIndex.foreach { case (fieldWithOrdinal, idx) => + val field = fieldWithOrdinal._1 + + val markerBytes = record.get(fieldIdx).asInstanceOf[ByteBuffer].array() + val markerBuf = ByteBuffer.wrap(markerBytes) + markerBuf.order(ByteOrder.BIG_ENDIAN) + val marker = markerBuf.get() + + if 
(marker == nullValMarker) { + rowWriter.setNullAt(idx) + } else { + field.dataType match { + case BooleanType => + val bytes = record.get(fieldIdx + 1).asInstanceOf[ByteBuffer].array() + rowWriter.write(idx, bytes(0) == 1) + + case ByteType => + val bytes = record.get(fieldIdx + 1).asInstanceOf[ByteBuffer].array() + val valueBuf = ByteBuffer.wrap(bytes) + valueBuf.order(ByteOrder.BIG_ENDIAN) + rowWriter.write(idx, valueBuf.get()) + + case ShortType => + val bytes = record.get(fieldIdx + 1).asInstanceOf[ByteBuffer].array() + val valueBuf = ByteBuffer.wrap(bytes) + valueBuf.order(ByteOrder.BIG_ENDIAN) + rowWriter.write(idx, valueBuf.getShort()) + + case IntegerType => + val bytes = record.get(fieldIdx + 1).asInstanceOf[ByteBuffer].array() + val valueBuf = ByteBuffer.wrap(bytes) + valueBuf.order(ByteOrder.BIG_ENDIAN) + rowWriter.write(idx, valueBuf.getInt()) + + case LongType => + val bytes = record.get(fieldIdx + 1).asInstanceOf[ByteBuffer].array() + val valueBuf = ByteBuffer.wrap(bytes) + valueBuf.order(ByteOrder.BIG_ENDIAN) + rowWriter.write(idx, valueBuf.getLong()) + + case FloatType => + val bytes = record.get(fieldIdx + 1).asInstanceOf[ByteBuffer].array() + val valueBuf = ByteBuffer.wrap(bytes) + valueBuf.order(ByteOrder.BIG_ENDIAN) + if (marker == negativeValMarker) { + val floatVal = valueBuf.getFloat + val updatedVal = floatToRawIntBits(floatVal) ^ floatFlipBitMask + rowWriter.write(idx, intBitsToFloat(updatedVal)) + } else { + rowWriter.write(idx, valueBuf.getFloat()) + } + + case DoubleType => + val bytes = record.get(fieldIdx + 1).asInstanceOf[ByteBuffer].array() + val valueBuf = ByteBuffer.wrap(bytes) + valueBuf.order(ByteOrder.BIG_ENDIAN) + if (marker == negativeValMarker) { + val doubleVal = valueBuf.getDouble + val updatedVal = doubleToRawLongBits(doubleVal) ^ doubleFlipBitMask + rowWriter.write(idx, longBitsToDouble(updatedVal)) + } else { + rowWriter.write(idx, valueBuf.getDouble()) + } + + case _ => throw new UnsupportedOperationException( + s"Range 
scan decoding not supported for data type: ${field.dataType}") + } + } + fieldIdx += 2 + } + + rowWriter.getRow() + } + + override def decodeValue(bytes: Array[Byte]): UnsafeRow = { + val schemaIdRow = decodeStateSchemaIdRow(bytes) + decodeFromAvroToUnsafeRow( + schemaIdRow.bytes, avroEncoder.valueDeserializer, valueAvroType, valueProj) + } +} + +/** + * Information about a RocksDB column family used for state storage. + * + * @param colFamilyName The name of the column family in RocksDB + * @param virtualColumnFamilyId A unique identifier for the virtual column family, + * used as a prefix in encoded state rows to distinguish + * between different column families + */ +case class ColumnFamilyInfo( + colFamilyName: String, + virtualColumnFamilyId: Short +) + +/** + * Metadata prefixes stored at the beginning of encoded state rows. + * These prefixes allow for schema evolution and column family organization + * in the state store. + * + * @param columnFamilyId Optional identifier for the virtual column family. + * When present, allows organizing state data into + * different column families in RocksDB. + */ +case class StateRowPrefix( + columnFamilyId: Option[Short] +) + +class StateRowPrefixEncoder( + useColumnFamilies: Boolean, + columnFamilyInfo: Option[ColumnFamilyInfo] +) { + + private val numColFamilyBytes = if (useColumnFamilies) { + VIRTUAL_COL_FAMILY_PREFIX_BYTES + } else { + 0 + } + + def getNumPrefixBytes: Int = numColFamilyBytes + + val out = new ByteArrayOutputStream + + /** + * Get Byte Array for the virtual column family id that is used as prefix for + * key state rows. 
+ */ + def getColumnFamilyIdBytes(): Array[Byte] = { + assert(useColumnFamilies, "Cannot return virtual Column Family Id Bytes" + + " because multiple Column is not supported for this encoder") + val encodedBytes = new Array[Byte](VIRTUAL_COL_FAMILY_PREFIX_BYTES) + val virtualColFamilyId = columnFamilyInfo.get.virtualColumnFamilyId + Platform.putShort(encodedBytes, Platform.BYTE_ARRAY_OFFSET, virtualColFamilyId) + encodedBytes + } + + /** + * Encodes a state row by adding schema and column family ID prefixes if enabled. + * + * @param data The byte array containing the data to be prefixed + * @return A new byte array containing the prefixed data. If no prefixing is needed + * (neither schema evolution nor column families are enabled), returns a copy + * of the input array to maintain consistency with the prefixed case. + */ + def encodeStateRowWithPrefix(data: Array[Byte]): Array[Byte] = { + // Create result array big enough for all prefixes plus data + val result = new Array[Byte](getNumPrefixBytes + data.length) + var offset = Platform.BYTE_ARRAY_OFFSET + + // Write column family ID if enabled + if (useColumnFamilies) { + val colFamilyId = columnFamilyInfo.get.virtualColumnFamilyId + Platform.putShort(result, offset, colFamilyId) + offset += VIRTUAL_COL_FAMILY_PREFIX_BYTES + } + + // Write the actual data + Platform.copyMemory( + data, Platform.BYTE_ARRAY_OFFSET, + result, offset, + data.length + ) + + result + } + + def decodeStateRowPrefix(stateRow: Array[Byte]): StateRowPrefix = { + var offset = Platform.BYTE_ARRAY_OFFSET + + // Read column family ID if present + val colFamilyId = if (useColumnFamilies) { + val id = Platform.getShort(stateRow, offset) + offset += VIRTUAL_COL_FAMILY_PREFIX_BYTES + Some(id) + } else { + None + } + + StateRowPrefix(colFamilyId) + } + + def decodeStateRowData(stateRow: Array[Byte]): Array[Byte] = { + val offset = Platform.BYTE_ARRAY_OFFSET + getNumPrefixBytes + + // Extract the actual data + val dataLength = stateRow.length - 
getNumPrefixBytes + val data = new Array[Byte](dataLength) + Platform.copyMemory( + stateRow, offset, + data, Platform.BYTE_ARRAY_OFFSET, + dataLength + ) + data + } +} + +/** + * Factory object for creating state encoders used by RocksDB state store. + * + * The encoders created by this object handle serialization and deserialization of state data, + * supporting both key and value encoding with various access patterns + * (e.g., prefix scan, range scan). + */ +object RocksDBStateEncoder extends Logging { + + /** + * Creates a key encoder based on the specified encoding strategy and configuration. + * + * @param dataEncoder The underlying encoder that handles the actual data encoding/decoding + * @param keyStateEncoderSpec Specification defining the key encoding strategy + * (no prefix, prefix scan, or range scan) + * @param useColumnFamilies Whether to use RocksDB column families for storage + * @param virtualColFamilyId Optional column family identifier when column families are enabled + * @return A configured RocksDBKeyStateEncoder instance + */ + def getKeyEncoder( + dataEncoder: RocksDBDataEncoder, + keyStateEncoderSpec: KeyStateEncoderSpec, + useColumnFamilies: Boolean, + columnFamilyInfo: Option[ColumnFamilyInfo] = None): RocksDBKeyStateEncoder = { + keyStateEncoderSpec.toEncoder(dataEncoder, useColumnFamilies, columnFamilyInfo) + } + + /** + * Creates a value encoder that supports either single or multiple values per key. 
+ * + * @param dataEncoder The underlying encoder that handles the actual data encoding/decoding + * @param valueSchema Schema defining the structure of values to be encoded + * @param useMultipleValuesPerKey If true, creates an encoder that can handle multiple values + * per key; if false, creates an encoder for single values + * @return A configured RocksDBValueStateEncoder instance + */ + def getValueEncoder( + dataEncoder: RocksDBDataEncoder, + valueSchema: StructType, + useMultipleValuesPerKey: Boolean): RocksDBValueStateEncoder = { + if (useMultipleValuesPerKey) { + new MultiValuedStateEncoder(dataEncoder, valueSchema) + } else { + new SingleValueStateEncoder(dataEncoder, valueSchema) } } - def decodeToUnsafeRow(bytes: Array[Byte], reusedRow: UnsafeRow): UnsafeRow = { - if (bytes != null) { - // Platform.BYTE_ARRAY_OFFSET is the recommended way refer to the 1st offset. See Platform. - reusedRow.pointTo( - bytes, - Platform.BYTE_ARRAY_OFFSET + STATE_ENCODING_NUM_VERSION_BYTES, - bytes.length - STATE_ENCODING_NUM_VERSION_BYTES) - reusedRow - } else { - null - } + /** + * Encodes a virtual column family ID into a byte array suitable for RocksDB. + * + * This method creates a fixed-size byte array prefixed with the virtual column family ID, + * which is used to partition data within RocksDB. 
+ * + * @param virtualColFamilyId The column family identifier to encode + * @return A byte array containing the encoded column family ID + */ + def getColumnFamilyIdBytes(virtualColFamilyId: Short): Array[Byte] = { + val encodedBytes = new Array[Byte](VIRTUAL_COL_FAMILY_PREFIX_BYTES) + Platform.putShort(encodedBytes, Platform.BYTE_ARRAY_OFFSET, virtualColFamilyId) + encodedBytes } } /** * RocksDB Key Encoder for UnsafeRow that supports prefix scan * + * @param dataEncoder - the encoder that handles actual encoding/decoding of data * @param keySchema - schema of the key to be encoded * @param numColsPrefixKey - number of columns to be used for prefix key * @param useColumnFamilies - if column family is enabled for this encoder */ class PrefixKeyScanStateEncoder( + dataEncoder: RocksDBDataEncoder, keySchema: StructType, numColsPrefixKey: Int, useColumnFamilies: Boolean = false, - virtualColFamilyId: Option[Short] = None) - extends RocksDBKeyStateEncoderBase(useColumnFamilies, virtualColFamilyId) { - - import RocksDBStateEncoder._ + columnFamilyInfo: Option[ColumnFamilyInfo] = None) + extends StateRowPrefixEncoder( + useColumnFamilies, + columnFamilyInfo + ) with RocksDBKeyStateEncoder with Logging { private val prefixKeyFieldsWithIdx: Seq[(StructField, Int)] = { keySchema.zipWithIndex.take(numColsPrefixKey) @@ -210,43 +1234,53 @@ class PrefixKeyScanStateEncoder( private val joinedRowOnKey = new JoinedRow() override def encodeKey(row: UnsafeRow): Array[Byte] = { - val prefixKeyEncoded = encodeUnsafeRow(extractPrefixKey(row)) - val remainingEncoded = encodeUnsafeRow(remainingKeyProjection(row)) + // First encode prefix and remaining key parts + val prefixKeyEncoded = dataEncoder.encodeKey(extractPrefixKey(row)) + val remainingEncoded = dataEncoder.encodeRemainingKey(remainingKeyProjection(row)) - val (encodedBytes, startingOffset) = encodeColumnFamilyPrefix( - prefixKeyEncoded.length + remainingEncoded.length + 4 + // Combine prefix key and remaining key into single 
array + val combinedData = new Array[Byte](4 + prefixKeyEncoded.length + remainingEncoded.length) + Platform.putInt(combinedData, Platform.BYTE_ARRAY_OFFSET, prefixKeyEncoded.length) + Platform.copyMemory( + prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET, + combinedData, Platform.BYTE_ARRAY_OFFSET + 4, + prefixKeyEncoded.length + ) + Platform.copyMemory( + remainingEncoded, Platform.BYTE_ARRAY_OFFSET, + combinedData, Platform.BYTE_ARRAY_OFFSET + 4 + prefixKeyEncoded.length, + remainingEncoded.length ) - Platform.putInt(encodedBytes, startingOffset, prefixKeyEncoded.length) - Platform.copyMemory(prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET, - encodedBytes, startingOffset + 4, prefixKeyEncoded.length) - // NOTE: We don't put the length of remainingEncoded as we can calculate later - // on deserialization. - Platform.copyMemory(remainingEncoded, Platform.BYTE_ARRAY_OFFSET, - encodedBytes, startingOffset + 4 + prefixKeyEncoded.length, - remainingEncoded.length) - - encodedBytes + // Add state row prefix using encoder + encodeStateRowWithPrefix(combinedData) } override def decodeKey(keyBytes: Array[Byte]): UnsafeRow = { - val prefixKeyEncodedLen = Platform.getInt(keyBytes, decodeKeyStartOffset) - val prefixKeyEncoded = new Array[Byte](prefixKeyEncodedLen) - Platform.copyMemory(keyBytes, decodeKeyStartOffset + 4, - prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET, prefixKeyEncodedLen) + // First decode the metadata prefixes and get the actual key data + val keyData = decodeStateRowData(keyBytes) - // Here we calculate the remainingKeyEncodedLen leveraging the length of keyBytes - val remainingKeyEncodedLen = keyBytes.length - 4 - prefixKeyEncodedLen - - offsetForColFamilyPrefix + // Get prefix key length from the start of the actual key data + val prefixKeyEncodedLen = Platform.getInt(keyData, Platform.BYTE_ARRAY_OFFSET) + val prefixKeyEncoded = new Array[Byte](prefixKeyEncodedLen) + Platform.copyMemory( + keyData, Platform.BYTE_ARRAY_OFFSET + 4, + prefixKeyEncoded, 
Platform.BYTE_ARRAY_OFFSET, + prefixKeyEncodedLen + ) + // Calculate remaining key length and extract it + val remainingKeyEncodedLen = keyData.length - 4 - prefixKeyEncodedLen val remainingKeyEncoded = new Array[Byte](remainingKeyEncodedLen) - Platform.copyMemory(keyBytes, decodeKeyStartOffset + 4 + prefixKeyEncodedLen, - remainingKeyEncoded, Platform.BYTE_ARRAY_OFFSET, remainingKeyEncodedLen) - - val prefixKeyDecoded = decodeToUnsafeRow(prefixKeyEncoded, numFields = numColsPrefixKey) - val remainingKeyDecoded = decodeToUnsafeRow(remainingKeyEncoded, - numFields = keySchema.length - numColsPrefixKey) + Platform.copyMemory( + keyData, Platform.BYTE_ARRAY_OFFSET + 4 + prefixKeyEncodedLen, + remainingKeyEncoded, Platform.BYTE_ARRAY_OFFSET, + remainingKeyEncodedLen + ) + // Decode both parts and combine + val prefixKeyDecoded = dataEncoder.decodeKey(prefixKeyEncoded) + val remainingKeyDecoded = dataEncoder.decodeRemainingKey(remainingKeyEncoded) restoreKeyProjection(joinedRowOnKey.withLeft(prefixKeyDecoded).withRight(remainingKeyDecoded)) } @@ -255,15 +1289,19 @@ class PrefixKeyScanStateEncoder( } override def encodePrefixKey(prefixKey: UnsafeRow): Array[Byte] = { - val prefixKeyEncoded = encodeUnsafeRow(prefixKey) - val (prefix, startingOffset) = encodeColumnFamilyPrefix( - prefixKeyEncoded.length + 4 + // First encode the prefix key part + val prefixKeyEncoded = dataEncoder.encodeKey(prefixKey) + + // Create array with length prefix + val dataWithLength = new Array[Byte](4 + prefixKeyEncoded.length) + Platform.putInt(dataWithLength, Platform.BYTE_ARRAY_OFFSET, prefixKeyEncoded.length) + Platform.copyMemory( + prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET, + dataWithLength, Platform.BYTE_ARRAY_OFFSET + 4, + prefixKeyEncoded.length ) - Platform.putInt(prefix, startingOffset, prefixKeyEncoded.length) - Platform.copyMemory(prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET, prefix, - startingOffset + 4, prefixKeyEncoded.length) - prefix + 
encodeStateRowWithPrefix(dataWithLength) } override def supportPrefixKeyScan: Boolean = true @@ -296,18 +1334,21 @@ class PrefixKeyScanStateEncoder( * the right lexicographical ordering. For the rationale around this, please check the link * here: https://en.wikipedia.org/wiki/IEEE_754#Design_rationale * + * @param dataEncoder - the encoder that handles the actual encoding/decoding of data * @param keySchema - schema of the key to be encoded * @param orderingOrdinals - the ordinals for which the range scan is constructed * @param useColumnFamilies - if column family is enabled for this encoder */ class RangeKeyScanStateEncoder( + dataEncoder: RocksDBDataEncoder, keySchema: StructType, orderingOrdinals: Seq[Int], useColumnFamilies: Boolean = false, - virtualColFamilyId: Option[Short] = None) - extends RocksDBKeyStateEncoderBase(useColumnFamilies, virtualColFamilyId) { - - import RocksDBStateEncoder._ + columnFamilyInfo: Option[ColumnFamilyInfo] = None) + extends StateRowPrefixEncoder( + useColumnFamilies, + columnFamilyInfo + ) with RocksDBKeyStateEncoder with Logging { private val rangeScanKeyFieldsWithOrdinal: Seq[(StructField, Int)] = { orderingOrdinals.map { ordinal => @@ -381,266 +1422,88 @@ class RangeKeyScanStateEncoder( rangeScanKeyProjection(key) } - // bit masks used for checking sign or flipping all bits for negative float/double values - private val floatFlipBitMask = 0xFFFFFFFF - private val floatSignBitMask = 0x80000000 - - private val doubleFlipBitMask = 0xFFFFFFFFFFFFFFFFL - private val doubleSignBitMask = 0x8000000000000000L - - // Byte markers used to identify whether the value is null, negative or positive - // To ensure sorted ordering, we use the lowest byte value for negative numbers followed by - // positive numbers and then null values. 
- private val negativeValMarker: Byte = 0x00.toByte - private val positiveValMarker: Byte = 0x01.toByte - private val nullValMarker: Byte = 0x02.toByte - - // Rewrite the unsafe row by replacing fixed size fields with BIG_ENDIAN encoding - // using byte arrays. - // To handle "null" values, we prepend a byte to the byte array indicating whether the value - // is null or not. If the value is null, we write the null byte followed by zero bytes. - // If the value is not null, we write the null byte followed by the value. - // Note that setting null for the index on the unsafeRow is not feasible as it would change - // the sorting order on iteration. - // Also note that the same byte is used to indicate whether the value is negative or not. - private def encodePrefixKeyForRangeScan(row: UnsafeRow): UnsafeRow = { - val writer = new UnsafeRowWriter(orderingOrdinals.length) - writer.resetRowWriter() - rangeScanKeyFieldsWithOrdinal.zipWithIndex.foreach { case (fieldWithOrdinal, idx) => - val field = fieldWithOrdinal._1 - val value = row.get(idx, field.dataType) - // Note that we cannot allocate a smaller buffer here even if the value is null - // because the effective byte array is considered variable size and needs to have - // the same size across all rows for the ordering to work as expected. 
- val bbuf = ByteBuffer.allocate(field.dataType.defaultSize + 1) - bbuf.order(ByteOrder.BIG_ENDIAN) - if (value == null) { - bbuf.put(nullValMarker) - writer.write(idx, bbuf.array()) - } else { - field.dataType match { - case BooleanType => - case ByteType => - val byteVal = value.asInstanceOf[Byte] - val signCol = if (byteVal < 0) { - negativeValMarker - } else { - positiveValMarker - } - bbuf.put(signCol) - bbuf.put(byteVal) - writer.write(idx, bbuf.array()) - - case ShortType => - val shortVal = value.asInstanceOf[Short] - val signCol = if (shortVal < 0) { - negativeValMarker - } else { - positiveValMarker - } - bbuf.put(signCol) - bbuf.putShort(shortVal) - writer.write(idx, bbuf.array()) - - case IntegerType => - val intVal = value.asInstanceOf[Int] - val signCol = if (intVal < 0) { - negativeValMarker - } else { - positiveValMarker - } - bbuf.put(signCol) - bbuf.putInt(intVal) - writer.write(idx, bbuf.array()) - - case LongType => - val longVal = value.asInstanceOf[Long] - val signCol = if (longVal < 0) { - negativeValMarker - } else { - positiveValMarker - } - bbuf.put(signCol) - bbuf.putLong(longVal) - writer.write(idx, bbuf.array()) - - case FloatType => - val floatVal = value.asInstanceOf[Float] - val rawBits = floatToRawIntBits(floatVal) - // perform sign comparison using bit manipulation to ensure NaN values are handled - // correctly - if ((rawBits & floatSignBitMask) != 0) { - // for negative values, we need to flip all the bits to ensure correct ordering - val updatedVal = rawBits ^ floatFlipBitMask - bbuf.put(negativeValMarker) - // convert the bits back to float - bbuf.putFloat(intBitsToFloat(updatedVal)) - } else { - bbuf.put(positiveValMarker) - bbuf.putFloat(floatVal) - } - writer.write(idx, bbuf.array()) - - case DoubleType => - val doubleVal = value.asInstanceOf[Double] - val rawBits = doubleToRawLongBits(doubleVal) - // perform sign comparison using bit manipulation to ensure NaN values are handled - // correctly - if ((rawBits & 
doubleSignBitMask) != 0) { - // for negative values, we need to flip all the bits to ensure correct ordering - val updatedVal = rawBits ^ doubleFlipBitMask - bbuf.put(negativeValMarker) - // convert the bits back to double - bbuf.putDouble(longBitsToDouble(updatedVal)) - } else { - bbuf.put(positiveValMarker) - bbuf.putDouble(doubleVal) - } - writer.write(idx, bbuf.array()) - } - } - } - writer.getRow() - } - - // Rewrite the unsafe row by converting back from BIG_ENDIAN byte arrays to the - // original data types. - // For decode, we extract the byte array from the UnsafeRow, and then read the first byte - // to determine if the value is null or not. If the value is null, we set the ordinal on - // the UnsafeRow to null. If the value is not null, we read the rest of the bytes to get the - // actual value. - // For negative float/double values, we need to flip all the bits back to get the original value. - private def decodePrefixKeyForRangeScan(row: UnsafeRow): UnsafeRow = { - val writer = new UnsafeRowWriter(orderingOrdinals.length) - writer.resetRowWriter() - rangeScanKeyFieldsWithOrdinal.zipWithIndex.foreach { case (fieldWithOrdinal, idx) => - val field = fieldWithOrdinal._1 - - val value = row.getBinary(idx) - val bbuf = ByteBuffer.wrap(value.asInstanceOf[Array[Byte]]) - bbuf.order(ByteOrder.BIG_ENDIAN) - val isNullOrSignCol = bbuf.get() - if (isNullOrSignCol == nullValMarker) { - // set the column to null and skip reading the next byte(s) - writer.setNullAt(idx) - } else { - field.dataType match { - case BooleanType => - case ByteType => - writer.write(idx, bbuf.get) - - case ShortType => - writer.write(idx, bbuf.getShort) - - case IntegerType => - writer.write(idx, bbuf.getInt) - - case LongType => - writer.write(idx, bbuf.getLong) - - case FloatType => - if (isNullOrSignCol == negativeValMarker) { - // if the number is negative, get the raw binary bits for the float - // and flip the bits back - val updatedVal = floatToRawIntBits(bbuf.getFloat) ^ 
floatFlipBitMask - writer.write(idx, intBitsToFloat(updatedVal)) - } else { - writer.write(idx, bbuf.getFloat) - } - - case DoubleType => - if (isNullOrSignCol == negativeValMarker) { - // if the number is negative, get the raw binary bits for the double - // and flip the bits back - val updatedVal = doubleToRawLongBits(bbuf.getDouble) ^ doubleFlipBitMask - writer.write(idx, longBitsToDouble(updatedVal)) - } else { - writer.write(idx, bbuf.getDouble) - } - } - } - } - writer.getRow() - } - override def encodeKey(row: UnsafeRow): Array[Byte] = { - // This prefix key has the columns specified by orderingOrdinals + // First encode the range scan ordered prefix val prefixKey = extractPrefixKey(row) - val rangeScanKeyEncoded = encodeUnsafeRow(encodePrefixKeyForRangeScan(prefixKey)) + val rangeScanKeyEncoded = dataEncoder.encodePrefixKeyForRangeScan(prefixKey) - val result = if (orderingOrdinals.length < keySchema.length) { - val remainingEncoded = encodeUnsafeRow(remainingKeyProjection(row)) - val (encodedBytes, startingOffset) = encodeColumnFamilyPrefix( - rangeScanKeyEncoded.length + remainingEncoded.length + 4 - ) + // We have remaining key parts to encode + val remainingEncoded = dataEncoder.encodeRemainingKey(remainingKeyProjection(row)) - Platform.putInt(encodedBytes, startingOffset, - rangeScanKeyEncoded.length) - Platform.copyMemory(rangeScanKeyEncoded, Platform.BYTE_ARRAY_OFFSET, - encodedBytes, startingOffset + 4, rangeScanKeyEncoded.length) - // NOTE: We don't put the length of remainingEncoded as we can calculate later - // on deserialization. - Platform.copyMemory(remainingEncoded, Platform.BYTE_ARRAY_OFFSET, - encodedBytes, startingOffset + 4 + rangeScanKeyEncoded.length, - remainingEncoded.length) - encodedBytes - } else { - // if the num of ordering cols is same as num of key schema cols, we don't need to - // encode the remaining key as it's empty. 
- val (encodedBytes, startingOffset) = encodeColumnFamilyPrefix( - rangeScanKeyEncoded.length + 4 - ) + // Combine range scan key and remaining key with length prefix + val combinedData = new Array[Byte](4 + rangeScanKeyEncoded.length + remainingEncoded.length) - Platform.putInt(encodedBytes, startingOffset, - rangeScanKeyEncoded.length) - Platform.copyMemory(rangeScanKeyEncoded, Platform.BYTE_ARRAY_OFFSET, - encodedBytes, startingOffset + 4, rangeScanKeyEncoded.length) - encodedBytes - } - result + // Write length of range scan key + Platform.putInt(combinedData, Platform.BYTE_ARRAY_OFFSET, rangeScanKeyEncoded.length) + + // Write range scan key + Platform.copyMemory( + rangeScanKeyEncoded, Platform.BYTE_ARRAY_OFFSET, + combinedData, Platform.BYTE_ARRAY_OFFSET + 4, + rangeScanKeyEncoded.length + ) + // Write remaining key + Platform.copyMemory( + remainingEncoded, Platform.BYTE_ARRAY_OFFSET, + combinedData, Platform.BYTE_ARRAY_OFFSET + 4 + rangeScanKeyEncoded.length, + remainingEncoded.length + ) + + encodeStateRowWithPrefix(combinedData) } override def decodeKey(keyBytes: Array[Byte]): UnsafeRow = { - val prefixKeyEncodedLen = Platform.getInt(keyBytes, decodeKeyStartOffset) + // First decode metadata prefixes to get the actual key data + val keyData = decodeStateRowData(keyBytes) + + // Get range scan key length and extract it + val prefixKeyEncodedLen = Platform.getInt(keyData, Platform.BYTE_ARRAY_OFFSET) val prefixKeyEncoded = new Array[Byte](prefixKeyEncodedLen) - Platform.copyMemory(keyBytes, decodeKeyStartOffset + 4, - prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET, prefixKeyEncodedLen) + Platform.copyMemory( + keyData, Platform.BYTE_ARRAY_OFFSET + 4, + prefixKeyEncoded, Platform.BYTE_ARRAY_OFFSET, + prefixKeyEncodedLen + ) - val prefixKeyDecodedForRangeScan = decodeToUnsafeRow(prefixKeyEncoded, - numFields = orderingOrdinals.length) - val prefixKeyDecoded = decodePrefixKeyForRangeScan(prefixKeyDecodedForRangeScan) + // Decode the range scan prefix key + val 
prefixKeyDecoded = dataEncoder.decodePrefixKeyForRangeScan(prefixKeyEncoded) if (orderingOrdinals.length < keySchema.length) { - // Here we calculate the remainingKeyEncodedLen leveraging the length of keyBytes - val remainingKeyEncodedLen = keyBytes.length - 4 - - prefixKeyEncodedLen - offsetForColFamilyPrefix - + // We have remaining key parts to decode + val remainingKeyEncodedLen = keyData.length - 4 - prefixKeyEncodedLen val remainingKeyEncoded = new Array[Byte](remainingKeyEncodedLen) - Platform.copyMemory(keyBytes, decodeKeyStartOffset + 4 + prefixKeyEncodedLen, + Platform.copyMemory( + keyData, Platform.BYTE_ARRAY_OFFSET + 4 + prefixKeyEncodedLen, remainingKeyEncoded, Platform.BYTE_ARRAY_OFFSET, - remainingKeyEncodedLen) + remainingKeyEncodedLen + ) - val remainingKeyDecoded = decodeToUnsafeRow(remainingKeyEncoded, - numFields = keySchema.length - orderingOrdinals.length) + // Decode remaining key + val remainingKeyDecoded = dataEncoder.decodeRemainingKey(remainingKeyEncoded) + // Combine the parts and restore full key val joined = joinedRowOnKey.withLeft(prefixKeyDecoded).withRight(remainingKeyDecoded) - val restored = restoreKeyProjection(joined) - restored + restoreKeyProjection(joined) } else { - // if the number of ordering cols is same as the number of key schema cols, we only - // return the prefix key decoded unsafe row. 
+ // No remaining key parts - return just the prefix key prefixKeyDecoded } } override def encodePrefixKey(prefixKey: UnsafeRow): Array[Byte] = { - val rangeScanKeyEncoded = encodeUnsafeRow(encodePrefixKeyForRangeScan(prefixKey)) - val (prefix, startingOffset) = encodeColumnFamilyPrefix(rangeScanKeyEncoded.length + 4) + // First encode the range scan ordered prefix + val rangeScanKeyEncoded = dataEncoder.encodePrefixKeyForRangeScan(prefixKey) + + // Add length prefix + val dataWithLength = new Array[Byte](4 + rangeScanKeyEncoded.length) + Platform.putInt(dataWithLength, Platform.BYTE_ARRAY_OFFSET, rangeScanKeyEncoded.length) + Platform.copyMemory( + rangeScanKeyEncoded, Platform.BYTE_ARRAY_OFFSET, + dataWithLength, Platform.BYTE_ARRAY_OFFSET + 4, + rangeScanKeyEncoded.length + ) - Platform.putInt(prefix, startingOffset, rangeScanKeyEncoded.length) - Platform.copyMemory(rangeScanKeyEncoded, Platform.BYTE_ARRAY_OFFSET, - prefix, startingOffset + 4, rangeScanKeyEncoded.length) - prefix + encodeStateRowWithPrefix(dataWithLength) } override def supportPrefixKeyScan: Boolean = true @@ -659,53 +1522,56 @@ class RangeKeyScanStateEncoder( * then the generated array byte will be N+1 bytes. 
*/ class NoPrefixKeyStateEncoder( + dataEncoder: RocksDBDataEncoder, keySchema: StructType, useColumnFamilies: Boolean = false, - virtualColFamilyId: Option[Short] = None) - extends RocksDBKeyStateEncoderBase(useColumnFamilies, virtualColFamilyId) { - - import RocksDBStateEncoder._ - - // Reusable objects - private val keyRow = new UnsafeRow(keySchema.size) + columnFamilyInfo: Option[ColumnFamilyInfo] = None) + extends StateRowPrefixEncoder( + useColumnFamilies, + columnFamilyInfo + ) with RocksDBKeyStateEncoder with Logging { override def encodeKey(row: UnsafeRow): Array[Byte] = { if (!useColumnFamilies) { - encodeUnsafeRow(row) + encodeStateRowWithPrefix(dataEncoder.encodeKey(row)) } else { - val bytesToEncode = row.getBytes - val (encodedBytes, startingOffset) = encodeColumnFamilyPrefix( - bytesToEncode.length + - STATE_ENCODING_NUM_VERSION_BYTES - ) + // First encode the row with the data encoder + val rowBytes = dataEncoder.encodeKey(row) - Platform.putByte(encodedBytes, startingOffset, STATE_ENCODING_VERSION) - // Platform.BYTE_ARRAY_OFFSET is the recommended way to memcopy b/w byte arrays. See Platform. + // Create data array with version byte + val dataWithVersion = new Array[Byte](STATE_ENCODING_NUM_VERSION_BYTES + rowBytes.length) + Platform.putByte(dataWithVersion, Platform.BYTE_ARRAY_OFFSET, STATE_ENCODING_VERSION) Platform.copyMemory( - bytesToEncode, Platform.BYTE_ARRAY_OFFSET, - encodedBytes, startingOffset + STATE_ENCODING_NUM_VERSION_BYTES, bytesToEncode.length) - encodedBytes + rowBytes, Platform.BYTE_ARRAY_OFFSET, + dataWithVersion, Platform.BYTE_ARRAY_OFFSET + STATE_ENCODING_NUM_VERSION_BYTES, + rowBytes.length + ) + + encodeStateRowWithPrefix(dataWithVersion) } } - /** - * Decode byte array for a key to a UnsafeRow. - * @note The UnsafeRow returned is reused across calls, and the UnsafeRow just points to - * the given byte array. 
- */ override def decodeKey(keyBytes: Array[Byte]): UnsafeRow = { - if (useColumnFamilies) { - if (keyBytes != null) { - // Platform.BYTE_ARRAY_OFFSET is the recommended way refer to the 1st offset. See Platform. - keyRow.pointTo( - keyBytes, - decodeKeyStartOffset + STATE_ENCODING_NUM_VERSION_BYTES, - keyBytes.length - STATE_ENCODING_NUM_VERSION_BYTES - VIRTUAL_COL_FAMILY_PREFIX_BYTES) - keyRow - } else { - null - } - } else decodeToUnsafeRow(keyBytes, keyRow) + if (!useColumnFamilies) { + dataEncoder.decodeKey(decodeStateRowData(keyBytes)) + } else if (keyBytes == null) { + null + } else { + // First decode the metadata prefixes + val dataWithVersion = decodeStateRowData(keyBytes) + + // Skip version byte to get to actual data + val dataLength = dataWithVersion.length - STATE_ENCODING_NUM_VERSION_BYTES + + // Extract data bytes and decode using data encoder + val dataBytes = new Array[Byte](dataLength) + Platform.copyMemory( + dataWithVersion, Platform.BYTE_ARRAY_OFFSET + STATE_ENCODING_NUM_VERSION_BYTES, + dataBytes, Platform.BYTE_ARRAY_OFFSET, + dataLength + ) + dataEncoder.decodeKey(dataBytes) + } } override def supportPrefixKeyScan: Boolean = false @@ -728,35 +1594,42 @@ class NoPrefixKeyStateEncoder( * merged in RocksDB using merge operation, and all merged values can be read using decodeValues * operation. 
*/ -class MultiValuedStateEncoder(valueSchema: StructType) +class MultiValuedStateEncoder( + dataEncoder: RocksDBDataEncoder, + valueSchema: StructType) extends RocksDBValueStateEncoder with Logging { - import RocksDBStateEncoder._ - - // Reusable objects - private val valueRow = new UnsafeRow(valueSchema.size) - override def encodeValue(row: UnsafeRow): Array[Byte] = { - val bytes = encodeUnsafeRow(row) - val numBytes = bytes.length + // First encode the row using either Avro or UnsafeRow encoding + val rowBytes = dataEncoder.encodeValue(row) - val encodedBytes = new Array[Byte](java.lang.Integer.BYTES + bytes.length) - Platform.putInt(encodedBytes, Platform.BYTE_ARRAY_OFFSET, numBytes) - Platform.copyMemory(bytes, Platform.BYTE_ARRAY_OFFSET, - encodedBytes, java.lang.Integer.BYTES + Platform.BYTE_ARRAY_OFFSET, bytes.length) + // Create data array with length prefix + val dataWithLength = new Array[Byte](java.lang.Integer.BYTES + rowBytes.length) + Platform.putInt(dataWithLength, Platform.BYTE_ARRAY_OFFSET, rowBytes.length) + Platform.copyMemory( + rowBytes, Platform.BYTE_ARRAY_OFFSET, + dataWithLength, Platform.BYTE_ARRAY_OFFSET + java.lang.Integer.BYTES, + rowBytes.length + ) - encodedBytes + dataWithLength } override def decodeValue(valueBytes: Array[Byte]): UnsafeRow = { if (valueBytes == null) { null } else { - val numBytes = Platform.getInt(valueBytes, Platform.BYTE_ARRAY_OFFSET) + // First decode the metadata prefixes + val dataWithLength = valueBytes + // Get the value length and extract value bytes + val numBytes = Platform.getInt(dataWithLength, Platform.BYTE_ARRAY_OFFSET) val encodedValue = new Array[Byte](numBytes) - Platform.copyMemory(valueBytes, java.lang.Integer.BYTES + Platform.BYTE_ARRAY_OFFSET, - encodedValue, Platform.BYTE_ARRAY_OFFSET, numBytes) - decodeToUnsafeRow(encodedValue, valueRow) + Platform.copyMemory( + dataWithLength, Platform.BYTE_ARRAY_OFFSET + java.lang.Integer.BYTES, + encodedValue, Platform.BYTE_ARRAY_OFFSET, + numBytes + ) + 
dataEncoder.decodeValue(encodedValue) } } @@ -768,21 +1641,23 @@ class MultiValuedStateEncoder(valueSchema: StructType) private var pos: Int = Platform.BYTE_ARRAY_OFFSET private val maxPos = Platform.BYTE_ARRAY_OFFSET + valueBytes.length - override def hasNext: Boolean = { - pos < maxPos - } + override def hasNext: Boolean = pos < maxPos override def next(): UnsafeRow = { + // Get value length val numBytes = Platform.getInt(valueBytes, pos) - pos += java.lang.Integer.BYTES - val encodedValue = new Array[Byte](numBytes) - Platform.copyMemory(valueBytes, pos, - encodedValue, Platform.BYTE_ARRAY_OFFSET, numBytes) + // Extract value bytes + val encodedValue = new Array[Byte](numBytes) + Platform.copyMemory( + valueBytes, pos, + encodedValue, Platform.BYTE_ARRAY_OFFSET, + numBytes + ) pos += numBytes pos += 1 // eat the delimiter character - decodeToUnsafeRow(encodedValue, valueRow) + dataEncoder.decodeValue(encodedValue) } } } @@ -803,24 +1678,23 @@ class MultiValuedStateEncoder(valueSchema: StructType) * (offset 0 is the version byte of value 0). That is, if the unsafe row has N bytes, * then the generated array byte will be N+1 bytes. */ -class SingleValueStateEncoder(valueSchema: StructType) - extends RocksDBValueStateEncoder { - - import RocksDBStateEncoder._ - - // Reusable objects - private val valueRow = new UnsafeRow(valueSchema.size) +class SingleValueStateEncoder( + dataEncoder: RocksDBDataEncoder, + valueSchema: StructType) + extends RocksDBValueStateEncoder with Logging { - override def encodeValue(row: UnsafeRow): Array[Byte] = encodeUnsafeRow(row) + override def encodeValue(row: UnsafeRow): Array[Byte] = { + dataEncoder.encodeValue(row) + } - /** - * Decode byte array for a value to a UnsafeRow. - * - * @note The UnsafeRow returned is reused across calls, and the UnsafeRow just points to - * the given byte array. 
- */ override def decodeValue(valueBytes: Array[Byte]): UnsafeRow = { - decodeToUnsafeRow(valueBytes, valueRow) + if (valueBytes == null) { + return null + } + // First decode the metadata prefixes + val data = valueBytes + // Decode the actual value using either Avro or UnsafeRow + dataEncoder.decodeValue(data) } override def supportsMultipleValuesPerKey: Boolean = false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala index 1fc6ab5910c6c..9fc48a60d7c6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.streaming.state import java.io._ -import java.util.concurrent.ConcurrentHashMap +import java.util.UUID +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} import scala.util.control.NonFatal @@ -31,9 +32,9 @@ import org.apache.spark.internal.LogKeys._ import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.errors.QueryExecutionErrors -import org.apache.spark.sql.execution.streaming.CheckpointFileManager +import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, StreamExecution} import org.apache.spark.sql.types.StructType -import org.apache.spark.util.Utils +import org.apache.spark.util.{NonFateSharingCache, Utils} private[sql] class RocksDBStateStoreProvider extends StateStoreProvider with Logging with Closeable @@ -74,10 +75,30 @@ private[sql] class RocksDBStateStoreProvider isInternal: Boolean = false): Unit = { verifyColFamilyCreationOrDeletion("create_col_family", colFamilyName, isInternal) val newColFamilyId = rocksDB.createColFamilyIfAbsent(colFamilyName) - 
keyValueEncoderMap.putIfAbsent(colFamilyName, - (RocksDBStateEncoder.getKeyEncoder(keyStateEncoderSpec, useColumnFamilies, - Some(newColFamilyId)), RocksDBStateEncoder.getValueEncoder(valueSchema, - useMultipleValuesPerKey))) + val dataEncoderCacheKey = StateRowEncoderCacheKey( + queryRunId = getRunId(hadoopConf), + operatorId = stateStoreId.operatorId, + partitionId = stateStoreId.partitionId, + stateStoreName = stateStoreId.storeName, + colFamilyName = colFamilyName) + + val dataEncoder = getDataEncoder( + stateStoreEncoding, dataEncoderCacheKey, keyStateEncoderSpec, valueSchema) + + val columnFamilyInfo = Some(ColumnFamilyInfo(colFamilyName, newColFamilyId)) + + val keyEncoder = RocksDBStateEncoder.getKeyEncoder( + dataEncoder, + keyStateEncoderSpec, + useColumnFamilies, + columnFamilyInfo + ) + val valueEncoder = RocksDBStateEncoder.getValueEncoder( + dataEncoder, + valueSchema, + useMultipleValuesPerKey + ) + keyValueEncoderMap.putIfAbsent(colFamilyName, (keyEncoder, valueEncoder)) } override def get(key: UnsafeRow, colFamilyName: String): UnsafeRow = { @@ -364,6 +385,7 @@ private[sql] class RocksDBStateStoreProvider this.storeConf = storeConf this.hadoopConf = hadoopConf this.useColumnFamilies = useColumnFamilies + this.stateStoreEncoding = storeConf.stateStoreEncodingFormat if (useMultipleValuesPerKey) { require(useColumnFamilies, "Multiple values per key support requires column families to be" + @@ -373,14 +395,35 @@ private[sql] class RocksDBStateStoreProvider rocksDB // lazy initialization var defaultColFamilyId: Option[Short] = None - if (useColumnFamilies) { + val dataEncoderCacheKey = StateRowEncoderCacheKey( + queryRunId = getRunId(hadoopConf), + operatorId = stateStoreId.operatorId, + partitionId = stateStoreId.partitionId, + stateStoreName = stateStoreId.storeName, + colFamilyName = StateStore.DEFAULT_COL_FAMILY_NAME) + + val dataEncoder = getDataEncoder( + stateStoreEncoding, dataEncoderCacheKey, keyStateEncoderSpec, valueSchema) + + val 
columnFamilyInfo = if (useColumnFamilies) { defaultColFamilyId = Some(rocksDB.createColFamilyIfAbsent(StateStore.DEFAULT_COL_FAMILY_NAME)) + Some(ColumnFamilyInfo(StateStore.DEFAULT_COL_FAMILY_NAME, defaultColFamilyId.get)) + } else { + None } - keyValueEncoderMap.putIfAbsent(StateStore.DEFAULT_COL_FAMILY_NAME, - (RocksDBStateEncoder.getKeyEncoder(keyStateEncoderSpec, - useColumnFamilies, defaultColFamilyId), - RocksDBStateEncoder.getValueEncoder(valueSchema, useMultipleValuesPerKey))) + val keyEncoder = RocksDBStateEncoder.getKeyEncoder( + dataEncoder, + keyStateEncoderSpec, + useColumnFamilies, + columnFamilyInfo + ) + val valueEncoder = RocksDBStateEncoder.getValueEncoder( + dataEncoder, + valueSchema, + useMultipleValuesPerKey + ) + keyValueEncoderMap.putIfAbsent(StateStore.DEFAULT_COL_FAMILY_NAME, (keyEncoder, valueEncoder)) } override def stateStoreId: StateStoreId = stateStoreId_ @@ -396,7 +439,8 @@ private[sql] class RocksDBStateStoreProvider new RocksDBStateStore(version) } catch { - case e: SparkException if e.getCondition.contains("CANNOT_LOAD_STATE_STORE") => + case e: SparkException + if Option(e.getCondition).exists(_.contains("CANNOT_LOAD_STATE_STORE")) => throw e case e: OutOfMemoryError => throw QueryExecutionErrors.notEnoughMemoryToLoadStore( @@ -419,7 +463,8 @@ private[sql] class RocksDBStateStoreProvider new RocksDBStateStore(version) } catch { - case e: SparkException if e.getCondition.contains("CANNOT_LOAD_STATE_STORE") => + case e: SparkException + if Option(e.getCondition).exists(_.contains("CANNOT_LOAD_STATE_STORE")) => throw e case e: OutOfMemoryError => throw QueryExecutionErrors.notEnoughMemoryToLoadStore( @@ -458,6 +503,7 @@ private[sql] class RocksDBStateStoreProvider @volatile private var storeConf: StateStoreConf = _ @volatile private var hadoopConf: Configuration = _ @volatile private var useColumnFamilies: Boolean = _ + @volatile private var stateStoreEncoding: String = _ private[sql] lazy val rocksDB = { val dfsRootDir = 
stateStoreId.storeCheckpointLocation().toString @@ -587,11 +633,80 @@ private[sql] class RocksDBStateStoreProvider } } + +case class StateRowEncoderCacheKey( + queryRunId: String, + operatorId: Long, + partitionId: Int, + stateStoreName: String, + colFamilyName: String +) + object RocksDBStateStoreProvider { // Version as a single byte that specifies the encoding of the row data in RocksDB val STATE_ENCODING_NUM_VERSION_BYTES = 1 val STATE_ENCODING_VERSION: Byte = 0 val VIRTUAL_COL_FAMILY_PREFIX_BYTES = 2 + val SCHEMA_ID_PREFIX_BYTES = 2 + + private val MAX_AVRO_ENCODERS_IN_CACHE = 1000 + private val AVRO_ENCODER_LIFETIME_HOURS = 1L + private val DEFAULT_SCHEMA_IDS = StateSchemaInfo(0, 0) + + // Add the cache at companion object level so it persists across provider instances + private val dataEncoderCache: NonFateSharingCache[StateRowEncoderCacheKey, RocksDBDataEncoder] = + NonFateSharingCache( + maximumSize = MAX_AVRO_ENCODERS_IN_CACHE, + expireAfterAccessTime = AVRO_ENCODER_LIFETIME_HOURS, + expireAfterAccessTimeUnit = TimeUnit.HOURS + ) + + /** + * Creates and returns a data encoder for the state store based on the specified encoding type. + * This method handles caching of encoders to improve performance by reusing encoder instances + * when possible. 
+ * + * The method supports two encoding types: + * - Avro: Uses Apache Avro for serialization with schema evolution support + * - UnsafeRow: Uses Spark's internal row format for optimal performance + * + * @param stateStoreEncoding The encoding type to use ("avro" or "unsaferow") + * @param encoderCacheKey A unique key for caching the encoder instance, typically combining + * query ID, operator ID, partition ID, and column family name + * @param keyStateEncoderSpec Specification for how to encode keys, including schema and any + * prefix/range scan requirements + * @param valueSchema The schema for the values to be encoded + * @return A RocksDBDataEncoder instance configured for the specified encoding type + */ + def getDataEncoder( + stateStoreEncoding: String, + encoderCacheKey: StateRowEncoderCacheKey, + keyStateEncoderSpec: KeyStateEncoderSpec, + valueSchema: StructType): RocksDBDataEncoder = { + assert(Set("avro", "unsaferow").contains(stateStoreEncoding)) + RocksDBStateStoreProvider.dataEncoderCache.get( + encoderCacheKey, + new java.util.concurrent.Callable[RocksDBDataEncoder] { + override def call(): RocksDBDataEncoder = { + if (stateStoreEncoding == "avro") { + new AvroStateEncoder(keyStateEncoderSpec, valueSchema, Some(DEFAULT_SCHEMA_IDS)) + } else { + new UnsafeRowDataEncoder(keyStateEncoderSpec, valueSchema, None) + } + } + } + ) + } + + private def getRunId(hadoopConf: Configuration): String = { + val runId = hadoopConf.get(StreamExecution.RUN_ID_KEY) + if (runId != null) { + runId + } else { + assert(Utils.isTesting, "Failed to find query id/batch Id in task context") + UUID.randomUUID().toString + } + } // Native operation latencies report as latency in microseconds // as SQLMetrics support millis. 
Convert the value to millis diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala index 721d72b6a0991..48b15ac04f40b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.sql.avro.{AvroDeserializer, AvroSerializer} import org.apache.spark.sql.catalyst.util.UnsafeRowUtils import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, StatefulOperatorStateInfo} import org.apache.spark.sql.execution.streaming.state.SchemaHelper.{SchemaReader, SchemaWriter} @@ -37,6 +38,30 @@ case class StateSchemaValidationResult( schemaPath: String ) +/** + * An Avro-based encoder used for serializing between UnsafeRow and Avro + * byte arrays in RocksDB state stores. + * + * This encoder is primarily utilized by [[RocksDBStateStoreProvider]] and [[RocksDBStateEncoder]] + * to handle serialization and deserialization of state store data. 
+ * + * @param keySerializer Serializer for converting state store keys to Avro format + * @param keyDeserializer Deserializer for converting Avro-encoded keys back to UnsafeRow + * @param valueSerializer Serializer for converting state store values to Avro format + * @param valueDeserializer Deserializer for converting Avro-encoded values back to UnsafeRow + * @param suffixKeySerializer Optional serializer for handling suffix keys in Avro format + * @param suffixKeyDeserializer Optional deserializer for converting Avro-encoded suffix + * keys back to UnsafeRow + */ +case class AvroEncoder( + keySerializer: AvroSerializer, + keyDeserializer: AvroDeserializer, + valueSerializer: AvroSerializer, + valueDeserializer: AvroDeserializer, + suffixKeySerializer: Option[AvroSerializer] = None, + suffixKeyDeserializer: Option[AvroDeserializer] = None +) extends Serializable + // Used to represent the schema of a column family in the state store case class StateStoreColFamilySchema( colFamilyName: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 72bc3ca33054d..33df8ad42747c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -37,10 +37,22 @@ import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.util.UnsafeRowUtils import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} -import org.apache.spark.sql.execution.streaming.StatefulOperatorStateInfo +import org.apache.spark.sql.execution.streaming.{StatefulOperatorStateInfo, StreamExecution} import org.apache.spark.sql.types.StructType import org.apache.spark.util.{NextIterator, ThreadUtils, Utils} +sealed trait 
StateStoreEncoding { + override def toString: String = this match { + case StateStoreEncoding.UnsafeRow => "unsaferow" + case StateStoreEncoding.Avro => "avro" + } +} + +object StateStoreEncoding { + case object UnsafeRow extends StateStoreEncoding + case object Avro extends StateStoreEncoding +} + /** * Base trait for a versioned key-value store which provides read operations. Each instance of a * `ReadStateStore` represents a specific version of state data, and such instances are created @@ -310,8 +322,22 @@ case class StateStoreCustomTimingMetric(name: String, desc: String) extends Stat } sealed trait KeyStateEncoderSpec { + def keySchema: StructType def jsonValue: JValue def json: String = compact(render(jsonValue)) + + /** + * Creates a RocksDBKeyStateEncoder for this specification. + * + * @param dataEncoder The encoder to handle the actual data encoding/decoding + * @param useColumnFamilies Whether to use RocksDB column families + * @param virtualColFamilyId Optional column family ID when column families are used + * @return A RocksDBKeyStateEncoder configured for this spec + */ + def toEncoder( + dataEncoder: RocksDBDataEncoder, + useColumnFamilies: Boolean, + columnFamilyInfo: Option[ColumnFamilyInfo]): RocksDBKeyStateEncoder } object KeyStateEncoderSpec { @@ -335,6 +361,14 @@ case class NoPrefixKeyStateEncoderSpec(keySchema: StructType) extends KeyStateEn override def jsonValue: JValue = { ("keyStateEncoderType" -> JString("NoPrefixKeyStateEncoderSpec")) } + + override def toEncoder( + dataEncoder: RocksDBDataEncoder, + useColumnFamilies: Boolean, + columnFamilyInfo: Option[ColumnFamilyInfo]): RocksDBKeyStateEncoder = { + new NoPrefixKeyStateEncoder( + dataEncoder, keySchema, useColumnFamilies, columnFamilyInfo) + } } case class PrefixKeyScanStateEncoderSpec( @@ -344,6 +378,15 @@ case class PrefixKeyScanStateEncoderSpec( throw StateStoreErrors.incorrectNumOrderingColsForPrefixScan(numColsPrefixKey.toString) } + override def toEncoder( + dataEncoder: 
RocksDBDataEncoder, + useColumnFamilies: Boolean, + columnFamilyInfo: Option[ColumnFamilyInfo]): RocksDBKeyStateEncoder = { + new PrefixKeyScanStateEncoder( + dataEncoder, keySchema, numColsPrefixKey, useColumnFamilies, columnFamilyInfo) + } + + override def jsonValue: JValue = { ("keyStateEncoderType" -> JString("PrefixKeyScanStateEncoderSpec")) ~ ("numColsPrefixKey" -> JInt(numColsPrefixKey)) @@ -358,6 +401,14 @@ case class RangeKeyScanStateEncoderSpec( throw StateStoreErrors.incorrectNumOrderingColsForRangeScan(orderingOrdinals.length.toString) } + override def toEncoder( + dataEncoder: RocksDBDataEncoder, + useColumnFamilies: Boolean, + columnFamilyInfo: Option[ColumnFamilyInfo]): RocksDBKeyStateEncoder = { + new RangeKeyScanStateEncoder( + dataEncoder, keySchema, orderingOrdinals, useColumnFamilies, columnFamilyInfo) + } + override def jsonValue: JValue = { ("keyStateEncoderType" -> JString("RangeKeyScanStateEncoderSpec")) ~ ("orderingOrdinals" -> orderingOrdinals.map(JInt(_))) @@ -746,6 +797,7 @@ object StateStore extends Logging { storeConf: StateStoreConf, hadoopConf: Configuration, useMultipleValuesPerKey: Boolean = false): ReadStateStore = { + hadoopConf.set(StreamExecution.RUN_ID_KEY, storeProviderId.queryRunId.toString) if (version < 0) { throw QueryExecutionErrors.unexpectedStateStoreVersion(version) } @@ -766,9 +818,10 @@ object StateStore extends Logging { storeConf: StateStoreConf, hadoopConf: Configuration, useMultipleValuesPerKey: Boolean = false): StateStore = { + hadoopConf.set(StreamExecution.RUN_ID_KEY, storeProviderId.queryRunId.toString) if (version < 0) { throw QueryExecutionErrors.unexpectedStateStoreVersion(version) } val storeProvider = getStateStoreProvider(storeProviderId, keySchema, valueSchema, keyStateEncoderSpec, useColumnFamilies, storeConf, hadoopConf, useMultipleValuesPerKey) storeProvider.getStore(version, stateStoreCkptId) @@ -923,7 +977,8 @@
object StateStore extends Logging { } finally { val duration = System.currentTimeMillis() - startTime val logMsg = - log"Finished maintenance task for provider=${MDC(LogKeys.STATE_STORE_PROVIDER, id)}" + + log"Finished maintenance task for " + + log"provider=${MDC(LogKeys.STATE_STORE_PROVIDER_ID, id)}" + log" in elapsed_time=${MDC(LogKeys.TIME_UNITS, duration)}\n" if (duration > 5000) { logInfo(logMsg) @@ -953,9 +1008,9 @@ object StateStore extends Logging { .map(_.reportActiveInstance(storeProviderId, host, executorId, otherProviderIds)) .getOrElse(Seq.empty[StateStoreProviderId]) logInfo(log"Reported that the loaded instance " + - log"${MDC(LogKeys.STATE_STORE_PROVIDER, storeProviderId)} is active") + log"${MDC(LogKeys.STATE_STORE_PROVIDER_ID, storeProviderId)} is active") logDebug(log"The loaded instances are going to unload: " + - log"${MDC(LogKeys.STATE_STORE_PROVIDER, providerIdsToUnload.mkString(", "))}") + log"${MDC(LogKeys.STATE_STORE_PROVIDER_IDS, providerIdsToUnload)}") providerIdsToUnload } else { Seq.empty[StateStoreProviderId] @@ -987,7 +1042,7 @@ object StateStore extends Logging { _coordRef = StateStoreCoordinatorRef.forExecutor(env) } logInfo(log"Retrieved reference to StateStoreCoordinator: " + - log"${MDC(LogKeys.STATE_STORE_PROVIDER, _coordRef)}") + log"${MDC(LogKeys.STATE_STORE_COORDINATOR, _coordRef)}") Some(_coordRef) } else { _coordRef = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreChangelog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreChangelog.scala index 203af9d10217e..b4fbb5560f2f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreChangelog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreChangelog.scala @@ -24,6 +24,8 @@ import scala.util.control.NonFatal import com.google.common.io.ByteStreams import org.apache.commons.io.IOUtils import 
org.apache.hadoop.fs.{FSError, Path} +import org.json4s._ +import org.json4s.jackson.Serialization import org.apache.spark.internal.{Logging, MDC} import org.apache.spark.internal.LogKeys._ @@ -78,6 +80,14 @@ object RecordType extends Enumeration { } } +/** + * Class for lineage item for checkpoint format V2. + */ +case class LineageItem( + version: Long, + checkpointUniqueId: String +) + /** * Base class for state store changelog writer * @param fm - checkpoint file manager used to manage streaming query checkpoint @@ -89,18 +99,27 @@ abstract class StateStoreChangelogWriter( file: Path, compressionCodec: CompressionCodec) extends Logging { + implicit val formats: Formats = DefaultFormats + private def compressStream(outputStream: DataOutputStream): DataOutputStream = { val compressed = compressionCodec.compressedOutputStream(outputStream) new DataOutputStream(compressed) } + protected var backingFileStream: CancellableFSDataOutputStream = + fm.createAtomic(file, overwriteIfPossible = true) + protected var compressedStream: DataOutputStream = compressStream(backingFileStream) + protected def writeVersion(): Unit = { compressedStream.writeUTF(s"v${version}") } - protected var backingFileStream: CancellableFSDataOutputStream = - fm.createAtomic(file, overwriteIfPossible = true) - protected var compressedStream: DataOutputStream = compressStream(backingFileStream) + protected def writeLineage(stateStoreCheckpointIdLineage: Array[LineageItem]): Unit = { + assert(version >= 3, + "writeLineage should only be invoked with state store checkpoint id enabled (version >= 3)") + val lineageStr = Serialization.write(stateStoreCheckpointIdLineage) + compressedStream.writeUTF(lineageStr) + } def version: Short @@ -115,9 +134,9 @@ abstract class StateStoreChangelogWriter( if (backingFileStream != null) backingFileStream.cancel() if (compressedStream != null) IOUtils.closeQuietly(compressedStream) } catch { - // Closing the compressedStream causes the stream to write/flush flush 
data into the + // Closing the compressedStream causes the stream to write/flush data into the // rawStream. Since the rawStream is already closed, there may be errors. - // Usually its an IOException. However, Hadoop's RawLocalFileSystem wraps + // Usually it's an IOException. However, Hadoop's RawLocalFileSystem wraps // IOException into FSError. case e: FSError if e.getCause.isInstanceOf[IOException] => case NonFatal(ex) => @@ -152,15 +171,15 @@ class StateStoreChangelogWriterV1( override def put(key: Array[Byte], value: Array[Byte]): Unit = { assert(compressedStream != null) - compressedStream.writeInt(key.size) + compressedStream.writeInt(key.length) compressedStream.write(key) - compressedStream.writeInt(value.size) + compressedStream.writeInt(value.length) compressedStream.write(value) } override def delete(key: Array[Byte]): Unit = { assert(compressedStream != null) - compressedStream.writeInt(key.size) + compressedStream.writeInt(key.length) compressedStream.write(key) // -1 in the value field means record deletion. compressedStream.writeInt(-1) @@ -206,7 +225,7 @@ class StateStoreChangelogWriterV2( override def version: Short = 2 - // append the version field to the changelog file starting from version 2 + // append the version field to the changelog file writeVersion() override def put(key: Array[Byte], value: Array[Byte]): Unit = { @@ -216,7 +235,7 @@ class StateStoreChangelogWriterV2( override def delete(key: Array[Byte]): Unit = { assert(compressedStream != null) compressedStream.write(RecordType.getRecordTypeAsByte(RecordType.DELETE_RECORD)) - compressedStream.writeInt(key.size) + compressedStream.writeInt(key.length) compressedStream.write(key) // -1 in the value field means record deletion. 
compressedStream.writeInt(-1) @@ -232,9 +251,9 @@ class StateStoreChangelogWriterV2( assert(recordType == RecordType.PUT_RECORD || recordType == RecordType.MERGE_RECORD) assert(compressedStream != null) compressedStream.write(RecordType.getRecordTypeAsByte(recordType)) - compressedStream.writeInt(key.size) + compressedStream.writeInt(key.length) compressedStream.write(key) - compressedStream.writeInt(value.size) + compressedStream.writeInt(value.length) compressedStream.write(value) } @@ -255,6 +274,128 @@ class StateStoreChangelogWriterV2( } } +/** + * Write changes to the key value state store instance to a changelog file. + * There are 2 types of records, put and delete. + * A put record is written as: | key length | key content | value length | value content | + * A delete record is written as: | key length | key content | -1 | + * Write an Int -1 to signal the end of file. + * The overall changelog format is: | put record | delete record | ... | put record | -1 | + * V3 is an extension of V1 for writing changelogs with version + * in the first line and lineage in the second line. + */ +class StateStoreChangelogWriterV3( + fm: CheckpointFileManager, + file: Path, + compressionCodec: CompressionCodec, + stateStoreCheckpointIdLineage: Array[LineageItem]) + extends StateStoreChangelogWriterV1(fm, file, compressionCodec) { + + override def version: Short = 3 + + // append the version field to the changelog file + writeVersion() + + // Also write lineage information to the changelog, it should appear + // in the second line for v3 because the first line is the version + writeLineage(stateStoreCheckpointIdLineage) +} + +/** + * Write changes to the key value state store instance to a changelog file. + * There are 3 types of data records, put, merge and delete.
+ * A put record or merge record is written as: | record type | key length + * | key content | value length | value content | -1 | + * A delete record is written as: | record type | key length | key content | -1 + * Write an EOF_RECORD to signal the end of file. + * The overall changelog format is: version | put record | delete record + * | ... | put record | eof record | + * V4 is an extension of V2 for writing changelogs with version + * in the first line and lineage in the second line. + */ +class StateStoreChangelogWriterV4( + fm: CheckpointFileManager, + file: Path, + compressionCodec: CompressionCodec, + stateStoreCheckpointIdLineage: Array[LineageItem]) + extends StateStoreChangelogWriterV2(fm, file, compressionCodec) { + + override def version: Short = 4 + + // Also write lineage information to the changelog, it should appear + // in the second line for v4 because the first line is the version + writeLineage(stateStoreCheckpointIdLineage) +} + +/** + * A factory class for constructing state store readers by reading the first line + * of the change log file, which stores the version. + * Note that for changelog version 1, there is no version written.
+ * + * @param fm - checkpoint file manager used to manage streaming query checkpoint + * @param fileToRead - name of file to use to read changelog + * @param compressionCodec - de-compression method used for reading changelog file + */ +class StateStoreChangelogReaderFactory( + fm: CheckpointFileManager, + fileToRead: Path, + compressionCodec: CompressionCodec) extends Logging { + + private def decompressStream(inputStream: DataInputStream): DataInputStream = { + val compressed = compressionCodec.compressedInputStream(inputStream) + new DataInputStream(compressed) + } + + private lazy val sourceStream = try { + fm.open(fileToRead) + } catch { + case f: FileNotFoundException => + throw QueryExecutionErrors.failedToReadStreamingStateFileError(fileToRead, f) + } + protected val input: DataInputStream = decompressStream(sourceStream) + + private lazy val changeLogVersion: Short = { + try { + val versionStr = input.readUTF() + // Versions in the first line are prefixed with "v", e.g. "v2" + // Since there is no version written for version 1, + // return 1 if first line doesn't start with "v" + if (!versionStr.startsWith("v")) { + 1 + } else { + versionStr.stripPrefix("v").toShort + } + } catch { + // When there is no record being written in the changelog file in V1, + // the file contains a single int -1 meaning EOF, then the above readUTF() + // throws with EOFException and we return version 1.
+ case _: java.io.EOFException => 1 + } + } + + /** + * Construct the change log reader based on the version stored in changelog file + * @return StateStoreChangelogReader + */ + def constructChangelogReader(): StateStoreChangelogReader = { + var reader: StateStoreChangelogReader = null + try { + reader = changeLogVersion match { + case 1 => new StateStoreChangelogReaderV1(fm, fileToRead, compressionCodec) + case 2 => new StateStoreChangelogReaderV2(fm, fileToRead, compressionCodec) + case 3 => new StateStoreChangelogReaderV3(fm, fileToRead, compressionCodec) + case 4 => new StateStoreChangelogReaderV4(fm, fileToRead, compressionCodec) + case version => throw QueryExecutionErrors.invalidChangeLogReaderVersion(version) + } + } finally { + if (input != null) { + input.close() + // input is not set to null because it is effectively lazy. + } + } + reader + } +} + /** * Base class for state store changelog reader * @param fm - checkpoint file manager used to manage streaming query checkpoint @@ -267,12 +408,14 @@ abstract class StateStoreChangelogReader( compressionCodec: CompressionCodec) extends NextIterator[(RecordType.Value, Array[Byte], Array[Byte])] with Logging { + implicit val formats: Formats = DefaultFormats + private def decompressStream(inputStream: DataInputStream): DataInputStream = { val compressed = compressionCodec.compressedInputStream(inputStream) new DataInputStream(compressed) } - private val sourceStream = try { + private lazy val sourceStream = try { fm.open(fileToRead) } catch { case f: FileNotFoundException => @@ -280,6 +423,26 @@ abstract class StateStoreChangelogReader( } protected val input: DataInputStream = decompressStream(sourceStream) + // This function is valid only when called upon initialization, + // because version is written in the first line only for version >= 2. 
+ protected def readVersion(): String = input.readUTF() + + protected def verifyVersion(): Unit = { + // ensure that the version read is correct, also updates file position + val changelogVersionStr = readVersion() + assert(changelogVersionStr == s"v${version}", + s"Changelog version mismatch: $changelogVersionStr != v${version}") + } + + private def readLineage(): Array[LineageItem] = { + assert(version >= 3, + "readLineage should only be invoked with state store checkpoint id enabled (version >= 3)") + val lineageStr = input.readUTF() + Serialization.read[Array[LineageItem]](lineageStr) + } + + lazy val lineage: Array[LineageItem] = readLineage() + def version: Short override protected def close(): Unit = { if (input != null) input.close() } @@ -352,10 +515,7 @@ class StateStoreChangelogReaderV2( override def version: Short = 2 - // ensure that the version read is v2 - val changelogVersionStr = input.readUTF() - assert(changelogVersionStr == "v2", - s"Changelog version mismatch: $changelogVersionStr != v2") + verifyVersion() override def getNext(): (RecordType.Value, Array[Byte], Array[Byte]) = { val recordType = RecordType.getRecordTypeFromByte(input.readByte()) @@ -388,6 +548,56 @@ } } +/** + * Read an iterator of change record from the changelog file. + * A record is represented by tuple(recordType: RecordType.Value, + * key: Array[Byte], value: Array[Byte]) + * A put record is returned as a tuple(recordType, key, value) + * A delete record is returned as a tuple(recordType, key, null) + * V3 is an extension of V1 for reading changelogs with version + * in the first line and lineage in the second line.
+ */ +class StateStoreChangelogReaderV3( + fm: CheckpointFileManager, + fileToRead: Path, + compressionCodec: CompressionCodec) + extends StateStoreChangelogReaderV1(fm, fileToRead, compressionCodec) { + + override def version: Short = 3 + + verifyVersion() + + // If the changelogFile is written when state store checkpoint unique id is enabled + // the first line would be the version and the second line would be the lineage. + // We should update the file position by reading from the lineage during + // the reader initialization. + lineage +} + +/** + * Read an iterator of change record from the changelog file. + * A record is represented by tuple(recordType: RecordType.Value, + * key: Array[Byte], value: Array[Byte]) + * A put or merge record is returned as a tuple(recordType, key, value) + * A delete record is returned as a tuple(recordType, key, null) + * V4 is an extension of V2 for reading changelogs with version + * in the first line and lineage in the second line. + */ +class StateStoreChangelogReaderV4( + fm: CheckpointFileManager, + fileToRead: Path, + compressionCodec: CompressionCodec) + extends StateStoreChangelogReaderV2(fm, fileToRead, compressionCodec) { + + override def version: Short = 4 + + // If the changelogFile is written when state store checkpoint unique id is enabled + // the first line would be the version and the second line would be the lineage. + // We should update the file position by reading from the lineage during + // the reader initialization. + lineage +} + /** * Base class representing a iterator that iterates over a range of changelog files in a state * store.
In each iteration, it will return a tuple of (changeType: [[RecordType]], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index c8af395e996d8..9d26bf8fdf2e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -83,6 +83,9 @@ class StateStoreConf( /** The interval of maintenance tasks. */ val maintenanceInterval = sqlConf.streamingMaintenanceInterval + /** The encoding format for state store rows ("avro" or "unsaferow"). */ + val stateStoreEncodingFormat = sqlConf.stateStoreEncodingFormat + /** * When creating new state store checkpoint, which format version to use. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 59a873ef982fe..bfd5915529118 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -20,6 +20,7 @@ import org.apache.spark.annotation.Unstable import org.apache.spark.sql.{ExperimentalMethods, SparkSession, UDFRegistration, _} import org.apache.spark.sql.artifact.ArtifactManager import org.apache.spark.sql.catalyst.analysis.{Analyzer, EvalSubqueriesForTimeTravel, FunctionRegistry, InvokeProcedures, ReplaceCharWithVarchar, ResolveSessionCatalog, ResolveTranspose, TableFunctionRegistry} +import org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension import org.apache.spark.sql.catalyst.catalog.{FunctionExpressionBuilder, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.optimizer.Optimizer @@ -199,6 +200,15 @@ abstract class
BaseSessionStateBuilder( protected def analyzer: Analyzer = new Analyzer(catalogManager) { override val hintResolutionRules: Seq[Rule[LogicalPlan]] = customHintResolutionRules + + override val singlePassResolverExtensions: Seq[ResolverExtension] = Seq( + new DataSourceResolver(session) + ) + + override val singlePassMetadataResolverExtensions: Seq[ResolverExtension] = Seq( + new FileResolver(session) + ) + override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = new FindDataSourceTable(session) +: new ResolveSQLOnFile(session) +: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 64689e75e2e5e..5fd88b417ac44 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -685,6 +685,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { optionExpression = newOptions, location = location, comment = { if (description.isEmpty) None else Some(description) }, + collation = None, serde = None, external = tableType == CatalogTableType.EXTERNAL) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/DataFrameWriterImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/DataFrameWriterImpl.scala index 16f9fcf77d622..5a96db5e34bbd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/DataFrameWriterImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/DataFrameWriterImpl.scala @@ -209,6 +209,7 @@ final class DataFrameWriterImpl[T] private[sql](ds: Dataset[T]) extends DataFram optionExpression = OptionList(Seq.empty), location = extraOptions.get("path"), comment = extraOptions.get(TableCatalog.PROP_COMMENT), + collation = extraOptions.get(TableCatalog.PROP_COLLATION), serde = None, external = false) runCommand(df.sparkSession) { @@ -382,6 +383,11 @@ final class DataFrameWriterImpl[T] 
private[sql](ds: Dataset[T]) extends DataFram } } + private def hasCustomSessionCatalog: Boolean = { + df.sparkSession.sessionState.conf + .getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION) != "builtin" + } + /** * Saves the content of the `DataFrame` as the specified table. * @@ -425,8 +431,7 @@ final class DataFrameWriterImpl[T] private[sql](ds: Dataset[T]) extends DataFram import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ val session = df.sparkSession - val canUseV2 = lookupV2Provider().isDefined || (df.sparkSession.sessionState.conf.getConf( - SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION).isDefined && + val canUseV2 = lookupV2Provider().isDefined || (hasCustomSessionCatalog && !df.sparkSession.sessionState.catalogManager.catalog(CatalogManager.SESSION_CATALOG_NAME) .isInstanceOf[CatalogExtension]) @@ -469,6 +474,7 @@ final class DataFrameWriterImpl[T] private[sql](ds: Dataset[T]) extends DataFram optionExpression = OptionList(Seq.empty), location = extraOptions.get("path"), comment = extraOptions.get(TableCatalog.PROP_COMMENT), + collation = extraOptions.get(TableCatalog.PROP_COLLATION), serde = None, external = false) ReplaceTableAsSelect( @@ -489,6 +495,7 @@ final class DataFrameWriterImpl[T] private[sql](ds: Dataset[T]) extends DataFram optionExpression = OptionList(Seq.empty), location = extraOptions.get("path"), comment = extraOptions.get(TableCatalog.PROP_COMMENT), + collation = extraOptions.get(TableCatalog.PROP_COLLATION), serde = None, external = false) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/DataFrameWriterV2Impl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/DataFrameWriterV2Impl.scala index 0a19e6c47afa9..86ea55bc59b7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/DataFrameWriterV2Impl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/DataFrameWriterV2Impl.scala @@ -150,6 +150,7 @@ final class DataFrameWriterV2Impl[T] private[sql](table: String, ds: 
Dataset[T]) optionExpression = OptionList(Seq.empty), location = None, comment = None, + collation = None, serde = None, external = false) runCommand( @@ -215,6 +216,7 @@ final class DataFrameWriterV2Impl[T] private[sql](table: String, ds: Dataset[T]) optionExpression = OptionList(Seq.empty), location = None, comment = None, + collation = None, serde = None, external = false) runCommand(ReplaceTableAsSelect( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/MergeIntoWriterImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/MergeIntoWriterImpl.scala index bb8146e3e0e33..2f1a34648a470 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/MergeIntoWriterImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/MergeIntoWriterImpl.scala @@ -44,7 +44,7 @@ class MergeIntoWriterImpl[T] private[sql] (table: String, ds: Dataset[T], on: Co private val df: DataFrame = ds.toDF() private[sql] val sparkSession = ds.sparkSession - import sparkSession.RichColumn + import sparkSession.toRichColumn private val tableName = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(table) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/RuntimeConfigImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/RuntimeConfigImpl.scala index 1739b86c8dcb4..b2004215a99f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/RuntimeConfigImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/RuntimeConfigImpl.scala @@ -21,7 +21,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.SPARK_DOC_ROOT import org.apache.spark.annotation.Stable -import org.apache.spark.internal.config.{ConfigEntry, DEFAULT_PARALLELISM} +import org.apache.spark.internal.config.{ConfigEntry, DEFAULT_PARALLELISM, OptionalConfigEntry} import org.apache.spark.sql.RuntimeConfig import org.apache.spark.sql.errors.QueryCompilationErrors @@ -41,6 +41,12 @@ class RuntimeConfigImpl private[sql](val 
sqlConf: SQLConf = new SQLConf) extends sqlConf.setConfString(key, value) } + /** @inheritdoc */ + override private[sql] def set[T](entry: ConfigEntry[T], value: T): Unit = { + requireNonStaticConf(entry.key) + sqlConf.setConf(entry, value) + } + /** @inheritdoc */ @throws[NoSuchElementException]("if the key is not set and there is no default value") def get(key: String): String = { @@ -57,6 +63,18 @@ class RuntimeConfigImpl private[sql](val sqlConf: SQLConf = new SQLConf) extends sqlConf.getAllConfs } + /** @inheritdoc */ + override private[sql] def get[T](entry: ConfigEntry[T]): T = + sqlConf.getConf(entry) + + /** @inheritdoc */ + override private[sql] def get[T](entry: OptionalConfigEntry[T]): Option[T] = + sqlConf.getConf(entry) + + /** @inheritdoc */ + override private[sql] def get[T](entry: ConfigEntry[T], default: T): T = + sqlConf.getConf(entry, default) + private[sql] def getAllAsJava: java.util.Map[String, String] = { getAll.asJava } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/columnNodeSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/columnNodeSupport.scala index 64eacba1c6bf3..8f37f5c32de34 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/columnNodeSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/columnNodeSupport.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.sql.internal -import scala.language.implicitConversions - import UserDefinedFunctionUtils.toScalaUDF import org.apache.spark.SparkException @@ -88,9 +86,6 @@ private[sql] trait ColumnNodeToExpressionConverter extends (ColumnNode => Expres isDistinct = isDistinct, isInternal = isInternal) - case LazyOuterReference(nameParts, planId, _) => - convertLazyOuterReference(nameParts, planId) - case Alias(child, Seq(name), None, _) => expressions.Alias(apply(child), name)( nonInheritableMetadataKeys = Seq(Dataset.DATASET_ID_KEY, Dataset.COL_POS_KEY)) @@ -193,6 +188,9 @@ private[sql] trait 
ColumnNodeToExpressionConverter extends (ColumnNode => Expres case _ => transformed } + case l: LazyExpression => + analysis.LazyExpression(apply(l.child)) + case node => throw SparkException.internalError("Unsupported ColumnNode: " + node) } @@ -248,16 +246,6 @@ private[sql] trait ColumnNodeToExpressionConverter extends (ColumnNode => Expres } attribute } - - private def convertLazyOuterReference( - nameParts: Seq[String], - planId: Option[Long]): analysis.LazyOuterReference = { - val lazyOuterReference = analysis.LazyOuterReference(nameParts) - if (planId.isDefined) { - lazyOuterReference.setTagValue(LogicalPlan.PLAN_ID_TAG, planId.get) - } - lazyOuterReference - } } private[sql] object ColumnNodeToExpressionConverter extends ColumnNodeToExpressionConverter { @@ -285,6 +273,8 @@ private[sql] case class ExpressionColumnNode private( } override def sql: String = expression.sql + + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } private[sql] object ExpressionColumnNode { @@ -312,13 +302,14 @@ private[spark] object ExpressionUtils { /** * Create an Expression backed Column. */ - implicit def column(e: Expression): Column = Column(ExpressionColumnNode(e)) + def column(e: Expression): Column = Column(ExpressionColumnNode(e)) /** - * Create an ColumnNode backed Expression. Please not that this has to be converted to an actual - * Expression before it is used. + * Create an ColumnNode backed Expression. This can only be used for expressions that will be + * used to construct a [[Column]]. In all other cases please use `SparkSession.expression(...)`, + * `SparkSession.toRichColumn(...)`, or `org.apache.spark.sql.classic.ColumnConversions`. */ - implicit def expression(c: Column): Expression = ColumnNodeExpression(c.node) + def expression(c: Column): Expression = ColumnNodeExpression(c.node) /** * Returns the expression either with an existing or auto assigned name. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index bce9c67042782..c1b79f8017419 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -310,6 +310,10 @@ private case class PostgresDialect() case _ => super.visitExtract(field, source) } } + + override def visitBinaryArithmetic(name: String, l: String, r: String): String = { + l + " " + name.replace('^', '#') + " " + r + } } override def compileExpression(expr: Expression): Option[String] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecution.scala new file mode 100644 index 0000000000000..2b15a6c55fa97 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecution.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.scripting + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.{CommandResult, CompoundBody} + +/** + * SQL scripting executor - executes script and returns result statements. + * This supports returning multiple result statements from a single script. + * The caller of the SqlScriptingExecution API must adhere to the contract of executing + * the returned statement before continuing iteration. Executing the statement needs to be done + * inside withErrorHandling block. + * + * @param sqlScript CompoundBody which need to be executed. + * @param session Spark session that SQL script is executed within. + * @param args A map of parameter names to SQL literal expressions. + */ +class SqlScriptingExecution( + sqlScript: CompoundBody, + session: SparkSession, + args: Map[String, Expression]) { + + private val interpreter = SqlScriptingInterpreter(session) + + // Frames to keep what is being executed. + private val context: SqlScriptingExecutionContext = { + val ctx = new SqlScriptingExecutionContext() + val executionPlan = interpreter.buildExecutionPlan(sqlScript, args, ctx) + // Add frame which represents SQL Script to the context. + ctx.frames.append(new SqlScriptingExecutionFrame(executionPlan.getTreeIterator)) + // Enter the scope of the top level compound. + // We don't need to exit this scope explicitly as it will be done automatically + // when the frame is removed during iteration. + executionPlan.enterScope() + ctx + } + + + /** Helper method to iterate get next statements from the first available frame. */ + private def getNextStatement: Option[CompoundStatementExec] = { + // Remove frames that are already executed. + while (context.frames.nonEmpty && !context.frames.last.hasNext) { + context.frames.remove(context.frames.size - 1) + } + // If there are still frames available, get the next statement. 
+ if (context.frames.nonEmpty) { + return Some(context.frames.last.next()) + } + None + } + + /** + * Advances through the script and executes statements until a result statement or + * end of script is encountered. + * + * To know if there is result statement available, the method has to advance through script and + * execute statements until the result statement or end of script is encountered. For that reason + * the returned result must be executed before subsequent calls. Multiple calls without executing + * the intermediate results will lead to incorrect behavior. + * + * @return Result DataFrame if it is available, otherwise None. + */ + def getNextResult: Option[DataFrame] = { + var currentStatement = getNextStatement + // While we don't have a result statement, execute the statements. + while (currentStatement.isDefined) { + currentStatement match { + case Some(stmt: SingleStatementExec) if !stmt.isExecuted => + withErrorHandling { + val df = stmt.buildDataFrame(session) + df.logicalPlan match { + case _: CommandResult => // pass + case _ => return Some(df) // If the statement is a result, return it to the caller. + } + } + case _ => // pass + } + currentStatement = getNextStatement + } + None + } + + private def handleException(e: Throwable): Unit = { + // Rethrow the exception. + // TODO: SPARK-48353 Add error handling for SQL scripts + throw e + } + + def withErrorHandling(f: => Unit): Unit = { + try { + f + } catch { + case e: Throwable => + handleException(e) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionContext.scala new file mode 100644 index 0000000000000..94462ab828f75 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionContext.scala @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.scripting + +import scala.collection.mutable.ListBuffer + +import org.apache.spark.SparkException + +/** + * SQL scripting execution context - keeps track of the current execution state. + */ +class SqlScriptingExecutionContext { + // List of frames that are currently active. + val frames: ListBuffer[SqlScriptingExecutionFrame] = ListBuffer.empty + + def enterScope(label: String): Unit = { + if (frames.isEmpty) { + throw SparkException.internalError("Cannot enter scope: no frames.") + } + frames.last.enterScope(label) + } + + def exitScope(label: String): Unit = { + if (frames.isEmpty) { + throw SparkException.internalError("Cannot exit scope: no frames.") + } + frames.last.exitScope(label) + } +} + +/** + * SQL scripting executor - executes script and returns result statements. + * This supports returning multiple result statements from a single script. + * + * @param executionPlan CompoundBody which need to be executed. + */ +class SqlScriptingExecutionFrame( + executionPlan: Iterator[CompoundStatementExec]) extends Iterator[CompoundStatementExec] { + + // List of scopes that are currently active. 
+ private val scopes: ListBuffer[SqlScriptingExecutionScope] = ListBuffer.empty + + override def hasNext: Boolean = executionPlan.hasNext + + override def next(): CompoundStatementExec = { + if (!hasNext) throw SparkException.internalError("No more elements to iterate through.") + executionPlan.next() + } + + def enterScope(label: String): Unit = { + scopes.append(new SqlScriptingExecutionScope(label)) + } + + def exitScope(label: String): Unit = { + if (scopes.isEmpty) { + throw SparkException.internalError("Cannot exit scope: no scopes to exit.") + } + + // Remove all scopes until the one with the given label. + while (scopes.nonEmpty && scopes.last.label != label) { + scopes.remove(scopes.length - 1) + } + + // Remove the scope with the given label. + if (scopes.nonEmpty) { + scopes.remove(scopes.length - 1) + } + } +} + +/** + * SQL scripting execution scope - keeps track of the current execution scope. + * + * @param label + * Label of the scope. + */ +class SqlScriptingExecutionScope(val label: String) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionNode.scala b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionNode.scala index 9129fc6ab00f3..58cbfb0feb015 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionNode.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionNode.scala @@ -17,10 +17,14 @@ package org.apache.spark.sql.scripting +import java.util + import org.apache.spark.SparkException import org.apache.spark.internal.Logging -import org.apache.spark.sql.{Dataset, SparkSession} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.{NameParameterizedQuery, UnresolvedAttribute, UnresolvedIdentifier} +import org.apache.spark.sql.catalyst.expressions.{Alias, CreateArray, CreateMap, 
CreateNamedStruct, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.logical.{CreateVariable, DefaultValueExpression, DropVariable, LogicalPlan, OneRowRelation, Project, SetVariable} import org.apache.spark.sql.catalyst.trees.{Origin, WithOrigin} import org.apache.spark.sql.errors.SqlScriptingErrors import org.apache.spark.sql.types.BooleanType @@ -77,7 +81,7 @@ trait NonLeafStatementExec extends CompoundStatementExec { // DataFrame evaluates to True if it is single row, single column // of boolean type with value True. - val df = Dataset.ofRows(session, statement.parsedPlan) + val df = statement.buildDataFrame(session) df.schema.fields match { case Array(field) if field.dataType == BooleanType => df.limit(2).collect() match { @@ -105,15 +109,21 @@ trait NonLeafStatementExec extends CompoundStatementExec { * Logical plan of the parsed statement. * @param origin * Origin descriptor for the statement. + * @param args + * A map of parameter names to SQL literal expressions. * @param isInternal * Whether the statement originates from the SQL script or it is created during the * interpretation. Example: DropVariable statements are automatically created at the end of each * compound. + * @param context + * SqlScriptingExecutionContext keeps the execution state of current script. */ class SingleStatementExec( var parsedPlan: LogicalPlan, override val origin: Origin, - override val isInternal: Boolean) + val args: Map[String, Expression], + override val isInternal: Boolean, + context: SqlScriptingExecutionContext) extends LeafStatementExec with WithOrigin { /** @@ -122,6 +132,17 @@ class SingleStatementExec( */ var isExecuted = false + /** + * Plan with named parameters. + */ + private lazy val preparedPlan: LogicalPlan = { + if (args.nonEmpty) { + NameParameterizedQuery(parsedPlan, args) + } else { + parsedPlan + } + } + /** * Get the SQL query text corresponding to this statement. 
* @return @@ -132,21 +153,82 @@ class SingleStatementExec( origin.sqlText.get.substring(origin.startIndex.get, origin.stopIndex.get + 1) } + /** + * Builds a DataFrame from the parsedPlan of this SingleStatementExec + * @param session The SparkSession on which the parsedPlan is built. + * @return + * The DataFrame. + */ + def buildDataFrame(session: SparkSession): DataFrame = { + Dataset.ofRows(session, preparedPlan) + } + override def reset(): Unit = isExecuted = false } +/** + * NO-OP leaf node, which does nothing when returned to the iterator. + * It is emitted by empty BEGIN END blocks. + */ +class NoOpStatementExec extends LeafStatementExec { + override def reset(): Unit = () +} + /** * Executable node for CompoundBody. * @param statements * Executable nodes for nested statements within the CompoundBody. * @param label * Label set by user to CompoundBody or None otherwise. + * @param isScope + * Flag indicating if the CompoundBody is a labeled scope. + * Scopes are used for grouping local variables and exception handlers. + * @param context + * SqlScriptingExecutionContext keeps the execution state of current script. */ -class CompoundBodyExec(statements: Seq[CompoundStatementExec], label: Option[String] = None) +class CompoundBodyExec( + statements: Seq[CompoundStatementExec], + label: Option[String] = None, + isScope: Boolean, + context: SqlScriptingExecutionContext) extends NonLeafStatementExec { + private object ScopeStatus extends Enumeration { + type ScopeStatus = Value + val NOT_ENTERED, INSIDE, EXITED = Value + } + private var localIterator = statements.iterator private var curr = if (localIterator.hasNext) Some(localIterator.next()) else None + private var scopeStatus = ScopeStatus.NOT_ENTERED + + /** + * Enter scope represented by this compound statement. 
+ * + * This operation needs to be idempotent because it is called multiple times during + * iteration, but it should be executed only once when compound body that represent + * scope is encountered for the first time. + */ + def enterScope(): Unit = { + // This check makes this operation idempotent. + if (isScope && scopeStatus == ScopeStatus.NOT_ENTERED) { + scopeStatus = ScopeStatus.INSIDE + context.enterScope(label.get) + } + } + + /** + * Exit scope represented by this compound statement. + * + * Even though this operation is called exactly once, we are making it idempotent. + */ + protected def exitScope(): Unit = { + // This check makes this operation idempotent. + if (isScope && scopeStatus == ScopeStatus.INSIDE) { + scopeStatus = ScopeStatus.EXITED + context.exitScope(label.get) + } + } /** Used to stop the iteration in cases when LEAVE statement is encountered. */ private var stopIteration = false @@ -182,6 +264,11 @@ class CompoundBodyExec(statements: Seq[CompoundStatementExec], label: Option[Str statement case Some(body: NonLeafStatementExec) => if (body.getTreeIterator.hasNext) { + body match { + // Scope will be entered only once per compound because enter scope is idempotent. + case compoundBodyExec: CompoundBodyExec => compoundBodyExec.enterScope() + case _ => // pass + } body.getTreeIterator.next() match { case leaveStatement: LeaveStatementExec => handleLeaveStatement(leaveStatement) @@ -192,6 +279,11 @@ class CompoundBodyExec(statements: Seq[CompoundStatementExec], label: Option[Str case other => other } } else { + body match { + // Exit scope when there are no more statements to iterate through. 
+ case compoundBodyExec: CompoundBodyExec => compoundBodyExec.exitScope() + case _ => // pass + } curr = if (localIterator.hasNext) Some(localIterator.next()) else None next() } @@ -208,6 +300,7 @@ class CompoundBodyExec(statements: Seq[CompoundStatementExec], label: Option[Str localIterator = statements.iterator curr = if (localIterator.hasNext) Some(localIterator.next()) else None stopIteration = false + scopeStatus = ScopeStatus.NOT_ENTERED } /** Actions to do when LEAVE statement is encountered, to stop the execution of this compound. */ @@ -216,6 +309,9 @@ class CompoundBodyExec(statements: Seq[CompoundStatementExec], label: Option[Str // Stop the iteration. stopIteration = true + // Exit scope if leave statement is encountered. + exitScope() + // TODO: Variable cleanup (once we add SQL script execution logic). // TODO: Add interpreter tests as well. @@ -232,6 +328,9 @@ class CompoundBodyExec(statements: Seq[CompoundStatementExec], label: Option[Str // Stop the iteration. stopIteration = true + // Exit scope if iterate statement is encountered. + exitScope() + // TODO: Variable cleanup (once we add SQL script execution logic). // TODO: Add interpreter tests as well. @@ -636,3 +735,233 @@ class LoopStatementExec( body.reset() } } + +/** + * Executable node for ForStatement. + * @param query Executable node for the query. + * @param variableName Name of variable used for accessing current row during iteration. + * @param body Executable node for the body. + * @param label Label set to ForStatement by user or None otherwise. + * @param session Spark session that SQL script is executed within. + * @param context SqlScriptingExecutionContext keeps the execution state of current script. 
+ */ +class ForStatementExec( + query: SingleStatementExec, + variableName: Option[String], + body: CompoundBodyExec, + val label: Option[String], + session: SparkSession, + context: SqlScriptingExecutionContext) extends NonLeafStatementExec { + + private object ForState extends Enumeration { + val VariableAssignment, Body, VariableCleanup = Value + } + private var state = ForState.VariableAssignment + private var areVariablesDeclared = false + + // map of all variables created internally by the for statement + // (variableName -> variableExpression) + private var variablesMap: Map[String, Expression] = Map() + + // compound body used for dropping variables while in ForState.VariableAssignment + private var dropVariablesExec: CompoundBodyExec = null + + private var queryResult: util.Iterator[Row] = _ + private var isResultCacheValid = false + private def cachedQueryResult(): util.Iterator[Row] = { + if (!isResultCacheValid) { + queryResult = query.buildDataFrame(session).toLocalIterator() + query.isExecuted = true + isResultCacheValid = true + } + queryResult + } + + /** + * For can be interrupted by LeaveStatementExec + */ + private var interrupted: Boolean = false + + private lazy val treeIterator: Iterator[CompoundStatementExec] = + new Iterator[CompoundStatementExec] { + + override def hasNext: Boolean = !interrupted && (state match { + case ForState.VariableAssignment => cachedQueryResult().hasNext + case ForState.Body => true + case ForState.VariableCleanup => dropVariablesExec.getTreeIterator.hasNext + }) + + @scala.annotation.tailrec + override def next(): CompoundStatementExec = state match { + + case ForState.VariableAssignment => + variablesMap = createVariablesMapFromRow(cachedQueryResult().next()) + + if (!areVariablesDeclared) { + // create and execute declare var statements + variablesMap.keys.toSeq + .map(colName => createDeclareVarExec(colName, variablesMap(colName))) + .foreach(declareVarExec => declareVarExec.buildDataFrame(session).collect()) + 
areVariablesDeclared = true + } + + // create and execute set var statements + variablesMap.keys.toSeq + .map(colName => createSetVarExec(colName, variablesMap(colName))) + .foreach(setVarExec => setVarExec.buildDataFrame(session).collect()) + + state = ForState.Body + body.reset() + next() + + case ForState.Body => + val retStmt = body.getTreeIterator.next() + + // Handle LEAVE or ITERATE statement if it has been encountered. + retStmt match { + case leaveStatementExec: LeaveStatementExec if !leaveStatementExec.hasBeenMatched => + if (label.contains(leaveStatementExec.label)) { + leaveStatementExec.hasBeenMatched = true + } + interrupted = true + // If this for statement encounters LEAVE, it will either not be executed + // again, or it will be reset before being executed. + // In either case, variables will not + // be dropped normally, from ForState.VariableCleanup, so we drop them here. + dropVars() + return retStmt + case iterStatementExec: IterateStatementExec if !iterStatementExec.hasBeenMatched => + if (label.contains(iterStatementExec.label)) { + iterStatementExec.hasBeenMatched = true + } else { + // if an outer loop is being iterated, this for statement will either not be + // executed again, or it will be reset before being executed. + // In either case, variables will not + // be dropped normally, from ForState.VariableCleanup, so we drop them here. + dropVars() + } + switchStateFromBody() + return retStmt + case _ => + } + + if (!body.getTreeIterator.hasNext) { + switchStateFromBody() + } + retStmt + + case ForState.VariableCleanup => + dropVariablesExec.getTreeIterator.next() + } + } + + /** + * Recursively creates a Catalyst expression from Scala value.
+ * See https://spark.apache.org/docs/latest/sql-ref-datatypes.html for Spark -> Scala mappings + */ + private def createExpressionFromValue(value: Any): Expression = value match { + case m: Map[_, _] => + // arguments of CreateMap are in the format: (key1, val1, key2, val2, ...) + val mapArgs = m.keys.toSeq.flatMap { key => + Seq(createExpressionFromValue(key), createExpressionFromValue(m(key))) + } + CreateMap(mapArgs, useStringTypeWhenEmpty = false) + + // structs and rows match this case + case s: Row => + // arguments of CreateNamedStruct are in the format: (name1, val1, name2, val2, ...) + val namedStructArgs = s.schema.names.toSeq.flatMap { colName => + val valueExpression = createExpressionFromValue(s.getAs(colName)) + Seq(Literal(colName), valueExpression) + } + CreateNamedStruct(namedStructArgs) + + // arrays match this case + case a: collection.Seq[_] => + val arrayArgs = a.toSeq.map(createExpressionFromValue(_)) + CreateArray(arrayArgs, useStringTypeWhenEmpty = false) + + case _ => Literal(value) + } + + private def createVariablesMapFromRow(row: Row): Map[String, Expression] = { + var variablesMap = row.schema.names.toSeq.map { colName => + colName -> createExpressionFromValue(row.getAs(colName)) + }.toMap + + if (variableName.isDefined) { + val namedStructArgs = variablesMap.keys.toSeq.flatMap { colName => + Seq(Literal(colName), variablesMap(colName)) + } + val forVariable = CreateNamedStruct(namedStructArgs) + variablesMap = variablesMap + (variableName.get -> forVariable) + } + variablesMap + } + + /** + * Create and immediately execute dropVariable exec nodes for all variables in variablesMap. 
+ */ + private def dropVars(): Unit = { + variablesMap.keys.toSeq + .map(colName => createDropVarExec(colName)) + .foreach(dropVarExec => dropVarExec.buildDataFrame(session).collect()) + areVariablesDeclared = false + } + + private def switchStateFromBody(): Unit = { + state = if (cachedQueryResult().hasNext) ForState.VariableAssignment + else { + // create compound body for dropping nodes after execution is complete + dropVariablesExec = new CompoundBodyExec( + variablesMap.keys.toSeq.map(colName => createDropVarExec(colName)), + None, + isScope = false, + context + ) + ForState.VariableCleanup + } + } + + private def createDeclareVarExec(varName: String, variable: Expression): SingleStatementExec = { + val defaultExpression = DefaultValueExpression(Literal(null, variable.dataType), "null") + val declareVariable = CreateVariable( + UnresolvedIdentifier(Seq(varName)), + defaultExpression, + replace = true + ) + new SingleStatementExec(declareVariable, Origin(), Map.empty, isInternal = true, context) + } + + private def createSetVarExec(varName: String, variable: Expression): SingleStatementExec = { + val projectNamedStruct = Project( + Seq(Alias(variable, varName)()), + OneRowRelation() + ) + val setIdentifierToCurrentRow = + SetVariable(Seq(UnresolvedAttribute(varName)), projectNamedStruct) + new SingleStatementExec( + setIdentifierToCurrentRow, + Origin(), + Map.empty, + isInternal = true, + context) + } + + private def createDropVarExec(varName: String): SingleStatementExec = { + val dropVar = DropVariable(UnresolvedIdentifier(Seq(varName)), ifExists = true) + new SingleStatementExec(dropVar, Origin(), Map.empty, isInternal = true, context) + } + + override def getTreeIterator: Iterator[CompoundStatementExec] = treeIterator + + override def reset(): Unit = { + state = ForState.VariableAssignment + isResultCacheValid = false + variablesMap = Map() + areVariablesDeclared = false + dropVariablesExec = null + interrupted = false + body.reset() + } +} diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreter.scala b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreter.scala index 1be75cb61c8b0..7d00bbb3538df 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreter.scala @@ -19,13 +19,17 @@ package org.apache.spark.sql.scripting import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier -import org.apache.spark.sql.catalyst.plans.logical.{CaseStatement, CompoundBody, CompoundPlanStatement, CreateVariable, DropVariable, IfElseStatement, IterateStatement, LeaveStatement, LogicalPlan, LoopStatement, RepeatStatement, SingleStatement, WhileStatement} +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.{CaseStatement, CompoundBody, CompoundPlanStatement, CreateVariable, DropVariable, ForStatement, IfElseStatement, IterateStatement, LeaveStatement, LogicalPlan, LoopStatement, RepeatStatement, SingleStatement, WhileStatement} import org.apache.spark.sql.catalyst.trees.Origin /** * SQL scripting interpreter - builds SQL script execution plan. + * + * @param session + * Spark session that SQL script is executed within. */ -case class SqlScriptingInterpreter() { +case class SqlScriptingInterpreter(session: SparkSession) { /** * Build execution plan and return statements that need to be executed, @@ -33,15 +37,17 @@ case class SqlScriptingInterpreter() { * * @param compound * CompoundBody for which to build the plan. - * @param session - * Spark session that SQL script is executed within. + * @param args + * A map of parameter names to SQL literal expressions. * @return - * Iterator through collection of statements to be executed. + * Top level CompoundBodyExec representing SQL Script to be executed. 
*/ def buildExecutionPlan( compound: CompoundBody, - session: SparkSession): Iterator[CompoundStatementExec] = { - transformTreeIntoExecutable(compound, session).asInstanceOf[CompoundBodyExec].getTreeIterator + args: Map[String, Expression], + context: SqlScriptingExecutionContext): CompoundBodyExec = { + transformTreeIntoExecutable(compound, args, context) + .asInstanceOf[CompoundBodyExec] } /** @@ -62,15 +68,17 @@ case class SqlScriptingInterpreter() { * * @param node * Root node of the parsed tree. - * @param session - * Spark session that SQL script is executed within. + * @param args + * A map of parameter names to SQL literal expressions. * @return * Executable statement. */ private def transformTreeIntoExecutable( - node: CompoundPlanStatement, session: SparkSession): CompoundStatementExec = + node: CompoundPlanStatement, + args: Map[String, Expression], + context: SqlScriptingExecutionContext): CompoundStatementExec = node match { - case CompoundBody(collection, label) => + case CompoundBody(collection, label, isScope) => // TODO [SPARK-48530]: Current logic doesn't support scoped variables and shadowing. 
val variables = collection.flatMap { case st: SingleStatement => getDeclareVarNameFromPlan(st.parsedPlan) @@ -78,51 +86,92 @@ case class SqlScriptingInterpreter() { } val dropVariables = variables .map(varName => DropVariable(varName, ifExists = true)) - .map(new SingleStatementExec(_, Origin(), isInternal = true)) + .map(new SingleStatementExec(_, Origin(), args, isInternal = true, context)) .reverse + + val statements = collection + .map(st => transformTreeIntoExecutable(st, args, context)) ++ dropVariables match { + case Nil => Seq(new NoOpStatementExec) + case s => s + } + new CompoundBodyExec( - collection.map(st => transformTreeIntoExecutable(st, session)) ++ dropVariables, - label) + statements, + label, + isScope, + context) case IfElseStatement(conditions, conditionalBodies, elseBody) => val conditionsExec = conditions.map(condition => - new SingleStatementExec(condition.parsedPlan, condition.origin, isInternal = false)) + new SingleStatementExec( + condition.parsedPlan, + condition.origin, + args, + isInternal = false, + context)) val conditionalBodiesExec = conditionalBodies.map(body => - transformTreeIntoExecutable(body, session).asInstanceOf[CompoundBodyExec]) + transformTreeIntoExecutable(body, args, context).asInstanceOf[CompoundBodyExec]) val unconditionalBodiesExec = elseBody.map(body => - transformTreeIntoExecutable(body, session).asInstanceOf[CompoundBodyExec]) + transformTreeIntoExecutable(body, args, context).asInstanceOf[CompoundBodyExec]) new IfElseStatementExec( conditionsExec, conditionalBodiesExec, unconditionalBodiesExec, session) case CaseStatement(conditions, conditionalBodies, elseBody) => val conditionsExec = conditions.map(condition => - // todo: what to put here for isInternal, in case of simple case statement - new SingleStatementExec(condition.parsedPlan, condition.origin, isInternal = false)) + new SingleStatementExec( + condition.parsedPlan, + condition.origin, + args, + isInternal = false, + context)) val conditionalBodiesExec 
= conditionalBodies.map(body => - transformTreeIntoExecutable(body, session).asInstanceOf[CompoundBodyExec]) + transformTreeIntoExecutable(body, args, context).asInstanceOf[CompoundBodyExec]) val unconditionalBodiesExec = elseBody.map(body => - transformTreeIntoExecutable(body, session).asInstanceOf[CompoundBodyExec]) + transformTreeIntoExecutable(body, args, context).asInstanceOf[CompoundBodyExec]) new CaseStatementExec( conditionsExec, conditionalBodiesExec, unconditionalBodiesExec, session) case WhileStatement(condition, body, label) => val conditionExec = - new SingleStatementExec(condition.parsedPlan, condition.origin, isInternal = false) + new SingleStatementExec( + condition.parsedPlan, + condition.origin, + args, + isInternal = false, + context) val bodyExec = - transformTreeIntoExecutable(body, session).asInstanceOf[CompoundBodyExec] + transformTreeIntoExecutable(body, args, context).asInstanceOf[CompoundBodyExec] new WhileStatementExec(conditionExec, bodyExec, label, session) case RepeatStatement(condition, body, label) => val conditionExec = - new SingleStatementExec(condition.parsedPlan, condition.origin, isInternal = false) + new SingleStatementExec( + condition.parsedPlan, + condition.origin, + args, + isInternal = false, + context) val bodyExec = - transformTreeIntoExecutable(body, session).asInstanceOf[CompoundBodyExec] + transformTreeIntoExecutable(body, args, context).asInstanceOf[CompoundBodyExec] new RepeatStatementExec(conditionExec, bodyExec, label, session) case LoopStatement(body, label) => - val bodyExec = transformTreeIntoExecutable(body, session).asInstanceOf[CompoundBodyExec] + val bodyExec = transformTreeIntoExecutable(body, args, context) + .asInstanceOf[CompoundBodyExec] new LoopStatementExec(bodyExec, label) + case ForStatement(query, variableNameOpt, body, label) => + val queryExec = + new SingleStatementExec( + query.parsedPlan, + query.origin, + args, + isInternal = false, + context) + val bodyExec = + 
transformTreeIntoExecutable(body, args, context).asInstanceOf[CompoundBodyExec] + new ForStatementExec(queryExec, variableNameOpt, bodyExec, label, session, context) + case leaveStatement: LeaveStatement => new LeaveStatementExec(leaveStatement.label) @@ -133,6 +182,8 @@ case class SqlScriptingInterpreter() { new SingleStatementExec( sparkStatement.parsedPlan, sparkStatement.origin, - isInternal = false) + args, + isInternal = false, + context) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index b0233d2c51b75..d41933c6a135c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -175,6 +175,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) extends api.DataStr extraOptions.get("path"), None, None, + None, external = false) val cmd = CreateTable( UnresolvedIdentifier(originalMultipartIdentifier), diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index c54e09735a9be..39cefdaa892b2 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -425,6 +425,8 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis | kurtosis | SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.ListAgg | listagg | SELECT listagg(col) FROM VALUES ('a'), ('b'), 
('c') AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.ListAgg | string_agg | SELECT string_agg(col) FROM VALUES ('a'), ('b'), ('c') AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES ('a', 10), ('b', 50), ('c', 20) AS tab(x, y) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Median | median | SELECT median(col) FROM VALUES (0), (10) AS tab(col) | struct | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out index 2fd2261708c91..ca51aa50ac1bb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out @@ -442,77 +442,77 @@ Project [array_except(array(collate(aaa, utf8_lcase)), array(collate(AAA, utf8_l -- !query select 'a' collate unicode < 'A' -- !query analysis -Project [(collate(a, unicode) < cast(A as string collate UNICODE)) AS (collate(a, unicode) < A)#x] +Project [(collate(a, unicode) < A) AS (collate(a, unicode) < 'A' collate UNICODE)#x] +- OneRowRelation -- !query select 'a' collate unicode_ci = 'A' -- !query analysis -Project [(collate(a, unicode_ci) = cast(A as string collate UNICODE_CI)) AS (collate(a, unicode_ci) = A)#x] +Project [(collate(a, unicode_ci) = A) AS (collate(a, unicode_ci) = 'A' collate UNICODE_CI)#x] +- OneRowRelation -- !query select 'a' collate unicode_ai = 'å' -- !query analysis -Project [(collate(a, unicode_ai) = cast(å as string collate UNICODE_AI)) AS (collate(a, unicode_ai) = å)#x] +Project [(collate(a, unicode_ai) = å) AS (collate(a, unicode_ai) = 'å' collate UNICODE_AI)#x] +- OneRowRelation -- !query select 'a' collate unicode_ci_ai = 'Å' -- !query analysis -Project 
[(collate(a, unicode_ci_ai) = cast(Å as string collate UNICODE_CI_AI)) AS (collate(a, unicode_ci_ai) = Å)#x] +Project [(collate(a, unicode_ci_ai) = Å) AS (collate(a, unicode_ci_ai) = 'Å' collate UNICODE_CI_AI)#x] +- OneRowRelation -- !query select 'a' collate en < 'A' -- !query analysis -Project [(collate(a, en) < cast(A as string collate en)) AS (collate(a, en) < A)#x] +Project [(collate(a, en) < A) AS (collate(a, en) < 'A' collate en)#x] +- OneRowRelation -- !query select 'a' collate en_ci = 'A' -- !query analysis -Project [(collate(a, en_ci) = cast(A as string collate en_CI)) AS (collate(a, en_ci) = A)#x] +Project [(collate(a, en_ci) = A) AS (collate(a, en_ci) = 'A' collate en_CI)#x] +- OneRowRelation -- !query select 'a' collate en_ai = 'å' -- !query analysis -Project [(collate(a, en_ai) = cast(å as string collate en_AI)) AS (collate(a, en_ai) = å)#x] +Project [(collate(a, en_ai) = å) AS (collate(a, en_ai) = 'å' collate en_AI)#x] +- OneRowRelation -- !query select 'a' collate en_ci_ai = 'Å' -- !query analysis -Project [(collate(a, en_ci_ai) = cast(Å as string collate en_CI_AI)) AS (collate(a, en_ci_ai) = Å)#x] +Project [(collate(a, en_ci_ai) = Å) AS (collate(a, en_ci_ai) = 'Å' collate en_CI_AI)#x] +- OneRowRelation -- !query select 'Kypper' collate sv < 'Köpfe' -- !query analysis -Project [(collate(Kypper, sv) < cast(Köpfe as string collate sv)) AS (collate(Kypper, sv) < Köpfe)#x] +Project [(collate(Kypper, sv) < Köpfe) AS (collate(Kypper, sv) < 'Köpfe' collate sv)#x] +- OneRowRelation -- !query select 'Kypper' collate de > 'Köpfe' -- !query analysis -Project [(collate(Kypper, de) > cast(Köpfe as string collate de)) AS (collate(Kypper, de) > Köpfe)#x] +Project [(collate(Kypper, de) > Köpfe) AS (collate(Kypper, de) > 'Köpfe' collate de)#x] +- OneRowRelation -- !query select 'I' collate tr_ci = 'ı' -- !query analysis -Project [(collate(I, tr_ci) = cast(ı as string collate tr_CI)) AS (collate(I, tr_ci) = ı)#x] +Project [(collate(I, tr_ci) = ı) AS (collate(I, 
tr_ci) = 'ı' collate tr_CI)#x] +- OneRowRelation @@ -826,7 +826,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query select concat_ws(' ', utf8_lcase, utf8_lcase) from t5 -- !query analysis -Project [concat_ws(cast( as string collate UTF8_LCASE), utf8_lcase#x, utf8_lcase#x) AS concat_ws( , utf8_lcase, utf8_lcase)#x] +Project [concat_ws( , utf8_lcase#x, utf8_lcase#x) AS concat_ws( , utf8_lcase, utf8_lcase)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -868,7 +868,7 @@ Project [concat_ws(collate( , utf8_lcase), cast(utf8_binary#x as string collate -- !query select concat_ws(',', utf8_lcase, 'word'), concat_ws(',', utf8_binary, 'word') from t5 -- !query analysis -Project [concat_ws(cast(, as string collate UTF8_LCASE), utf8_lcase#x, cast(word as string collate UTF8_LCASE)) AS concat_ws(,, utf8_lcase, word)#x, concat_ws(,, utf8_binary#x, word) AS concat_ws(,, utf8_binary, word)#x] +Project [concat_ws(,, utf8_lcase#x, word) AS concat_ws(,, utf8_lcase, word)#x, concat_ws(,, utf8_binary#x, word) AS concat_ws(,, utf8_binary, word)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -876,7 +876,7 @@ Project [concat_ws(cast(, as string collate UTF8_LCASE), utf8_lcase#x, cast(word -- !query select concat_ws(',', utf8_lcase, 'word' collate utf8_binary), concat_ws(',', utf8_binary, 'word' collate utf8_lcase) from t5 -- !query analysis -Project [concat_ws(,, cast(utf8_lcase#x as string), collate(word, utf8_binary)) AS concat_ws(,, utf8_lcase, collate(word, utf8_binary))#x, concat_ws(cast(, as string collate UTF8_LCASE), cast(utf8_binary#x as string collate UTF8_LCASE), collate(word, utf8_lcase)) AS concat_ws(,, utf8_binary, collate(word, utf8_lcase))#x] +Project [concat_ws(,, cast(utf8_lcase#x as string), collate(word, utf8_binary)) AS concat_ws(,, utf8_lcase, collate(word, 
utf8_binary))#x, concat_ws(,, cast(utf8_binary#x as string collate UTF8_LCASE), collate(word, utf8_lcase)) AS concat_ws(,, utf8_binary, collate(word, utf8_lcase))#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -934,7 +934,7 @@ Project [elt(1, collate(utf8_binary#x, utf8_binary), cast(utf8_lcase#x as string -- !query select elt(1, utf8_binary, 'word'), elt(1, utf8_lcase, 'word') from t5 -- !query analysis -Project [elt(1, utf8_binary#x, word, true) AS elt(1, utf8_binary, word)#x, elt(1, utf8_lcase#x, cast(word as string collate UTF8_LCASE), true) AS elt(1, utf8_lcase, word)#x] +Project [elt(1, utf8_binary#x, word, true) AS elt(1, utf8_binary, word)#x, elt(1, utf8_lcase#x, word, true) AS elt(1, utf8_lcase, 'word' collate UTF8_LCASE)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1024,7 +1024,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select split_part(utf8_binary, 'a', 3), split_part(utf8_lcase, 'a', 3) from t5 -- !query analysis -Project [split_part(utf8_binary#x, a, 3) AS split_part(utf8_binary, a, 3)#x, split_part(utf8_lcase#x, cast(a as string collate UTF8_LCASE), 3) AS split_part(utf8_lcase, a, 3)#x] +Project [split_part(utf8_binary#x, a, 3) AS split_part(utf8_binary, a, 3)#x, split_part(utf8_lcase#x, a, 3) AS split_part(utf8_lcase, a, 3)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1122,7 +1122,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select contains(utf8_binary, 'a'), contains(utf8_lcase, 'a') from t5 -- !query analysis -Project [Contains(utf8_binary#x, a) AS contains(utf8_binary, a)#x, Contains(utf8_lcase#x, cast(a as string collate UTF8_LCASE)) AS contains(utf8_lcase, a)#x] +Project [Contains(utf8_binary#x, a) AS contains(utf8_binary, a)#x, Contains(utf8_lcase#x, a) 
AS contains(utf8_lcase, a)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1220,7 +1220,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select substring_index(utf8_binary, 'a', 2), substring_index(utf8_lcase, 'a', 2) from t5 -- !query analysis -Project [substring_index(utf8_binary#x, a, 2) AS substring_index(utf8_binary, a, 2)#x, substring_index(utf8_lcase#x, cast(a as string collate UTF8_LCASE), 2) AS substring_index(utf8_lcase, a, 2)#x] +Project [substring_index(utf8_binary#x, a, 2) AS substring_index(utf8_binary, a, 2)#x, substring_index(utf8_lcase#x, a, 2) AS substring_index(utf8_lcase, a, 2)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1318,7 +1318,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select instr(utf8_binary, 'a'), instr(utf8_lcase, 'a') from t5 -- !query analysis -Project [instr(utf8_binary#x, a) AS instr(utf8_binary, a)#x, instr(utf8_lcase#x, cast(a as string collate UTF8_LCASE)) AS instr(utf8_lcase, a)#x] +Project [instr(utf8_binary#x, a) AS instr(utf8_binary, a)#x, instr(utf8_lcase#x, a) AS instr(utf8_lcase, a)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1384,7 +1384,7 @@ Project [find_in_set(collate(utf8_binary#x, utf8_lcase), collate(utf8_lcase#x, u -- !query select find_in_set(utf8_binary, 'aaAaaAaA,i̇o'), find_in_set(utf8_lcase, 'aaAaaAaA,i̇o') from t5 -- !query analysis -Project [find_in_set(utf8_binary#x, aaAaaAaA,i̇o) AS find_in_set(utf8_binary, aaAaaAaA,i̇o)#x, find_in_set(utf8_lcase#x, cast(aaAaaAaA,i̇o as string collate UTF8_LCASE)) AS find_in_set(utf8_lcase, aaAaaAaA,i̇o)#x] +Project [find_in_set(utf8_binary#x, aaAaaAaA,i̇o) AS find_in_set(utf8_binary, aaAaaAaA,i̇o)#x, find_in_set(utf8_lcase#x, aaAaaAaA,i̇o) AS find_in_set(utf8_lcase, aaAaaAaA,i̇o)#x] 
+- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1482,7 +1482,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select startswith(utf8_binary, 'aaAaaAaA'), startswith(utf8_lcase, 'aaAaaAaA') from t5 -- !query analysis -Project [StartsWith(utf8_binary#x, aaAaaAaA) AS startswith(utf8_binary, aaAaaAaA)#x, StartsWith(utf8_lcase#x, cast(aaAaaAaA as string collate UTF8_LCASE)) AS startswith(utf8_lcase, aaAaaAaA)#x] +Project [StartsWith(utf8_binary#x, aaAaaAaA) AS startswith(utf8_binary, aaAaaAaA)#x, StartsWith(utf8_lcase#x, aaAaaAaA) AS startswith(utf8_lcase, aaAaaAaA)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1506,7 +1506,7 @@ Project [StartsWith(cast(utf8_binary#x as string collate UTF8_LCASE_RTRIM), coll -- !query select translate(utf8_lcase, utf8_lcase, '12345') from t5 -- !query analysis -Project [translate(utf8_lcase#x, utf8_lcase#x, cast(12345 as string collate UTF8_LCASE)) AS translate(utf8_lcase, utf8_lcase, 12345)#x] +Project [translate(utf8_lcase#x, utf8_lcase#x, 12345) AS translate(utf8_lcase, utf8_lcase, 12345)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1572,7 +1572,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select translate(utf8_lcase, 'aaAaaAaA', '12345'), translate(utf8_binary, 'aaAaaAaA', '12345') from t5 -- !query analysis -Project [translate(utf8_lcase#x, cast(aaAaaAaA as string collate UTF8_LCASE), cast(12345 as string collate UTF8_LCASE)) AS translate(utf8_lcase, aaAaaAaA, 12345)#x, translate(utf8_binary#x, aaAaaAaA, 12345) AS translate(utf8_binary, aaAaaAaA, 12345)#x] +Project [translate(utf8_lcase#x, aaAaaAaA, 12345) AS translate(utf8_lcase, aaAaaAaA, 12345)#x, translate(utf8_binary#x, aaAaaAaA, 12345) AS translate(utf8_binary, aaAaaAaA, 12345)#x] +- SubqueryAlias 
spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1580,7 +1580,7 @@ Project [translate(utf8_lcase#x, cast(aaAaaAaA as string collate UTF8_LCASE), ca -- !query select translate(utf8_lcase, 'aBc' collate utf8_binary, '12345'), translate(utf8_binary, 'aBc' collate utf8_lcase, '12345') from t5 -- !query analysis -Project [translate(cast(utf8_lcase#x as string), collate(aBc, utf8_binary), 12345) AS translate(utf8_lcase, collate(aBc, utf8_binary), 12345)#x, translate(cast(utf8_binary#x as string collate UTF8_LCASE), collate(aBc, utf8_lcase), cast(12345 as string collate UTF8_LCASE)) AS translate(utf8_binary, collate(aBc, utf8_lcase), 12345)#x] +Project [translate(cast(utf8_lcase#x as string), collate(aBc, utf8_binary), 12345) AS translate(utf8_lcase, collate(aBc, utf8_binary), 12345)#x, translate(cast(utf8_binary#x as string collate UTF8_LCASE), collate(aBc, utf8_lcase), 12345) AS translate(utf8_binary, collate(aBc, utf8_lcase), 12345)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1588,7 +1588,7 @@ Project [translate(cast(utf8_lcase#x as string), collate(aBc, utf8_binary), 1234 -- !query select translate(utf8_lcase, 'aBc ' collate utf8_binary_rtrim, '12345'), translate(utf8_binary, 'aBc' collate utf8_lcase, '12345') from t5 -- !query analysis -Project [translate(cast(utf8_lcase#x as string collate UTF8_BINARY_RTRIM), collate(aBc , utf8_binary_rtrim), cast(12345 as string collate UTF8_BINARY_RTRIM)) AS translate(utf8_lcase, collate(aBc , utf8_binary_rtrim), 12345)#x, translate(cast(utf8_binary#x as string collate UTF8_LCASE), collate(aBc, utf8_lcase), cast(12345 as string collate UTF8_LCASE)) AS translate(utf8_binary, collate(aBc, utf8_lcase), 12345)#x] +Project [translate(cast(utf8_lcase#x as string collate UTF8_BINARY_RTRIM), collate(aBc , utf8_binary_rtrim), 12345) AS translate(utf8_lcase, collate(aBc , utf8_binary_rtrim), 12345)#x, 
translate(cast(utf8_binary#x as string collate UTF8_LCASE), collate(aBc, utf8_lcase), 12345) AS translate(utf8_binary, collate(aBc, utf8_lcase), 12345)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1638,7 +1638,7 @@ Project [replace(utf8_binary#x, collate(utf8_lcase#x, utf8_binary), abc) AS repl -- !query select replace(utf8_binary collate utf8_lcase, utf8_lcase collate utf8_lcase, 'abc') from t5 -- !query analysis -Project [replace(collate(utf8_binary#x, utf8_lcase), collate(utf8_lcase#x, utf8_lcase), cast(abc as string collate UTF8_LCASE)) AS replace(collate(utf8_binary, utf8_lcase), collate(utf8_lcase, utf8_lcase), abc)#x] +Project [replace(collate(utf8_binary#x, utf8_lcase), collate(utf8_lcase#x, utf8_lcase), abc) AS replace(collate(utf8_binary, utf8_lcase), collate(utf8_lcase, utf8_lcase), abc)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1655,7 +1655,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING COLLATE UNICODE_AI\"", "paramIndex" : "first", "requiredType" : "\"STRING\"", - "sqlExpr" : "\"replace(collate(utf8_binary, unicode_ai), collate(utf8_lcase, unicode_ai), abc)\"" + "sqlExpr" : "\"replace(collate(utf8_binary, unicode_ai), collate(utf8_lcase, unicode_ai), 'abc' collate UNICODE_AI)\"" }, "queryContext" : [ { "objectType" : "", @@ -1670,7 +1670,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select replace(utf8_binary, 'aaAaaAaA', 'abc'), replace(utf8_lcase, 'aaAaaAaA', 'abc') from t5 -- !query analysis -Project [replace(utf8_binary#x, aaAaaAaA, abc) AS replace(utf8_binary, aaAaaAaA, abc)#x, replace(utf8_lcase#x, cast(aaAaaAaA as string collate UTF8_LCASE), cast(abc as string collate UTF8_LCASE)) AS replace(utf8_lcase, aaAaaAaA, abc)#x] +Project [replace(utf8_binary#x, aaAaaAaA, abc) AS replace(utf8_binary, aaAaaAaA, abc)#x, 
replace(utf8_lcase#x, aaAaaAaA, abc) AS replace(utf8_lcase, aaAaaAaA, abc)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1678,7 +1678,7 @@ Project [replace(utf8_binary#x, aaAaaAaA, abc) AS replace(utf8_binary, aaAaaAaA, -- !query select replace(utf8_binary, 'aaAaaAaA' collate utf8_lcase, 'abc'), replace(utf8_lcase, 'aaAaaAaA' collate utf8_binary, 'abc') from t5 -- !query analysis -Project [replace(cast(utf8_binary#x as string collate UTF8_LCASE), collate(aaAaaAaA, utf8_lcase), cast(abc as string collate UTF8_LCASE)) AS replace(utf8_binary, collate(aaAaaAaA, utf8_lcase), abc)#x, replace(cast(utf8_lcase#x as string), collate(aaAaaAaA, utf8_binary), abc) AS replace(utf8_lcase, collate(aaAaaAaA, utf8_binary), abc)#x] +Project [replace(cast(utf8_binary#x as string collate UTF8_LCASE), collate(aaAaaAaA, utf8_lcase), abc) AS replace(utf8_binary, collate(aaAaaAaA, utf8_lcase), abc)#x, replace(cast(utf8_lcase#x as string), collate(aaAaaAaA, utf8_binary), abc) AS replace(utf8_lcase, collate(aaAaaAaA, utf8_binary), abc)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1686,7 +1686,7 @@ Project [replace(cast(utf8_binary#x as string collate UTF8_LCASE), collate(aaAaa -- !query select replace(utf8_binary, 'aaAaaAaA ' collate utf8_lcase_rtrim, 'abc'), replace(utf8_lcase, 'aaAaaAaA' collate utf8_binary, 'abc') from t5 -- !query analysis -Project [replace(cast(utf8_binary#x as string collate UTF8_LCASE_RTRIM), collate(aaAaaAaA , utf8_lcase_rtrim), cast(abc as string collate UTF8_LCASE_RTRIM)) AS replace(utf8_binary, collate(aaAaaAaA , utf8_lcase_rtrim), abc)#x, replace(cast(utf8_lcase#x as string), collate(aaAaaAaA, utf8_binary), abc) AS replace(utf8_lcase, collate(aaAaaAaA, utf8_binary), abc)#x] +Project [replace(cast(utf8_binary#x as string collate UTF8_LCASE_RTRIM), collate(aaAaaAaA , utf8_lcase_rtrim), abc) AS 
replace(utf8_binary, collate(aaAaaAaA , utf8_lcase_rtrim), abc)#x, replace(cast(utf8_lcase#x as string), collate(aaAaaAaA, utf8_binary), abc) AS replace(utf8_lcase, collate(aaAaaAaA, utf8_binary), abc)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -1768,7 +1768,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select endswith(utf8_binary, 'aaAaaAaA'), endswith(utf8_lcase, 'aaAaaAaA') from t5 -- !query analysis -Project [EndsWith(utf8_binary#x, aaAaaAaA) AS endswith(utf8_binary, aaAaaAaA)#x, EndsWith(utf8_lcase#x, cast(aaAaaAaA as string collate UTF8_LCASE)) AS endswith(utf8_lcase, aaAaaAaA)#x] +Project [EndsWith(utf8_binary#x, aaAaaAaA) AS endswith(utf8_binary, aaAaaAaA)#x, EndsWith(utf8_lcase#x, aaAaaAaA) AS endswith(utf8_lcase, aaAaaAaA)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -2042,7 +2042,7 @@ Project [overlay(collate(utf8_binary#x, utf8_lcase), collate(utf8_lcase#x, utf8_ -- !query select overlay(utf8_binary, 'a', 2), overlay(utf8_lcase, 'a', 2) from t5 -- !query analysis -Project [overlay(utf8_binary#x, a, 2, -1) AS overlay(utf8_binary, a, 2, -1)#x, overlay(utf8_lcase#x, cast(a as string collate UTF8_LCASE), 2, -1) AS overlay(utf8_lcase, a, 2, -1)#x] +Project [overlay(utf8_binary#x, a, 2, -1) AS overlay(utf8_binary, a, 2, -1)#x, overlay(utf8_lcase#x, a, 2, -1) AS overlay(utf8_lcase, 'a' collate UTF8_LCASE, 2, -1)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -2143,6 +2143,14 @@ Project [octet_length(collate(utf8_binary#x, utf8_lcase)) AS octet_length(collat +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet +-- !query +select octet_length(utf8_binary collate utf8_lcase_rtrim), octet_length(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query analysis +Project 
[octet_length(collate(utf8_binary#x, utf8_lcase_rtrim)) AS octet_length(collate(utf8_binary, utf8_lcase_rtrim))#x, octet_length(collate(utf8_lcase#x, utf8_binary_rtrim)) AS octet_length(collate(utf8_lcase, utf8_binary_rtrim))#x] ++- SubqueryAlias spark_catalog.default.t5 + +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet + + -- !query select luhn_check(num) from t9 -- !query analysis @@ -2204,7 +2212,7 @@ Project [levenshtein(collate(utf8_binary#x, utf8_lcase), collate(utf8_lcase#x, u -- !query select levenshtein(utf8_binary, 'a'), levenshtein(utf8_lcase, 'a') from t5 -- !query analysis -Project [levenshtein(utf8_binary#x, a, None) AS levenshtein(utf8_binary, a)#x, levenshtein(utf8_lcase#x, cast(a as string collate UTF8_LCASE), None) AS levenshtein(utf8_lcase, a)#x] +Project [levenshtein(utf8_binary#x, a, None) AS levenshtein(utf8_binary, a)#x, levenshtein(utf8_lcase#x, a, None) AS levenshtein(utf8_lcase, a)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -2233,6 +2241,14 @@ Project [is_valid_utf8(collate(utf8_binary#x, utf8_lcase)) AS is_valid_utf8(coll +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet +-- !query +select is_valid_utf8(utf8_binary collate utf8_lcase_rtrim), is_valid_utf8(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query analysis +Project [is_valid_utf8(collate(utf8_binary#x, utf8_lcase_rtrim)) AS is_valid_utf8(collate(utf8_binary, utf8_lcase_rtrim))#x, is_valid_utf8(collate(utf8_lcase#x, utf8_binary_rtrim)) AS is_valid_utf8(collate(utf8_lcase, utf8_binary_rtrim))#x] ++- SubqueryAlias spark_catalog.default.t5 + +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet + + -- !query select make_valid_utf8(utf8_binary), make_valid_utf8(utf8_lcase) from t5 -- !query analysis @@ -2249,6 +2265,14 @@ Project [make_valid_utf8(collate(utf8_binary#x, utf8_lcase)) AS make_valid_utf8( +- Relation 
spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet +-- !query +select make_valid_utf8(utf8_binary collate utf8_lcase_rtrim), make_valid_utf8(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query analysis +Project [make_valid_utf8(collate(utf8_binary#x, utf8_lcase_rtrim)) AS make_valid_utf8(collate(utf8_binary, utf8_lcase_rtrim))#x, make_valid_utf8(collate(utf8_lcase#x, utf8_binary_rtrim)) AS make_valid_utf8(collate(utf8_lcase, utf8_binary_rtrim))#x] ++- SubqueryAlias spark_catalog.default.t5 + +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet + + -- !query select validate_utf8(utf8_binary), validate_utf8(utf8_lcase) from t5 -- !query analysis @@ -2265,6 +2289,14 @@ Project [validate_utf8(collate(utf8_binary#x, utf8_lcase)) AS validate_utf8(coll +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet +-- !query +select validate_utf8(utf8_binary collate utf8_lcase_rtrim), validate_utf8(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query analysis +Project [validate_utf8(collate(utf8_binary#x, utf8_lcase_rtrim)) AS validate_utf8(collate(utf8_binary, utf8_lcase_rtrim))#x, validate_utf8(collate(utf8_lcase#x, utf8_binary_rtrim)) AS validate_utf8(collate(utf8_lcase, utf8_binary_rtrim))#x] ++- SubqueryAlias spark_catalog.default.t5 + +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet + + -- !query select try_validate_utf8(utf8_binary), try_validate_utf8(utf8_lcase) from t5 -- !query analysis @@ -2281,6 +2313,14 @@ Project [try_validate_utf8(collate(utf8_binary#x, utf8_lcase)) AS try_validate_u +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet +-- !query +select try_validate_utf8(utf8_binary collate utf8_lcase_rtrim), try_validate_utf8(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query analysis +Project [try_validate_utf8(collate(utf8_binary#x, utf8_lcase_rtrim)) AS try_validate_utf8(collate(utf8_binary, utf8_lcase_rtrim))#x, 
try_validate_utf8(collate(utf8_lcase#x, utf8_binary_rtrim)) AS try_validate_utf8(collate(utf8_lcase, utf8_binary_rtrim))#x] ++- SubqueryAlias spark_catalog.default.t5 + +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet + + -- !query select substr(utf8_binary, 2, 2), substr(utf8_lcase, 2, 2) from t5 -- !query analysis @@ -2390,7 +2430,7 @@ Project [lpad(collate(utf8_binary#x, utf8_binary_rtrim), 8, collate(utf8_lcase#x -- !query select rpad(utf8_binary, 8, 'a'), rpad(utf8_lcase, 8, 'a') from t5 -- !query analysis -Project [rpad(utf8_binary#x, 8, a) AS rpad(utf8_binary, 8, a)#x, rpad(utf8_lcase#x, 8, cast(a as string collate UTF8_LCASE)) AS rpad(utf8_lcase, 8, a)#x] +Project [rpad(utf8_binary#x, 8, a) AS rpad(utf8_binary, 8, a)#x, rpad(utf8_lcase#x, 8, a) AS rpad(utf8_lcase, 8, a)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -2464,7 +2504,7 @@ Project [lpad(collate(utf8_binary#x, utf8_binary_rtrim), 8, collate(utf8_lcase#x -- !query select lpad(utf8_binary, 8, 'a'), lpad(utf8_lcase, 8, 'a') from t5 -- !query analysis -Project [lpad(utf8_binary#x, 8, a) AS lpad(utf8_binary, 8, a)#x, lpad(utf8_lcase#x, 8, cast(a as string collate UTF8_LCASE)) AS lpad(utf8_lcase, 8, a)#x] +Project [lpad(utf8_binary#x, 8, a) AS lpad(utf8_binary, 8, a)#x, lpad(utf8_lcase#x, 8, a) AS lpad(utf8_lcase, 8, a)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -2554,7 +2594,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query select locate(utf8_binary, 'a'), locate(utf8_lcase, 'a') from t5 -- !query analysis -Project [locate(utf8_binary#x, a, 1) AS locate(utf8_binary, a, 1)#x, locate(utf8_lcase#x, cast(a as string collate UTF8_LCASE), 1) AS locate(utf8_lcase, a, 1)#x] +Project [locate(utf8_binary#x, a, 1) AS locate(utf8_binary, a, 1)#x, locate(utf8_lcase#x, a, 1) AS locate(utf8_lcase, a, 1)#x] +- 
SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -2660,7 +2700,7 @@ Project [trim(collate(utf8_lcase#x, utf8_binary_rtrim), Some(collate(utf8_binary -- !query select TRIM('ABc', utf8_binary), TRIM('ABc', utf8_lcase) from t5 -- !query analysis -Project [trim(utf8_binary#x, Some(ABc)) AS TRIM(BOTH ABc FROM utf8_binary)#x, trim(utf8_lcase#x, Some(cast(ABc as string collate UTF8_LCASE))) AS TRIM(BOTH ABc FROM utf8_lcase)#x] +Project [trim(utf8_binary#x, Some(ABc)) AS TRIM(BOTH ABc FROM utf8_binary)#x, trim(utf8_lcase#x, Some(ABc)) AS TRIM(BOTH ABc FROM utf8_lcase)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -2856,7 +2896,7 @@ Project [ltrim(collate(utf8_lcase#x, utf8_binary_rtrim), Some(collate(utf8_binar -- !query select LTRIM('ABc', utf8_binary), LTRIM('ABc', utf8_lcase) from t5 -- !query analysis -Project [ltrim(utf8_binary#x, Some(ABc)) AS TRIM(LEADING ABc FROM utf8_binary)#x, ltrim(utf8_lcase#x, Some(cast(ABc as string collate UTF8_LCASE))) AS TRIM(LEADING ABc FROM utf8_lcase)#x] +Project [ltrim(utf8_binary#x, Some(ABc)) AS TRIM(LEADING ABc FROM utf8_binary)#x, ltrim(utf8_lcase#x, Some(ABc)) AS TRIM(LEADING ABc FROM utf8_lcase)#x] +- SubqueryAlias spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet @@ -2954,7 +2994,7 @@ Project [rtrim(collate(utf8_lcase#x, utf8_binary_rtrim), Some(collate(utf8_binar -- !query select RTRIM('ABc', utf8_binary), RTRIM('ABc', utf8_lcase) from t5 -- !query analysis -Project [rtrim(utf8_binary#x, Some(ABc)) AS TRIM(TRAILING ABc FROM utf8_binary)#x, rtrim(utf8_lcase#x, Some(cast(ABc as string collate UTF8_LCASE))) AS TRIM(TRAILING ABc FROM utf8_lcase)#x] +Project [rtrim(utf8_binary#x, Some(ABc)) AS TRIM(TRAILING ABc FROM utf8_binary)#x, rtrim(utf8_lcase#x, Some(ABc)) AS TRIM(TRAILING ABc FROM utf8_lcase)#x] +- SubqueryAlias 
spark_catalog.default.t5 +- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out index 0b539267e720f..c12076b85b1df 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out @@ -10,7 +10,7 @@ CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`cte_tbl`, ErrorI : +- OneRowRelation +- Project [col#x] +- SubqueryAlias s - +- CTERelationRef xxxx, true, [col#x], false + +- CTERelationRef xxxx, true, [col#x], false, false -- !query @@ -32,7 +32,7 @@ CreateViewCommand `cte_view`, WITH s AS (SELECT 42 AS col) SELECT * FROM s, fals : +- OneRowRelation +- Project [col#x] +- SubqueryAlias s - +- CTERelationRef xxxx, true, [col#x], false + +- CTERelationRef xxxx, true, [col#x], false, false -- !query @@ -49,7 +49,7 @@ Project [col#x] : +- OneRowRelation +- Project [col#x] +- SubqueryAlias s - +- CTERelationRef xxxx, true, [col#x], false + +- CTERelationRef xxxx, true, [col#x], false, false -- !query @@ -64,7 +64,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d : +- OneRowRelation +- Project [col#x] +- SubqueryAlias S - +- CTERelationRef xxxx, true, [col#x], false + +- CTERelationRef xxxx, true, [col#x], false, false -- !query @@ -86,7 +86,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d : +- OneRowRelation +- Project [col#x] +- SubqueryAlias s - +- CTERelationRef xxxx, true, [col#x], false + +- CTERelationRef xxxx, true, [col#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out index 2cbcbedff81b2..0d39ff7ad5101 100644 --- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out @@ -15,10 +15,10 @@ WithCTE : +- SubqueryAlias t : +- Project [1#x] : +- SubqueryAlias t2 -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query @@ -37,7 +37,7 @@ Aggregate [max(c#x) AS max(c)#x] : +- OneRowRelation +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -54,7 +54,7 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] : : +- OneRowRelation : +- Project [1#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- OneRowRelation @@ -140,10 +140,10 @@ WithCTE : +- SubqueryAlias t2 : +- Project [2#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [2#x], false +: +- CTERelationRef xxxx, true, [2#x], false, false +- Project [2#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [2#x], false + +- CTERelationRef xxxx, true, [2#x], false, false -- !query @@ -178,11 +178,11 @@ WithCTE : : : +- OneRowRelation : : +- Project [c#x] : : +- SubqueryAlias t -: : +- CTERelationRef xxxx, true, [c#x], false +: : +- CTERelationRef xxxx, true, [c#x], false, false : +- OneRowRelation +- Project [scalarsubquery()#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [scalarsubquery()#x], false + +- CTERelationRef xxxx, true, [scalarsubquery()#x], false, false -- !query @@ -215,15 +215,15 @@ WithCTE : +- SubqueryAlias t2 : +- Project [3#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [3#x], false +: +- CTERelationRef xxxx, true, [3#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [3#x] : +- SubqueryAlias t2 -: +- 
CTERelationRef xxxx, true, [3#x], false +: +- CTERelationRef xxxx, true, [3#x], false, false +- Project [3#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [3#x], false + +- CTERelationRef xxxx, true, [3#x], false, false -- !query @@ -248,7 +248,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -277,7 +277,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -312,7 +312,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -335,7 +335,7 @@ WithCTE : : +- OneRowRelation : +- Project [2#x] : +- SubqueryAlias t - : +- CTERelationRef xxxx, true, [2#x], false + : +- CTERelationRef xxxx, true, [2#x], false, false +- OneRowRelation @@ -362,7 +362,7 @@ WithCTE : : : +- OneRowRelation : : +- Project [2#x] : : +- SubqueryAlias t - : : +- CTERelationRef xxxx, true, [2#x], false + : : +- CTERelationRef xxxx, true, [2#x], false, false : +- OneRowRelation +- OneRowRelation @@ -396,7 +396,7 @@ WithCTE : : : +- OneRowRelation : : +- Project [3#x] : : +- SubqueryAlias t - : : +- CTERelationRef xxxx, true, [3#x], false + : : +- CTERelationRef xxxx, true, [3#x], false, false : +- OneRowRelation +- OneRowRelation @@ -425,9 +425,9 @@ WithCTE : : +- OneRowRelation : +- Project [c#x] : +- SubqueryAlias t - : +- CTERelationRef xxxx, true, [c#x], false + : +- CTERelationRef xxxx, true, [c#x], false, false +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -448,14 +448,14 @@ WithCTE : +- SubqueryAlias t : +- Project [1#x] : +- SubqueryAlias t2 -: +- 
CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [2 AS 2#x] : +- OneRowRelation +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query @@ -480,10 +480,10 @@ WithCTE : +- SubqueryAlias t : +- Project [2#x] : +- SubqueryAlias aBC -: +- CTERelationRef xxxx, true, [2#x], false +: +- CTERelationRef xxxx, true, [2#x], false, false +- Project [2#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [2#x], false + +- CTERelationRef xxxx, true, [2#x], false, false -- !query @@ -506,7 +506,7 @@ WithCTE : : +- OneRowRelation : +- Project [2#x] : +- SubqueryAlias aBC - : +- CTERelationRef xxxx, true, [2#x], false + : +- CTERelationRef xxxx, true, [2#x], false, false +- OneRowRelation @@ -530,15 +530,15 @@ WithCTE : +- SubqueryAlias t3 : +- Project [1#x] : +- SubqueryAlias t1 -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [1#x] : +- SubqueryAlias t3 -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- Project [1#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query @@ -561,12 +561,12 @@ WithCTE : +- SubqueryAlias cte_inner : +- Project [1#x] : +- SubqueryAlias cte_outer -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- Project [1#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [1#x] +- SubqueryAlias cte_inner - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query @@ -594,19 +594,19 @@ WithCTE : +- SubqueryAlias cte_inner_inner : +- Project [1#x] : +- SubqueryAlias cte_outer -: +- CTERelationRef xxxx, true, [1#x], false +: +- 
CTERelationRef xxxx, true, [1#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias cte_inner : +- Project [1#x] : +- SubqueryAlias __auto_generated_subquery_name : +- Project [1#x] : +- SubqueryAlias cte_inner_inner -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- Project [1#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [1#x] +- SubqueryAlias cte_inner - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out index 88d7bf9f929ad..633352a8a3b6f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out @@ -15,10 +15,10 @@ WithCTE : +- SubqueryAlias t : +- Project [1#x] : +- SubqueryAlias t2 -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query @@ -37,7 +37,7 @@ Aggregate [max(c#x) AS max(c)#x] : +- OneRowRelation +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -54,7 +54,7 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] : : +- OneRowRelation : +- Project [1#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- OneRowRelation @@ -171,11 +171,11 @@ WithCTE : : : +- OneRowRelation : : +- Project [c#x] : : +- SubqueryAlias t -: : +- CTERelationRef xxxx, true, [c#x], false +: : +- CTERelationRef xxxx, true, [c#x], false, false : +- OneRowRelation +- Project [scalarsubquery()#x] +- SubqueryAlias t2 - +- 
CTERelationRef xxxx, true, [scalarsubquery()#x], false + +- CTERelationRef xxxx, true, [scalarsubquery()#x], false, false -- !query @@ -225,7 +225,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -254,7 +254,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -289,7 +289,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x], false + +- CTERelationRef xxxx, true, [c#x], false, false -- !query @@ -392,14 +392,14 @@ WithCTE : +- SubqueryAlias t : +- Project [1#x] : +- SubqueryAlias t2 -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [2 AS 2#x] : +- OneRowRelation +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query @@ -462,15 +462,15 @@ WithCTE : +- SubqueryAlias t3 : +- Project [1#x] : +- SubqueryAlias t1 -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [1#x] : +- SubqueryAlias t3 -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- Project [1#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query @@ -493,12 +493,12 @@ WithCTE : +- SubqueryAlias cte_inner : +- Project [1#x] : +- SubqueryAlias cte_outer -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- Project [1#x] +- SubqueryAlias 
__auto_generated_subquery_name +- Project [1#x] +- SubqueryAlias cte_inner - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query @@ -526,19 +526,19 @@ WithCTE : +- SubqueryAlias cte_inner_inner : +- Project [1#x] : +- SubqueryAlias cte_outer -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias cte_inner : +- Project [1#x] : +- SubqueryAlias __auto_generated_subquery_name : +- Project [1#x] : +- SubqueryAlias cte_inner_inner -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- Project [1#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [1#x] +- SubqueryAlias cte_inner - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out index 885f34a28d67d..ded612ec8f8b6 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out @@ -17,6 +17,19 @@ CreateViewCommand `t2`, select * from values 0, 1 as t(id), false, false, LocalT +- LocalRelation [id#x] +-- !query +create temporary view t3 as select * from t +-- !query analysis +CreateViewCommand `t3`, select * from t, false, false, LocalTempView, UNSUPPORTED, true + +- Project [id#x] + +- SubqueryAlias t + +- View (`t`, [id#x]) + +- Project [cast(id#x as int) AS id#x] + +- Project [id#x] + +- SubqueryAlias t + +- LocalRelation [id#x] + + -- !query WITH s AS (SELECT 1 FROM s) SELECT * FROM s -- !query analysis @@ -73,7 +86,28 @@ WithCTE : +- LocalRelation [id#x] +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x], false + +- CTERelationRef xxxx, true, [1#x], false, false + + +-- !query +WITH t AS (SELECT 1) SELECT * 
FROM t3 +-- !query analysis +WithCTE +:- CTERelationDef xxxx, false +: +- SubqueryAlias t +: +- Project [1 AS 1#x] +: +- OneRowRelation ++- Project [id#x] + +- SubqueryAlias t3 + +- View (`t3`, [id#x]) + +- Project [cast(id#x as int) AS id#x] + +- Project [id#x] + +- SubqueryAlias t + +- View (`t`, [id#x]) + +- Project [cast(id#x as int) AS id#x] + +- Project [id#x] + +- SubqueryAlias t + +- LocalRelation [id#x] -- !query @@ -113,13 +147,13 @@ WithCTE : +- SubqueryAlias t2 : +- Project [2 AS 2#x] : +- SubqueryAlias t1 -: +- CTERelationRef xxxx, true, [id#x], false +: +- CTERelationRef xxxx, true, [id#x], false, false +- Project [id#x, 2#x] +- Join Cross :- SubqueryAlias t1 - : +- CTERelationRef xxxx, true, [id#x], false + : +- CTERelationRef xxxx, true, [id#x], false, false +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [2#x], false + +- CTERelationRef xxxx, true, [2#x], false, false -- !query @@ -157,10 +191,10 @@ WithCTE +- Join Cross :- SubqueryAlias t1 : +- SubqueryAlias CTE1 - : +- CTERelationRef xxxx, true, [id#x], false + : +- CTERelationRef xxxx, true, [id#x], false, false +- SubqueryAlias t2 +- SubqueryAlias CTE1 - +- CTERelationRef xxxx, true, [id#x], false + +- CTERelationRef xxxx, true, [id#x], false, false -- !query @@ -176,7 +210,7 @@ WithCTE +- Project [x#x] +- Filter (x#x = 1) +- SubqueryAlias t - +- CTERelationRef xxxx, true, [x#x], false + +- CTERelationRef xxxx, true, [x#x], false, false -- !query @@ -192,7 +226,7 @@ WithCTE +- Project [x#x, y#x] +- Filter ((x#x = 1) AND (y#x = 2)) +- SubqueryAlias t - +- CTERelationRef xxxx, true, [x#x, y#x], false + +- CTERelationRef xxxx, true, [x#x, y#x], false, false -- !query @@ -207,7 +241,7 @@ WithCTE : +- OneRowRelation +- Project [x#x, x#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [x#x, x#x], false + +- CTERelationRef xxxx, true, [x#x, x#x], false, false -- !query @@ -310,46 +344,46 @@ WithCTE : +- Project [c8#x AS c7#x] : +- Project [c8#x] : +- SubqueryAlias w8 -: +- CTERelationRef xxxx, 
true, [c8#x], false +: +- CTERelationRef xxxx, true, [c8#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias w6 : +- Project [c7#x AS c6#x] : +- Project [c7#x] : +- SubqueryAlias w7 -: +- CTERelationRef xxxx, true, [c7#x], false +: +- CTERelationRef xxxx, true, [c7#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias w5 : +- Project [c6#x AS c5#x] : +- Project [c6#x] : +- SubqueryAlias w6 -: +- CTERelationRef xxxx, true, [c6#x], false +: +- CTERelationRef xxxx, true, [c6#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias w4 : +- Project [c5#x AS c4#x] : +- Project [c5#x] : +- SubqueryAlias w5 -: +- CTERelationRef xxxx, true, [c5#x], false +: +- CTERelationRef xxxx, true, [c5#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias w3 : +- Project [c4#x AS c3#x] : +- Project [c4#x] : +- SubqueryAlias w4 -: +- CTERelationRef xxxx, true, [c4#x], false +: +- CTERelationRef xxxx, true, [c4#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias w2 : +- Project [c3#x AS c2#x] : +- Project [c3#x] : +- SubqueryAlias w3 -: +- CTERelationRef xxxx, true, [c3#x], false +: +- CTERelationRef xxxx, true, [c3#x], false, false :- CTERelationDef xxxx, false : +- SubqueryAlias w1 : +- Project [c2#x AS c1#x] : +- Project [c2#x] : +- SubqueryAlias w2 -: +- CTERelationRef xxxx, true, [c2#x], false +: +- CTERelationRef xxxx, true, [c2#x], false, false +- Project [c1#x] +- SubqueryAlias w1 - +- CTERelationRef xxxx, true, [c1#x], false + +- CTERelationRef xxxx, true, [c1#x], false, false -- !query @@ -386,7 +420,7 @@ WithCTE +- Project [42#x, 10#x] +- Join Inner :- SubqueryAlias same_name - : +- CTERelationRef xxxx, true, [42#x], false + : +- CTERelationRef xxxx, true, [42#x], false, false +- SubqueryAlias same_name +- Project [10 AS 10#x] +- OneRowRelation @@ -425,7 +459,7 @@ WithCTE : +- OneRowRelation +- Project [x#x, typeof(x#x) AS typeof(x)#x] +- SubqueryAlias q - +- CTERelationRef xxxx, true, [x#x], false + +- 
CTERelationRef xxxx, true, [x#x], false, false -- !query @@ -485,7 +519,7 @@ Project [y#x] : +- OneRowRelation +- Project [(x#x + 1) AS y#x] +- SubqueryAlias q - +- CTERelationRef xxxx, true, [x#x], false + +- CTERelationRef xxxx, true, [x#x], false, false -- !query @@ -499,7 +533,7 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] : : +- OneRowRelation : +- Project [x#x] : +- SubqueryAlias q -: +- CTERelationRef xxxx, true, [x#x], false +: +- CTERelationRef xxxx, true, [x#x], false, false +- OneRowRelation @@ -514,7 +548,7 @@ Project [1 IN (list#x []) AS (1 IN (listquery()))#x] : : +- OneRowRelation : +- Project [1#x] : +- SubqueryAlias q -: +- CTERelationRef xxxx, true, [1#x], false +: +- CTERelationRef xxxx, true, [1#x], false, false +- OneRowRelation @@ -562,14 +596,14 @@ WithCTE :- Join Inner : :- SubqueryAlias x : : +- SubqueryAlias T1 - : : +- CTERelationRef xxxx, true, [a#x], false + : : +- CTERelationRef xxxx, true, [a#x], false, false : +- SubqueryAlias y : +- Project [b#x] : +- SubqueryAlias T1 - : +- CTERelationRef xxxx, true, [b#x], false + : +- CTERelationRef xxxx, true, [b#x], false, false +- SubqueryAlias z +- SubqueryAlias T1 - +- CTERelationRef xxxx, true, [a#x], false + +- CTERelationRef xxxx, true, [a#x], false, false -- !query @@ -597,9 +631,9 @@ WithCTE +- Project [c#x, a#x] +- Join Inner :- SubqueryAlias ttTT - : +- CTERelationRef xxxx, true, [c#x], false + : +- CTERelationRef xxxx, true, [c#x], false, false +- SubqueryAlias tttT_2 - +- CTERelationRef xxxx, true, [a#x], false + +- CTERelationRef xxxx, true, [a#x], false, false -- !query @@ -615,7 +649,7 @@ Project [scalar-subquery#x [x#x] AS scalarsubquery(x)#x] : : +- OneRowRelation : +- Project [x#x] : +- SubqueryAlias q -: +- CTERelationRef xxxx, true, [x#x], false +: +- CTERelationRef xxxx, true, [x#x], false, false +- SubqueryAlias T +- Project [1 AS x#x, 2 AS y#x] +- OneRowRelation @@ -634,7 +668,7 @@ Project [scalar-subquery#x [x#x && y#x] AS scalarsubquery(x, y)#x] : : +- 
OneRowRelation : +- Project [((outer(x#x) + outer(y#x)) + z#x) AS ((outer(T.x) + outer(T.y)) + z)#x] : +- SubqueryAlias q -: +- CTERelationRef xxxx, true, [z#x], false +: +- CTERelationRef xxxx, true, [z#x], false, false +- SubqueryAlias T +- Project [1 AS x#x, 2 AS y#x] +- OneRowRelation @@ -654,12 +688,12 @@ WithCTE : +- SubqueryAlias q2 : +- Project [x#x] : +- SubqueryAlias q1 -: +- CTERelationRef xxxx, true, [x#x], false +: +- CTERelationRef xxxx, true, [x#x], false, false +- Project [x#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [x#x] +- SubqueryAlias q2 - +- CTERelationRef xxxx, true, [x#x], false + +- CTERelationRef xxxx, true, [x#x], false, false -- !query @@ -676,12 +710,12 @@ WithCTE : +- SubqueryAlias q1 : +- Project [(x#x + 1) AS (x + 1)#x] : +- SubqueryAlias q1 -: +- CTERelationRef xxxx, true, [x#x], false +: +- CTERelationRef xxxx, true, [x#x], false, false +- Project [(x + 1)#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [(x + 1)#x] +- SubqueryAlias q1 - +- CTERelationRef xxxx, true, [(x + 1)#x], false + +- CTERelationRef xxxx, true, [(x + 1)#x], false, false -- !query @@ -723,9 +757,9 @@ WithCTE : +- Aggregate [max(j#x) AS max(j)#x] : +- SubqueryAlias cte2 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [j#x], false + : +- CTERelationRef xxxx, true, [j#x], false, false +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [j#x], false + +- CTERelationRef xxxx, true, [j#x], false, false -- !query @@ -778,3 +812,9 @@ DropTempViewCommand t DROP VIEW IF EXISTS t2 -- !query analysis DropTempViewCommand t2 + + +-- !query +DROP VIEW IF EXISTS t3 +-- !query analysis +DropTempViewCommand t3 diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out index d6f0953dcf90a..99f7326e5ef8e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out @@ -56,6 +56,36 @@ DESCRIBE t DescribeTableCommand `spark_catalog`.`default`.`t`, false, [col_name#x, data_type#x, comment#x] +-- !query +DESCRIBE EXTENDED t AS JSON +-- !query analysis +DescribeRelationJsonCommand true, [json_metadata#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), default.t, V1Table(default.t), [a#x, b#x, c#x, d#x] + + +-- !query +DESCRIBE t AS JSON +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "DESCRIBE_JSON_NOT_EXTENDED", + "sqlState" : "0A000", + "messageParameters" : { + "tableName" : "t" + } +} + + +-- !query +DESC FORMATTED t a AS JSON +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON", + "sqlState" : "0A000" +} + + -- !query DESC default.t -- !query analysis @@ -110,6 +140,13 @@ DESC t PARTITION (c='Us', d=1) DescribeTableCommand `spark_catalog`.`default`.`t`, [c=Us, d=1], false, [col_name#x, data_type#x, comment#x] +-- !query +DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON +-- !query analysis +DescribeRelationJsonCommand [c=Us, d=1], true, [json_metadata#x] ++- ResolvedTable V2SessionCatalog(spark_catalog), default.t, V1Table(default.t), [a#x, b#x, c#x, d#x] + + -- !query DESC EXTENDED t PARTITION (c='Us', d=1) -- !query analysis @@ -290,6 +327,12 @@ EXPLAIN DESCRIBE t PARTITION (c='Us', d=2) ExplainCommand 'DescribeRelation [c=Us, d=2], false, [col_name#x, data_type#x, comment#x], SimpleMode +-- !query +EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON +-- !query analysis +ExplainCommand 'DescribeRelationJsonCommand [c=Us, d=2], true, [json_metadata#x], SimpleMode + + -- !query DROP TABLE t -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/double-quoted-identifiers-enabled.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/double-quoted-identifiers-enabled.sql.out 
index 2edcd638120c5..f9f0067648fcf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/double-quoted-identifiers-enabled.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/double-quoted-identifiers-enabled.sql.out @@ -418,7 +418,7 @@ CreateViewCommand `myview`, [(c1,None)], WITH "v"("a") AS (SELECT 1) SELECT "a" : +- OneRowRelation +- Project [a#x] +- SubqueryAlias v - +- CTERelationRef xxxx, true, [a#x], false + +- CTERelationRef xxxx, true, [a#x], false, false -- !query @@ -438,7 +438,7 @@ Project [a1#x AS a2#x] : +- OneRowRelation +- Project [a#x] +- SubqueryAlias v - +- CTERelationRef xxxx, true, [a#x], false + +- CTERelationRef xxxx, true, [a#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out index 0895fe788f84a..f085e47c08ecf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out @@ -630,7 +630,7 @@ Aggregate [a#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x], [a#x, b#x, coun SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ()) -- !query analysis Aggregate [a#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x], [a#x, b#x, count(1) AS count(1)#xL] -+- Expand [[a#x, b#x, a#x, b#x, 0, 0], [a#x, b#x, a#x, b#x, 0, 1], [a#x, b#x, a#x, b#x, 0, 2], [a#x, b#x, a#x, b#x, 0, 3], [a#x, b#x, a#x, b#x, 0, 4], [a#x, b#x, a#x, b#x, 0, 5], [a#x, b#x, a#x, b#x, 0, 6], [a#x, b#x, a#x, b#x, 0, 7], [a#x, b#x, a#x, b#x, 0, 8], [a#x, b#x, a#x, b#x, 0, 9], [a#x, b#x, a#x, b#x, 0, 10], [a#x, b#x, a#x, b#x, 0, 11], [a#x, b#x, a#x, b#x, 0, 12], [a#x, b#x, a#x, null, 1, 13], [a#x, b#x, a#x, null, 1, 14], [a#x, b#x, a#x, b#x, 0, 15], [a#x, b#x, a#x, null, 1, 16], [a#x, b#x, a#x, null, 1, 17], [a#x, b#x, a#x, b#x, 0, 18], [a#x, b#x, 
a#x, b#x, 0, 19], [a#x, b#x, a#x, b#x, 0, 20], [a#x, b#x, a#x, b#x, 0, 21], [a#x, b#x, a#x, b#x, 0, 22], [a#x, b#x, a#x, b#x, 0, 23], ... 12 more fields], [a#x, b#x, a#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x] ++- Expand [[a#x, b#x, a#x, b#x, 0, 0], [a#x, b#x, a#x, b#x, 0, 1], [a#x, b#x, a#x, b#x, 0, 2], [a#x, b#x, a#x, b#x, 0, 3], [a#x, b#x, a#x, b#x, 0, 4], [a#x, b#x, a#x, b#x, 0, 5], [a#x, b#x, a#x, b#x, 0, 6], [a#x, b#x, a#x, b#x, 0, 7], [a#x, b#x, a#x, b#x, 0, 8], [a#x, b#x, a#x, b#x, 0, 9], [a#x, b#x, a#x, b#x, 0, 10], [a#x, b#x, a#x, b#x, 0, 11], [a#x, b#x, a#x, b#x, 0, 12], [a#x, b#x, a#x, null, 1, 13], [a#x, b#x, a#x, null, 1, 14], [a#x, b#x, a#x, b#x, 0, 15], [a#x, b#x, a#x, null, 1, 16], [a#x, b#x, a#x, null, 1, 17], [a#x, b#x, a#x, b#x, 0, 18], [a#x, b#x, a#x, b#x, 0, 19], [a#x, b#x, a#x, b#x, 0, 20], [a#x, b#x, a#x, b#x, 0, 21], [a#x, b#x, a#x, b#x, 0, 22], [a#x, b#x, a#x, b#x, 0, 23], [a#x, b#x, a#x, b#x, 0, 24], ... 11 more fields], [a#x, b#x, a#x, b#x, spark_grouping_id#xL, _gen_grouping_pos#x] +- Project [a#x, b#x, a#x AS a#x, b#x AS b#x] +- SubqueryAlias testdata +- View (`testData`, [a#x, b#x]) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out index c4839fd359d14..607b2401e853b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out @@ -1055,6 +1055,15 @@ Aggregate [histogram_numeric(col#xL, 3, 0, 0) AS histogram_numeric(col, 3)#x] +- LocalRelation [col#xL] +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS DECIMAL(4, 2))), (CAST(2 AS DECIMAL(4, 2))), (CAST(3 AS DECIMAL(4, 2))) AS tab(col) +-- !query analysis +Aggregate [histogram_numeric(col#x, 3, 0, 0) AS histogram_numeric(col, 3)#x] ++- SubqueryAlias tab + +- LocalRelation [col#x] + + -- !query SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP 
'2017-03-01 00:00:00'), (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 7bbad7f49fb25..e79a549f84062 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -1022,7 +1022,7 @@ WithCTE : +- LocalRelation [col1#x, col2#x] +- Aggregate [max(c1#x) AS max(c1)#x] +- SubqueryAlias T - +- CTERelationRef xxxx, true, [c1#x, c2#x], false + +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false -- !query @@ -1041,7 +1041,7 @@ WithCTE : +- LocalRelation [col1#x, col2#x] +- Aggregate [max(c1#x) AS max(c1)#x] +- SubqueryAlias T - +- CTERelationRef xxxx, true, [c1#x, c2#x], false + +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false -- !query @@ -1055,7 +1055,7 @@ WithCTE : +- LocalRelation [col1#x, col2#x] +- Aggregate [max(c1#x) AS max(c1)#x] +- SubqueryAlias ABC - +- CTERelationRef xxxx, true, [c1#x, c2#x], false + +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out index e4e23339134c4..6dfbf13ce3595 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out @@ -1377,10 +1377,10 @@ WithCTE : : +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] : : +- LocalRelation [col1#x, col2#x] : +- SubqueryAlias cte1 -: +- CTERelationRef xxxx, true, [c1#x], false +: +- CTERelationRef xxxx, true, [c1#x], false, false +- Project [c1#x, c2#x] +- SubqueryAlias cte2 - +- CTERelationRef xxxx, true, [c1#x, c2#x], false + +- CTERelationRef xxxx, 
true, [c1#x, c2#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/listagg-collations.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/listagg-collations.sql.out new file mode 100644 index 0000000000000..5bced5e897e22 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/listagg-collations.sql.out @@ -0,0 +1,86 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query analysis +Aggregate [listagg(c1#x, null, collate(c1#x, utf8_binary) ASC NULLS FIRST, 0, 0) AS listagg(c1, NULL) WITHIN GROUP (ORDER BY collate(c1, utf8_binary) ASC NULLS FIRST)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query analysis +Aggregate [listagg(c1#x, null, collate(c1#x, utf8_lcase) ASC NULLS FIRST, 0, 0) AS listagg(c1, NULL) WITHIN GROUP (ORDER BY collate(c1, utf8_lcase) ASC NULLS FIRST)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query analysis +Aggregate [listagg(distinct collate(c1#x, utf8_binary), null, 0, 0) AS listagg(DISTINCT collate(c1, utf8_binary), NULL)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query analysis +Aggregate [listagg(distinct collate(c1#x, utf8_lcase), null, 0, 0) AS listagg(DISTINCT collate(c1, utf8_lcase), NULL)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 
COLLATE utf8_lcase) FROM (VALUES ('a'), ('B'), ('b'), ('A')) AS t(c1) +-- !query analysis +Aggregate [listagg(distinct collate(c1#x, utf8_lcase), null, collate(c1#x, utf8_lcase) ASC NULLS FIRST, 0, 0) AS listagg(DISTINCT collate(c1, utf8_lcase), NULL) WITHIN GROUP (ORDER BY collate(c1, utf8_lcase) ASC NULLS FIRST)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc '), ('x'), ('abc')) AS t(c1) +-- !query analysis +Aggregate [listagg(distinct collate(c1#x, unicode_rtrim), null, 0, 0) AS listagg(DISTINCT collate(c1, unicode_rtrim), NULL)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1) FROM (VALUES ('abc '), ('abc '), ('abc\n'), ('abc'), ('x')) AS t(c1) +-- !query analysis +Aggregate [listagg(c1#x, null, c1#x ASC NULLS FIRST, 0, 0) AS listagg(c1, NULL) WITHIN GROUP (ORDER BY c1 ASC NULLS FIRST)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc '), ('abc\n'), ('abc'), ('x')) AS t(c1) +-- !query analysis +Aggregate [listagg(c1#x, null, collate(c1#x, unicode_rtrim) ASC NULLS FIRST, 0, 0) AS listagg(c1, NULL) WITHIN GROUP (ORDER BY collate(c1, unicode_rtrim) ASC NULLS FIRST)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('b'), ('A'), ('B')) AS t(c1) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT", + "sqlState" : "42K0K", + "messageParameters" : { + "funcArg" : "\"collate(c1, utf8_lcase)\"", + "funcName" : "`listagg`", + "orderingExpr" : 
"\"collate(c1, utf8_binary)\"" + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/listagg.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/listagg.sql.out new file mode 100644 index 0000000000000..9ad94bce3a2be --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/listagg.sql.out @@ -0,0 +1,435 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMP VIEW df AS +SELECT * FROM (VALUES ('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), (NULL, NULL)) AS t(a, b) +-- !query analysis +CreateViewCommand `df`, SELECT * FROM (VALUES ('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), (NULL, NULL)) AS t(a, b), false, false, LocalTempView, UNSUPPORTED, true + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE TEMP VIEW df2 AS +SELECT * FROM (VALUES (1, true), (2, false), (3, false)) AS t(a, b) +-- !query analysis +CreateViewCommand `df2`, SELECT * FROM (VALUES (1, true), (2, false), (3, false)) AS t(a, b), false, false, LocalTempView, UNSUPPORTED, true + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(b) FROM df GROUP BY a +-- !query analysis +Aggregate [a#x], [listagg(b#x, null, 0, 0) AS listagg(b, NULL)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT string_agg(b) FROM df GROUP BY a +-- !query analysis +Aggregate [a#x], [string_agg(b#x, null, 0, 0) AS string_agg(b, NULL)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- 
LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(b, NULL) FROM df GROUP BY a +-- !query analysis +Aggregate [a#x], [listagg(b#x, null, 0, 0) AS listagg(b, NULL)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(b) FROM df WHERE 1 != 1 +-- !query analysis +Aggregate [listagg(b#x, null, 0, 0) AS listagg(b, NULL)#x] ++- Filter NOT (1 = 1) + +- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(b, '|') FROM df GROUP BY a +-- !query analysis +Aggregate [a#x], [listagg(b#x, |, 0, 0) AS listagg(b, |)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a) FROM df +-- !query analysis +Aggregate [listagg(a#x, null, 0, 0) AS listagg(a, NULL)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(DISTINCT a) FROM df +-- !query analysis +Aggregate [listagg(distinct a#x, null, 0, 0) AS listagg(DISTINCT a, NULL)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT 
listagg(a) WITHIN GROUP (ORDER BY a) FROM df +-- !query analysis +Aggregate [listagg(a#x, null, a#x ASC NULLS FIRST, 0, 0) AS listagg(a, NULL) WITHIN GROUP (ORDER BY a ASC NULLS FIRST)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a DESC) FROM df +-- !query analysis +Aggregate [listagg(a#x, null, a#x DESC NULLS LAST, 0, 0) AS listagg(a, NULL) WITHIN GROUP (ORDER BY a DESC NULLS LAST)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a DESC) OVER (PARTITION BY b) FROM df +-- !query analysis +Project [listagg(a, NULL) WITHIN GROUP (ORDER BY a DESC NULLS LAST) OVER (PARTITION BY b ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x] ++- Project [a#x, b#x, listagg(a, NULL) WITHIN GROUP (ORDER BY a DESC NULLS LAST) OVER (PARTITION BY b ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x, listagg(a, NULL) WITHIN GROUP (ORDER BY a DESC NULLS LAST) OVER (PARTITION BY b ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x] + +- Window [listagg(a#x, null, a#x DESC NULLS LAST, 0, 0) windowspecdefinition(b#x, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS listagg(a, NULL) WITHIN GROUP (ORDER BY a DESC NULLS LAST) OVER (PARTITION BY b ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x], [b#x] + +- Project [a#x, b#x] + +- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, 
col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b) FROM df +-- !query analysis +Aggregate [listagg(a#x, null, b#x ASC NULLS FIRST, 0, 0) AS listagg(a, NULL) WITHIN GROUP (ORDER BY b ASC NULLS FIRST)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC) FROM df +-- !query analysis +Aggregate [listagg(a#x, null, b#x DESC NULLS LAST, 0, 0) AS listagg(a, NULL) WITHIN GROUP (ORDER BY b DESC NULLS LAST)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a, '|') WITHIN GROUP (ORDER BY b DESC) FROM df +-- !query analysis +Aggregate [listagg(a#x, |, b#x DESC NULLS LAST, 0, 0) AS listagg(a, |) WITHIN GROUP (ORDER BY b DESC NULLS LAST)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC, a ASC) FROM df +-- !query analysis +Aggregate [listagg(a#x, null, b#x DESC NULLS LAST, a#x ASC NULLS FIRST, 0, 0) AS listagg(a, NULL) WITHIN GROUP (ORDER BY b DESC NULLS LAST, a ASC NULLS FIRST)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b 
DESC, a DESC) FROM df +-- !query analysis +Aggregate [listagg(a#x, null, b#x DESC NULLS LAST, a#x DESC NULLS LAST, 0, 0) AS listagg(a, NULL) WITHIN GROUP (ORDER BY b DESC NULLS LAST, a DESC NULLS LAST)#x] ++- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(c1) FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query analysis +Aggregate [listagg(c1#x, null, 0, 0) AS listagg(c1, NULL)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(c1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query analysis +Aggregate [listagg(c1#x, null, 0, 0) AS listagg(c1, NULL)#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(c1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query analysis +Aggregate [listagg(c1#x, 0x42, 0, 0) AS listagg(c1, X'42')#x] ++- SubqueryAlias t + +- Project [col1#x AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT listagg(a), listagg(b, ',') FROM df2 +-- !query analysis +Aggregate [listagg(cast(a#x as string), null, 0, 0) AS listagg(a, NULL)#x, listagg(cast(b#x as string), ,, 0, 0) AS listagg(b, ,)#x] ++- SubqueryAlias df2 + +- View (`df2`, [a#x, b#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as boolean) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(c1) FROM (VALUES (ARRAY('a', 'b'))) AS t(c1) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"c1\"", + "inputType" : "\"ARRAY\"", + "paramIndex" : "first", + 
"requiredType" : "(\"STRING\" or \"BINARY\")", + "sqlExpr" : "\"listagg(c1, NULL)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 18, + "fragment" : "listagg(c1)" + } ] +} + + +-- !query +SELECT listagg(c1, ', ') FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.DATA_DIFF_TYPES", + "sqlState" : "42K09", + "messageParameters" : { + "dataType" : "(\"BINARY\" or \"STRING\")", + "functionName" : "`listagg`", + "sqlExpr" : "\"listagg(c1, , )\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "listagg(c1, ', ')" + } ] +} + + +-- !query +SELECT listagg(b, a) FROM df GROUP BY a +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"a\"", + "inputName" : "`delimiter`", + "inputType" : "\"STRING\"", + "sqlExpr" : "\"listagg(b, a)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 20, + "fragment" : "listagg(b, a)" + } ] +} + + +-- !query +SELECT listagg(a) OVER (ORDER BY a) FROM df +-- !query analysis +Project [listagg(a, NULL) OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] ++- Project [a#x, listagg(a, NULL) OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x, listagg(a, NULL) OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x] + +- Window [listagg(a#x, null, 0, 0) windowspecdefinition(a#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS listagg(a, NULL) OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x], [a#x ASC NULLS FIRST] + +- Project 
[a#x] + +- SubqueryAlias df + +- View (`df`, [a#x, b#x]) + +- Project [cast(a#x as string) AS a#x, cast(b#x as string) AS b#x] + +- Project [a#x, b#x] + +- SubqueryAlias t + +- Project [col1#x AS a#x, col2#x AS b#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a) FROM df +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC", + "sqlState" : "42601", + "messageParameters" : { + "aggFunc" : "\"listagg(a, NULL, a ASC NULLS FIRST)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 61, + "fragment" : "listagg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT string_agg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a) FROM df +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC", + "sqlState" : "42601", + "messageParameters" : { + "aggFunc" : "\"listagg(a, NULL, a ASC NULLS FIRST)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 64, + "fragment" : "string_agg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT listagg(DISTINCT a) OVER (ORDER BY a) FROM df +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DISTINCT_WINDOW_FUNCTION_UNSUPPORTED", + "sqlState" : "0A000", + "messageParameters" : { + "windowExpr" : "\"listagg(DISTINCT a, NULL) OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 44, + "fragment" : "listagg(DISTINCT a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT listagg(DISTINCT a) WITHIN GROUP (ORDER BY b) FROM df +-- !query analysis 
+org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT", + "sqlState" : "42K0K", + "messageParameters" : { + "funcArg" : "\"a\"", + "funcName" : "`listagg`", + "orderingExpr" : "\"b\"" + } +} + + +-- !query +SELECT listagg(DISTINCT a) WITHIN GROUP (ORDER BY a, b) FROM df +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT", + "sqlState" : "42K0K", + "messageParameters" : { + "funcArg" : "\"a\"", + "funcName" : "`listagg`", + "orderingExpr" : "\"a\", \"b\"" + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out index d103da1f6939f..95c2db670a87d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out @@ -74,7 +74,7 @@ SELECT department, mode(DISTINCT salary) FROM basic_pays GROUP BY department ORD -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`mode`" @@ -379,7 +379,7 @@ FROM basic_pays -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`mode`" @@ -401,7 +401,7 @@ FROM basic_pays -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WITHIN_GROUP_MISSING", "sqlState" : "42K0K", "messageParameters" : { "funcName" : 
"`mode`" @@ -423,7 +423,7 @@ FROM basic_pays -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WRONG_NUM_ORDERINGS", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WRONG_NUM_ORDERINGS", "sqlState" : "42K0K", "messageParameters" : { "actualNum" : "1", diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out index 6b2c60f25bae3..4a717488e0172 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out @@ -47,7 +47,7 @@ WithCTE +- Filter (id#xL > scalar-subquery#x []) : +- Aggregate [max(id#xL) AS max(id)#xL] : +- SubqueryAlias tmp - : +- CTERelationRef xxxx, true, [id#xL], false + : +- CTERelationRef xxxx, true, [id#xL], false, false +- Range (0, 3, step=1) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out index 31e5f7b63c604..3088e93ead216 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out @@ -248,7 +248,7 @@ FROM aggr -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`percentile_cont`" @@ -270,7 +270,7 @@ FROM aggr -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`percentile_cont`" @@ -342,7 
+342,7 @@ FROM aggr -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WITHIN_GROUP_MISSING", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`percentile_cont`" @@ -364,7 +364,7 @@ FROM aggr -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WITHIN_GROUP_MISSING", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`percentile_cont`" @@ -386,7 +386,7 @@ FROM aggr -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WRONG_NUM_ORDERINGS", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WRONG_NUM_ORDERINGS", "sqlState" : "42K0K", "messageParameters" : { "actualNum" : "2", diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out index b296396c886be..8089d7c4e962a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pipe-operators.sql.out @@ -265,11 +265,136 @@ CreateViewCommand `windowTestData`, select * from values +- LocalRelation [val#x, val_long#xL, val_double#x, val_date#x, val_timestamp#x, cate#x] +-- !query +from t +-- !query analysis +SubqueryAlias spark_catalog.default.t ++- Relation spark_catalog.default.t[x#x,y#x] csv + + -- !query table t +-- !query analysis +SubqueryAlias spark_catalog.default.t ++- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +from t |> select 1 as x -- !query analysis -Project [pipeexpression(1, false, SELECT) AS x#x] +Project [1 AS x#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +from t as t_alias 
+|> select t_alias.x +-- !query analysis +Project [x#x] ++- SubqueryAlias t_alias + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +from t as t_alias +|> select t_alias.x as tx, t_alias.y as ty +|> where ty = 'def' +|> select tx +-- !query analysis +Project [tx#x] ++- Filter (ty#x = def) + +- PipeOperator + +- Project [x#x AS tx#x, y#x AS ty#x] + +- SubqueryAlias t_alias + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +from t, other +|> select t.x + other.a as z +-- !query analysis +Project [(x#x + a#x) AS z#x] ++- Join Inner + :- SubqueryAlias spark_catalog.default.t + : +- Relation spark_catalog.default.t[x#x,y#x] csv + +- SubqueryAlias spark_catalog.default.other + +- Relation spark_catalog.default.other[a#x,b#x] json + + +-- !query +from t join other on (t.x = other.a) +|> select t.x + other.a as z +-- !query analysis +Project [(x#x + a#x) AS z#x] ++- Join Inner, (x#x = a#x) + :- SubqueryAlias spark_catalog.default.t + : +- Relation spark_catalog.default.t[x#x,y#x] csv + +- SubqueryAlias spark_catalog.default.other + +- Relation spark_catalog.default.other[a#x,b#x] json + + +-- !query +from t lateral view explode(array(100, 101)) as ly +|> select t.x + ly as z +-- !query analysis +Project [(x#x + ly#x) AS z#x] ++- Generate explode(array(100, 101)), false, as, [ly#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +from st +|> select col.i1 +-- !query analysis +Project [col#x.i1 AS i1#x] ++- SubqueryAlias spark_catalog.default.st + +- Relation spark_catalog.default.st[x#x,col#x] parquet + + +-- !query +from st as st_alias +|> select st_alias.col.i1 +-- !query analysis +Project [col#x.i1 AS i1#x] ++- SubqueryAlias st_alias + +- SubqueryAlias spark_catalog.default.st + +- Relation spark_catalog.default.st[x#x,col#x] parquet + + +-- !query +from values (0), (1) tab(col) +|> select col 
as x +-- !query analysis +Project [col#x AS x#x] ++- SubqueryAlias tab + +- LocalRelation [col#x] + + +-- !query +from t +|> from t +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'from'", + "hint" : "" + } +} + + +-- !query +table t +|> select 1 as x +-- !query analysis +Project [1 AS x#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv @@ -288,7 +413,7 @@ table t |> select x, y |> select x + length(y) as z -- !query analysis -Project [pipeexpression((x#x + length(y#x)), false, SELECT) AS z#x] +Project [(x#x + length(y#x)) AS z#x] +- Project [x#x, y#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv @@ -298,7 +423,7 @@ Project [pipeexpression((x#x + length(y#x)), false, SELECT) AS z#x] values (0), (1) tab(col) |> select col * 2 as result -- !query analysis -Project [pipeexpression((col#x * 2), false, SELECT) AS result#x] +Project [(col#x * 2) AS result#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -307,7 +432,7 @@ Project [pipeexpression((col#x * 2), false, SELECT) AS result#x] (select * from t union all select * from t) |> select x + length(y) as result -- !query analysis -Project [pipeexpression((x#x + length(y#x)), false, SELECT) AS result#x] +Project [(x#x + length(y#x)) AS result#x] +- Union false, false :- Project [x#x, y#x] : +- SubqueryAlias spark_catalog.default.t @@ -358,7 +483,7 @@ Project [col#x.i1 AS i1#x] table t |> select (select a from other where x = a limit 1) as result -- !query analysis -Project [pipeexpression(scalar-subquery#x [x#x], false, SELECT) AS result#x] +Project [scalar-subquery#x [x#x] AS result#x] : +- GlobalLimit 1 : +- LocalLimit 1 : +- Project [a#x] @@ -383,7 +508,7 @@ Project [scalar-subquery#x [] AS result#x] table t |> select (select any_value(a) from other where x = a limit 1) as result -- !query analysis 
-Project [pipeexpression(scalar-subquery#x [x#x], false, SELECT) AS result#x] +Project [scalar-subquery#x [x#x] AS result#x] : +- GlobalLimit 1 : +- LocalLimit 1 : +- Aggregate [any_value(a#x, false) AS any_value(a)#x] @@ -398,8 +523,8 @@ Project [pipeexpression(scalar-subquery#x [x#x], false, SELECT) AS result#x] table t |> select x + length(x) as z, z + 1 as plus_one -- !query analysis -Project [z#x, pipeexpression((z#x + 1), false, SELECT) AS plus_one#x] -+- Project [x#x, y#x, pipeexpression((x#x + length(cast(x#x as string))), false, SELECT) AS z#x] +Project [z#x, (z#x + 1) AS plus_one#x] ++- Project [x#x, y#x, (x#x + length(cast(x#x as string))) AS z#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv @@ -409,8 +534,8 @@ table t |> select first_value(x) over (partition by y) as result -- !query analysis Project [result#x] -+- Project [x#x, y#x, _we0#x, pipeexpression(_we0#x, false, SELECT) AS result#x] - +- Window [first_value(x#x, false) windowspecdefinition(y#x, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#x], [y#x] ++- Project [x#x, y#x, result#x, result#x] + +- Window [first_value(x#x, false) windowspecdefinition(y#x, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS result#x], [y#x] +- Project [x#x, y#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv @@ -426,8 +551,8 @@ select 1 x, 2 y, 3 z -- !query analysis Project [a2#x] +- Project [(1 + sum(x) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))#xL, avg(y) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x, x#x, a2#x] - +- Project [x#x, y#x, _w1#x, z#x, _we0#xL, avg(y) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x, _we2#x, (cast(1 as bigint) + _we0#xL) AS (1 + sum(x) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))#xL, avg(y) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED 
FOLLOWING)#x, pipeexpression(_we2#x, false, SELECT) AS a2#x] - +- Window [avg(_w1#x) windowspecdefinition(y#x, z#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS _we2#x], [y#x], [z#x ASC NULLS FIRST] + +- Project [x#x, y#x, _w1#x, z#x, _we0#xL, avg(y) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x, a2#x, (cast(1 as bigint) + _we0#xL) AS (1 + sum(x) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))#xL, avg(y) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x, a2#x] + +- Window [avg(_w1#x) windowspecdefinition(y#x, z#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS a2#x], [y#x], [z#x ASC NULLS FIRST] +- Window [sum(x#x) windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#xL, avg(y#x) windowspecdefinition(specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg(y) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)#x] +- Project [x#x, y#x, (x#x + 1) AS _w1#x, z#x] +- Project [1 AS x#x, 2 AS y#x, 3 AS z#x] @@ -513,204 +638,703 @@ table t -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION", - "sqlState" : "0A000", + "condition" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION", + "sqlState" : "0A000", + "messageParameters" : { + "clause" : "SELECT", + "expr" : "sum(x#x)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 24, + "fragment" : "sum(x)" + } ] +} + + +-- !query +table t +|> select y, length(y) + sum(x) as result +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION", + "sqlState" : "0A000", + "messageParameters" : { + "clause" : "SELECT", + "expr" : "sum(x#x)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" 
: 34, + "stopIndex" : 39, + "fragment" : "sum(x)" + } ] +} + + +-- !query +table t +|> extend 1 as z +-- !query analysis +Project [x#x, y#x, 1 AS z#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 +-- !query analysis +Project [x#x, y#x, 1 AS 1#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend x as z +-- !query analysis +Project [x#x, y#x, x#x AS z#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend x + length(y) as z +-- !query analysis +Project [x#x, y#x, (x#x + length(y#x)) AS z#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend x + length(y) as z, x + 1 as zz +-- !query analysis +Project [x#x, y#x, (x#x + length(y#x)) AS z#x, (x#x + 1) AS zz#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend x + length(y) as z +|> extend z + 1 as zz +-- !query analysis +Project [x#x, y#x, z#x, (z#x + 1) AS zz#x] ++- Project [x#x, y#x, (x#x + length(y#x)) AS z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +select col from st +|> extend col.i1 as z +-- !query analysis +Project [col#x, col#x.i1 AS z#x] ++- Project [col#x] + +- SubqueryAlias spark_catalog.default.st + +- Relation spark_catalog.default.st[x#x,col#x] parquet + + +-- !query +table t +|> extend (select a from other where x = a limit 1) as z +-- !query analysis +Project [x#x, y#x, scalar-subquery#x [x#x] AS z#x] +: +- GlobalLimit 1 +: +- LocalLimit 1 +: +- Project [a#x] +: +- Filter (outer(x#x) = a#x) +: +- SubqueryAlias spark_catalog.default.other +: +- Relation spark_catalog.default.other[a#x,b#x] json ++- SubqueryAlias spark_catalog.default.t + +- Relation 
spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> where exists ( + table other + |> extend t.x + |> select * except (a, b)) +-- !query analysis +Filter exists#x [x#x] +: +- Project [x#x] +: +- Project [a#x, b#x, outer(x#x)] +: +- SubqueryAlias spark_catalog.default.other +: +- Relation spark_catalog.default.other[a#x,b#x] json ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 as x +-- !query analysis +Project [x#x, y#x, 1 AS x#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend first_value(x) over (partition by y) as result +-- !query analysis +Project [x#x, y#x, result#x] ++- Project [x#x, y#x, result#x, result#x] + +- Window [first_value(x#x, false) windowspecdefinition(y#x, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS result#x], [y#x] + +- Project [x#x, y#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend x + length(y) as z, z + 1 as plus_one +-- !query analysis +Project [x#x, y#x, z#x, (z#x + 1) AS plus_one#x] ++- Project [x#x, y#x, (x#x + length(y#x)) AS z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend sum(x) as z +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION", + "sqlState" : "0A000", + "messageParameters" : { + "clause" : "EXTEND", + "expr" : "sum(x#x)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 24, + "fragment" : "sum(x)" + } ] +} + + +-- !query +table t +|> extend distinct x as z +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + 
"messageParameters" : { + "error" : "'as'", + "hint" : "" + } +} + + +-- !query +table t +|> extend * +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "INVALID_USAGE_OF_STAR_OR_REGEX", + "sqlState" : "42000", + "messageParameters" : { + "elem" : "'*'", + "prettyName" : "expression `pipeexpression`" + } +} + + +-- !query +table t +|> set x = 1 +-- !query analysis +Project [1 AS x#x, y#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> set y = x +-- !query analysis +Project [x#x, x#x AS y#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 as z +|> set z = x + length(y) +-- !query analysis +Project [x#x, y#x, (x#x + length(y#x)) AS z#x] ++- Project [x#x, y#x, 1 AS z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 as z +|> extend 2 as zz +|> set z = x + length(y), zz = x + 1 +-- !query analysis +Project [x#x, y#x, z#x, (x#x + 1) AS zz#x] ++- Project [x#x, y#x, (x#x + length(y#x)) AS z#x, zz#x] + +- Project [x#x, y#x, z#x, 2 AS zz#x] + +- Project [x#x, y#x, 1 AS z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table other +|> extend 3 as c +|> set a = b, b = c +-- !query analysis +Project [a#x, c#x AS b#x, c#x] ++- Project [b#x AS a#x, b#x, c#x] + +- Project [a#x, b#x, 3 AS c#x] + +- SubqueryAlias spark_catalog.default.other + +- Relation spark_catalog.default.other[a#x,b#x] json + + +-- !query +table t +|> extend 1 as z +|> extend 2 as zz +|> set z = x + length(y), zz = z + 1 +-- !query analysis +Project [x#x, y#x, z#x, (z#x + 1) AS zz#x] ++- Project [x#x, y#x, (x#x + length(y#x)) AS z#x, zz#x] + +- Project [x#x, y#x, z#x, 2 AS zz#x] + +- Project [x#x, y#x, 1 AS z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation 
spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 as z +|> set z = x + length(y) +|> set z = z + 1 +-- !query analysis +Project [x#x, y#x, (z#x + 1) AS z#x] ++- Project [x#x, y#x, (x#x + length(y#x)) AS z#x] + +- Project [x#x, y#x, 1 AS z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 as z +|> set z = x + length(y), z = z + 1 +-- !query analysis +Project [x#x, y#x, (z#x + 1) AS z#x] ++- Project [x#x, y#x, (x#x + length(y#x)) AS z#x] + +- Project [x#x, y#x, 1 AS z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +select col from st +|> extend 1 as z +|> set z = col.i1 +-- !query analysis +Project [col#x, col#x.i1 AS z#x] ++- Project [col#x, 1 AS z#x] + +- Project [col#x] + +- SubqueryAlias spark_catalog.default.st + +- Relation spark_catalog.default.st[x#x,col#x] parquet + + +-- !query +table t +|> set y = (select a from other where x = a limit 1) +-- !query analysis +Project [x#x, scalar-subquery#x [x#x] AS y#x] +: +- GlobalLimit 1 +: +- LocalLimit 1 +: +- Project [a#x] +: +- Filter (outer(x#x) = a#x) +: +- SubqueryAlias spark_catalog.default.other +: +- Relation spark_catalog.default.other[a#x,b#x] json ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 as `x.y.z` +|> set `x.y.z` = x + length(y) +-- !query analysis +Project [x#x, y#x, (x#x + length(y#x)) AS x.y.z#x] ++- Project [x#x, y#x, 1 AS x.y.z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 as z +|> set z = first_value(x) over (partition by y) +-- !query analysis +Project [x#x, y#x, z#x] ++- Project [x#x, y#x, z#x, z#x] + +- Window [first_value(x#x, false) windowspecdefinition(y#x, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS z#x], [y#x] + 
+- Project [x#x, y#x] + +- Project [x#x, y#x, 1 AS z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +values (0), (1) lhs(a) +|> inner join values (1), (2) rhs(a) using (a) +|> extend lhs.a + rhs.a as z1 +|> extend lhs.a - rhs.a as z2 +|> drop z1 +|> where z2 = 0 +|> order by lhs.a, rhs.a, z2 +|> set z2 = 4 +|> limit 2 +|> select lhs.a, rhs.a, z2 +-- !query analysis +Project [a#x, a#x, z2#x] ++- GlobalLimit 2 + +- LocalLimit 2 + +- PipeOperator + +- Project [a#x, 4 AS z2#x, a#x] + +- Project [a#x, z2#x, a#x] + +- Sort [a#x ASC NULLS FIRST, a#x ASC NULLS FIRST, z2#x ASC NULLS FIRST], true + +- PipeOperator + +- Filter (z2#x = 0) + +- PipeOperator + +- Project [a#x, z2#x, a#x, a#x] + +- Project [a#x, z1#x, (a#x - a#x) AS z2#x, a#x, a#x] + +- Project [a#x, (a#x + a#x) AS z1#x, a#x, a#x, a#x] + +- Project [a#x, a#x, a#x, a#x, a#x] + +- Join Inner, (a#x = a#x) + :- SubqueryAlias lhs + : +- LocalRelation [a#x] + +- SubqueryAlias rhs + +- LocalRelation [a#x] + + +-- !query +table t +|> set z = 1 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`z`", + "proposal" : "`x`, `y`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 20, + "fragment" : "table t\n|> set z = 1" + } ] +} + + +-- !query +table t +|> set x = 1 as z +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'as'", + "hint" : "" + } +} + + +-- !query +select col from st +|> set col.i1 = 42 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "_LEGACY_ERROR_TEMP_0035", + "messageParameters" : { + "message" : "SQL pipe syntax |> SET operator with multi-part assignment key (only single-part keys are 
allowed)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 27, + "stopIndex" : 37, + "fragment" : "col.i1 = 42" + } ] +} + + +-- !query +table t +|> drop y +-- !query analysis +Project [x#x] ++- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +select 1 as x, 2 as y, 3 as z +|> drop z, y +-- !query analysis +Project [x#x] ++- Project [1 AS x#x, 2 AS y#x, 3 AS z#x] + +- OneRowRelation + + +-- !query +select 1 as x, 2 as y, 3 as z +|> drop z +|> drop y +-- !query analysis +Project [x#x] ++- Project [x#x, y#x] + +- Project [1 AS x#x, 2 AS y#x, 3 AS z#x] + +- OneRowRelation + + +-- !query +select x from t +|> drop x +-- !query analysis +Project ++- Project [x#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> extend 1 as `x.y.z` +|> drop `x.y.z` +-- !query analysis +Project [x#x, y#x] ++- Project [x#x, y#x, 1 AS x.y.z#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + +-- !query +table t +|> drop z +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`z`", + "proposal" : "`x`, `y`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 17, + "fragment" : "table t\n|> drop z" + } ] +} + + +-- !query +table st +|> drop col.i1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'.'", + "hint" : "" + } +} + + +-- !query +table st +|> drop `col.i1` +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", "messageParameters" : { - "clause" : "SELECT", - "expr" : "sum(x#x)" + 
"objectName" : "`col.i1`", + "proposal" : "`col`, `x`" }, "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 19, - "stopIndex" : 24, - "fragment" : "sum(x)" + "startIndex" : 1, + "stopIndex" : 25, + "fragment" : "table st\n|> drop `col.i1`" } ] } -- !query -table t -|> select y, length(y) + sum(x) as result +select 1 as x, 2 as y, 3 as z +|> drop z, y, z -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION", - "sqlState" : "0A000", + "condition" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", "messageParameters" : { - "clause" : "SELECT", - "expr" : "sum(x#x)" + "columns" : "z, y, z" }, "queryContext" : [ { "objectType" : "", "objectName" : "", - "startIndex" : 34, - "stopIndex" : 39, - "fragment" : "sum(x)" + "startIndex" : 1, + "stopIndex" : 45, + "fragment" : "select 1 as x, 2 as y, 3 as z\n|> drop z, y, z" } ] } -- !query table t -|> extend 1 as z --- !query analysis -Project [x#x, y#x, pipeexpression(1, false, EXTEND) AS z#x] -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv - - --- !query -table t -|> extend 1 --- !query analysis -Project [x#x, y#x, pipeexpression(1, false, EXTEND) AS pipeexpression(1)#x] -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv - - --- !query -table t -|> extend x as z --- !query analysis -Project [x#x, y#x, pipeexpression(x#x, false, EXTEND) AS z#x] -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv - - --- !query -table t -|> extend x + length(y) as z --- !query analysis -Project [x#x, y#x, pipeexpression((x#x + length(y#x)), false, EXTEND) AS z#x] -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv - - --- !query -table t -|> extend x + length(y) as z, x + 1 as zz --- !query analysis -Project [x#x, y#x, pipeexpression((x#x + length(y#x)), false, EXTEND) AS z#x, 
pipeexpression((x#x + 1), false, EXTEND) AS zz#x] -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv - - --- !query -table t -|> extend x + length(y) as z -|> extend z + 1 as zz +|> as u +|> select u.x, u.y -- !query analysis -Project [x#x, y#x, z#x, pipeexpression((z#x + 1), false, EXTEND) AS zz#x] -+- Project [x#x, y#x, pipeexpression((x#x + length(y#x)), false, EXTEND) AS z#x] +Project [x#x, y#x] ++- SubqueryAlias u +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query -select col from st -|> extend col.i1 as z +select 1 as x, 2 as y +|> as u +|> select u.x, u.y -- !query analysis -Project [col#x, pipeexpression(col#x.i1, false, EXTEND) AS z#x] -+- Project [col#x] - +- SubqueryAlias spark_catalog.default.st - +- Relation spark_catalog.default.st[x#x,col#x] parquet +Project [x#x, y#x] ++- SubqueryAlias u + +- Project [1 AS x#x, 2 AS y#x] + +- OneRowRelation -- !query table t -|> extend (select a from other where x = a limit 1) as z +|> as `u.v` +|> select `u.v`.x, `u.v`.y -- !query analysis -Project [x#x, y#x, pipeexpression(scalar-subquery#x [x#x], false, EXTEND) AS z#x] -: +- GlobalLimit 1 -: +- LocalLimit 1 -: +- Project [a#x] -: +- Filter (outer(x#x) = a#x) -: +- SubqueryAlias spark_catalog.default.other -: +- Relation spark_catalog.default.other[a#x,b#x] json -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv +Project [x#x, y#x] ++- SubqueryAlias `u.v` + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query table t -|> where exists ( - table other - |> extend t.x - |> select * except (a, b)) +|> as u +|> as v +|> select v.x, v.y -- !query analysis -Filter exists#x [x#x] -: +- Project [pipeexpression(outer(spark_catalog.default.t.x))#x] -: +- Project [a#x, b#x, pipeexpression(outer(x#x), false, EXTEND) AS pipeexpression(outer(spark_catalog.default.t.x))#x] -: +- SubqueryAlias 
spark_catalog.default.other -: +- Relation spark_catalog.default.other[a#x,b#x] json -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv +Project [x#x, y#x] ++- SubqueryAlias v + +- SubqueryAlias u + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query table t -|> extend 1 as x +|> as u +|> where u.x = 1 -- !query analysis -Project [x#x, y#x, pipeexpression(1, false, EXTEND) AS x#x] -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv +Filter (x#x = 1) ++- PipeOperator + +- SubqueryAlias u + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query table t -|> extend first_value(x) over (partition by y) as result +|> as u, v -- !query analysis -Project [x#x, y#x, result#x] -+- Project [x#x, y#x, _we0#x, pipeexpression(_we0#x, false, EXTEND) AS result#x] - +- Window [first_value(x#x, false) windowspecdefinition(y#x, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#x], [y#x] - +- Project [x#x, y#x] - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "','", + "hint" : "" + } +} -- !query table t -|> extend x + length(y) as z, z + 1 as plus_one +|> as 1 + 2 -- !query analysis -Project [x#x, y#x, z#x, pipeexpression((z#x + 1), false, EXTEND) AS plus_one#x] -+- Project [x#x, y#x, pipeexpression((x#x + length(y#x)), false, EXTEND) AS z#x] - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'1'", + "hint" : "" + } +} -- !query table t -|> extend sum(x) as z +|> as u-v -- 
!query analysis -org.apache.spark.sql.AnalysisException +org.apache.spark.sql.catalyst.parser.ParseException { - "condition" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION", - "sqlState" : "0A000", + "condition" : "INVALID_IDENTIFIER", + "sqlState" : "42602", "messageParameters" : { - "clause" : "EXTEND", - "expr" : "sum(x#x)" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 19, - "stopIndex" : 24, - "fragment" : "sum(x)" - } ] + "ident" : "u-v" + } } -- !query table t -|> extend distinct x as z +|> as u@v -- !query analysis org.apache.spark.sql.catalyst.parser.ParseException { "condition" : "PARSE_SYNTAX_ERROR", "sqlState" : "42601", "messageParameters" : { - "error" : "'as'", + "error" : "'@'", "hint" : "" } } @@ -718,15 +1342,15 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query table t -|> extend * +|> as u#######v -- !query analysis -org.apache.spark.sql.AnalysisException +org.apache.spark.sql.catalyst.parser.ParseException { - "condition" : "INVALID_USAGE_OF_STAR_OR_REGEX", - "sqlState" : "42000", + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", "messageParameters" : { - "elem" : "'*'", - "prettyName" : "expression `pipeexpression`" + "error" : "'#'", + "hint" : "" } } @@ -736,8 +1360,9 @@ table t |> where true -- !query analysis Filter true -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -745,8 +1370,9 @@ table t |> where x + length(y) < 4 -- !query analysis Filter ((x#x + length(y#x)) < 4) -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -755,10 +1381,11 @@ table t |> where x + length(y) < 3 -- !query analysis Filter ((x#x + length(y#x)) < 3) -+- SubqueryAlias 
__auto_generated_subquery_name ++- PipeOperator +- Filter ((x#x + length(y#x)) < 4) - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv + +- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -766,7 +1393,7 @@ Filter ((x#x + length(y#x)) < 3) |> where x = 1 -- !query analysis Filter (x#x = 1) -+- SubqueryAlias __auto_generated_subquery_name ++- PipeOperator +- Aggregate [x#x], [x#x, sum(length(y#x)) AS sum_len#xL] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv @@ -777,8 +1404,9 @@ table t |> where t.x = 1 -- !query analysis Filter (x#x = 1) -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -786,8 +1414,9 @@ table t |> where spark_catalog.default.t.x = 1 -- !query analysis Filter (x#x = 1) -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -795,7 +1424,7 @@ Filter (x#x = 1) |> where col.i1 = 1 -- !query analysis Filter (col#x.i1 = 1) -+- SubqueryAlias __auto_generated_subquery_name ++- PipeOperator +- Project [col#x] +- SubqueryAlias spark_catalog.default.st +- Relation spark_catalog.default.st[x#x,col#x] parquet @@ -806,8 +1435,9 @@ table st |> where st.col.i1 = 2 -- !query analysis Filter (col#x.i1 = 2) -+- SubqueryAlias spark_catalog.default.st - +- Relation spark_catalog.default.st[x#x,col#x] parquet ++- PipeOperator + +- SubqueryAlias spark_catalog.default.st + +- Relation spark_catalog.default.st[x#x,col#x] parquet -- !query @@ -821,8 +1451,9 @@ Filter exists#x [x#x] : +- Filter (outer(x#x) = a#x) : +- SubqueryAlias spark_catalog.default.other : +- Relation 
spark_catalog.default.other[a#x,b#x] json -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -836,8 +1467,9 @@ Filter (scalar-subquery#x [x#x] = 1) : +- Filter (outer(x#x) = a#x) : +- SubqueryAlias spark_catalog.default.other : +- Relation spark_catalog.default.other[a#x,b#x] json -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -939,7 +1571,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "sqlState" : "42703", "messageParameters" : { "objectName" : "`y`", - "proposal" : "`x`, `z`" + "proposal" : "`z`, `spark_catalog`.`default`.`t`.`x`" }, "queryContext" : [ { "objectType" : "", @@ -951,6 +1583,78 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +table t +|> select x, length(y) as z +|> limit 1000 +|> where x + length(y) < 4 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`y`", + "proposal" : "`z`, `spark_catalog`.`default`.`t`.`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 71, + "stopIndex" : 71, + "fragment" : "y" + } ] +} + + +-- !query +table t +|> select x, length(y) as z +|> limit 1000 offset 1 +|> where x + length(y) < 4 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`y`", + "proposal" : "`z`, `spark_catalog`.`default`.`t`.`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 80, + "stopIndex" : 80, + "fragment" : 
"y" + } ] +} + + +-- !query +table t +|> select x, length(y) as z +|> order by x, y +|> where x + length(y) < 4 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`y`", + "proposal" : "`z`, `spark_catalog`.`default`.`t`.`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 52, + "stopIndex" : 52, + "fragment" : "y" + } ] +} + + -- !query (select x, sum(length(y)) as sum_len from t group by x) |> where sum(length(y)) = 3 @@ -961,7 +1665,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "sqlState" : "42703", "messageParameters" : { "objectName" : "`y`", - "proposal" : "`x`, `sum_len`" + "proposal" : "`sum_len`, `spark_catalog`.`default`.`t`.`x`" }, "queryContext" : [ { "objectType" : "", @@ -1004,7 +1708,7 @@ table courseSales Project [c#x, __pivot_sum(e) AS s AS `sum(e) AS s`#x[0] AS firstYear_s#xL, __pivot_avg(e) AS a AS `avg(e) AS a`#x[0] AS firstYear_a#x, __pivot_sum(e) AS s AS `sum(e) AS s`#x[1] AS secondYear_s#xL, __pivot_avg(e) AS a AS `avg(e) AS a`#x[1] AS secondYear_a#x] +- Aggregate [c#x], [c#x, pivotfirst(y#x, sum(e) AS s#xL, 2012, 2013, 0, 0) AS __pivot_sum(e) AS s AS `sum(e) AS s`#x, pivotfirst(y#x, avg(e) AS a#x, 2012, 2013, 0, 0) AS __pivot_avg(e) AS a AS `avg(e) AS a`#x] +- Aggregate [c#x, y#x], [c#x, y#x, sum(e#x) AS sum(e) AS s#xL, avg(e#x) AS avg(e) AS a#x] - +- Project [pipeexpression(year#x, false, SELECT) AS y#x, pipeexpression(course#x, false, SELECT) AS c#x, pipeexpression(earnings#x, false, SELECT) AS e#x] + +- Project [year#x AS y#x, course#x AS c#x, earnings#x AS e#x] +- SubqueryAlias coursesales +- View (`courseSales`, [course#x, year#x, earnings#x]) +- Project [cast(course#x as string) AS course#x, cast(year#x as int) AS year#x, cast(earnings#x as int) AS earnings#x] @@ -1923,7 +2627,7 @@ table natural_join_test_t1 |> where k = "one" -- !query 
analysis Filter (k#x = one) -+- SubqueryAlias __auto_generated_subquery_name ++- PipeOperator +- Project [k#x, v1#x, v2#x] +- Join Inner, (k#x = k#x) :- SubqueryAlias natural_join_test_t1 @@ -2110,21 +2814,34 @@ Union false, false -- !query -values (0, 1) tab(x, y) +values (2, 'xyz') tab(x, y) |> union table t |> where x = 0 -- !query analysis -Distinct -+- Union false, false - :- Project [x#x, cast(y#x as bigint) AS y#xL] - : +- SubqueryAlias tab - : +- LocalRelation [x#x, y#x] - +- Project [x#x, cast(y#x as bigint) AS y#xL] - +- Filter (x#x = 0) +Filter (x#x = 0) ++- PipeOperator + +- Distinct + +- Union false, false + :- SubqueryAlias tab + : +- LocalRelation [x#x, y#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv +-- !query +values (2, 'xyz') tab(x, y) +|> union table t +|> drop x +-- !query analysis +Project [y#x] ++- Distinct + +- Union false, false + :- SubqueryAlias tab + : +- LocalRelation [x#x, y#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv + + -- !query (select * from t) |> union all (select * from t) @@ -2260,8 +2977,9 @@ table t |> order by x -- !query analysis Sort [x#x ASC NULLS FIRST], true -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2269,7 +2987,7 @@ Sort [x#x ASC NULLS FIRST], true |> order by x -- !query analysis Sort [x#x ASC NULLS FIRST], true -+- SubqueryAlias __auto_generated_subquery_name ++- PipeOperator +- Project [x#x, y#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv @@ -2280,8 +2998,9 @@ values (0, 'abc') tab(x, y) |> order by x -- !query analysis Sort [x#x ASC NULLS FIRST], true -+- SubqueryAlias tab - +- LocalRelation [x#x, y#x] ++- PipeOperator + +- SubqueryAlias tab + +- LocalRelation [x#x, y#x] -- !query @@ -2291,10 
+3010,11 @@ table t -- !query analysis GlobalLimit 1 +- LocalLimit 1 - +- SubqueryAlias __auto_generated_subquery_name + +- PipeOperator +- Sort [x#x ASC NULLS FIRST], true - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv + +- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2306,11 +3026,12 @@ table t GlobalLimit 2 +- LocalLimit 2 +- Offset 1 - +- SubqueryAlias __auto_generated_subquery_name + +- PipeOperator +- Project [y#x] +- Filter (x#x = 1) - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv + +- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2320,11 +3041,12 @@ table t |> offset 1 -- !query analysis Offset 1 -+- SubqueryAlias __auto_generated_subquery_name ++- PipeOperator +- Project [y#x] +- Filter (x#x = 1) - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv + +- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2332,8 +3054,9 @@ table t |> limit all offset 0 -- !query analysis Offset 0 -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2341,8 +3064,9 @@ table t |> distribute by x -- !query analysis RepartitionByExpression [x#x] -+- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv ++- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2351,8 +3075,9 @@ table t -- !query analysis Sort [x#x ASC NULLS FIRST], false +- RepartitionByExpression [x#x] - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv + +- 
PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2361,8 +3086,9 @@ table t -- !query analysis RepartitionByExpression [x#x] +- Sort [x#x ASC NULLS FIRST], false - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv + +- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2372,8 +3098,9 @@ order by y -- !query analysis Sort [y#x ASC NULLS FIRST], true +- Sort [x#x DESC NULLS LAST], true - +- SubqueryAlias spark_catalog.default.t - +- Relation spark_catalog.default.t[x#x,y#x] csv + +- PipeOperator + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[x#x,y#x] csv -- !query @@ -2482,7 +3209,7 @@ org.apache.spark.sql.catalyst.parser.ParseException table other |> aggregate sum(b) as result group by a -- !query analysis -Aggregate [a#x], [a#x, pipeexpression(sum(b#x), true, AGGREGATE) AS result#xL] +Aggregate [a#x], [a#x, sum(b#x) AS result#xL] +- SubqueryAlias spark_catalog.default.other +- Relation spark_catalog.default.other[a#x,b#x] json @@ -2493,7 +3220,7 @@ table other |> select result -- !query analysis Project [result#xL] -+- Aggregate [a#x], [a#x, pipeexpression(sum(b#x), true, AGGREGATE) AS result#xL] ++- Aggregate [a#x], [a#x, sum(b#x) AS result#xL] +- SubqueryAlias spark_catalog.default.other +- Relation spark_catalog.default.other[a#x,b#x] json @@ -2504,7 +3231,7 @@ table other |> select gkey -- !query analysis Project [gkey#x] -+- Aggregate [(a#x + 1)], [(a#x + 1) AS gkey#x, pipeexpression(sum(b#x), true, AGGREGATE) AS pipeexpression(sum(b))#xL] ++- Aggregate [(a#x + 1)], [(a#x + 1) AS gkey#x, sum(b#x) AS sum(b)#xL] +- SubqueryAlias spark_catalog.default.other +- Relation spark_catalog.default.other[a#x,b#x] json @@ -2522,16 +3249,106 @@ Aggregate [x#x, y#x], [x#x, y#x] select 3 as x, 4 as y |> aggregate group by 1, 2 -- !query analysis -Aggregate [1, 
2], [1 AS 1#x, 2 AS 2#x] +Aggregate [x#x, y#x], [x#x, y#x] ++- Project [3 AS x#x, 4 AS y#x] + +- OneRowRelation + + +-- !query +values (3, 4) as tab(x, y) +|> aggregate sum(y) group by 1 +-- !query analysis +Aggregate [x#x], [x#x, sum(y#x) AS sum(y)#xL] ++- SubqueryAlias tab + +- LocalRelation [x#x, y#x] + + +-- !query +values (3, 4), (5, 4) as tab(x, y) +|> aggregate sum(y) group by 1 +-- !query analysis +Aggregate [x#x], [x#x, sum(y#x) AS sum(y)#xL] ++- SubqueryAlias tab + +- LocalRelation [x#x, y#x] + + +-- !query +select 3 as x, 4 as y +|> aggregate sum(y) group by 1, 1 +-- !query analysis +Aggregate [x#x, x#x], [x#x, x#x, sum(y#x) AS sum(y)#xL] ++- Project [3 AS x#x, 4 AS y#x] + +- OneRowRelation + + +-- !query +select 1 as `1`, 2 as `2` +|> aggregate sum(`2`) group by `1` +-- !query analysis +Aggregate [1#x], [1#x, sum(2#x) AS sum(2)#xL] ++- Project [1 AS 1#x, 2 AS 2#x] + +- OneRowRelation + + +-- !query +select 3 as x, 4 as y +|> aggregate sum(y) group by 2 +-- !query analysis +Aggregate [y#x], [y#x, sum(y#x) AS sum(y)#xL] +- Project [3 AS x#x, 4 AS y#x] +- OneRowRelation +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 2 +-- !query analysis +Aggregate [y#x], [y#x, sum(y#x) AS sum(y)#xL] ++- Project [3 AS x#x, 4 AS y#x, 5 AS z#x] + +- OneRowRelation + + +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 3 +-- !query analysis +Aggregate [z#x], [z#x, sum(y#x) AS sum(y)#xL] ++- Project [3 AS x#x, 4 AS y#x, 5 AS z#x] + +- OneRowRelation + + +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 2, 3 +-- !query analysis +Aggregate [y#x, z#x], [y#x, z#x, sum(y#x) AS sum(y)#xL] ++- Project [3 AS x#x, 4 AS y#x, 5 AS z#x] + +- OneRowRelation + + +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 1, 2, 3 +-- !query analysis +Aggregate [x#x, y#x, z#x], [x#x, y#x, z#x, sum(y#x) AS sum(y)#xL] ++- Project [3 AS x#x, 4 AS y#x, 5 AS z#x] + +- OneRowRelation + + +-- !query +select 3 as x, 4 as 
y, 5 as z +|> aggregate sum(y) group by x, 2, 3 +-- !query analysis +Aggregate [x#x, y#x, z#x], [x#x, y#x, z#x, sum(y#x) AS sum(y)#xL] ++- Project [3 AS x#x, 4 AS y#x, 5 AS z#x] + +- OneRowRelation + + -- !query table t |> aggregate sum(x) -- !query analysis -Aggregate [pipeexpression(sum(x#x), true, AGGREGATE) AS pipeexpression(sum(x))#xL] +Aggregate [sum(x#x) AS sum(x)#xL] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv @@ -2540,7 +3357,7 @@ Aggregate [pipeexpression(sum(x#x), true, AGGREGATE) AS pipeexpression(sum(x))#x table t |> aggregate sum(x) + 1 as result_plus_one -- !query analysis -Aggregate [pipeexpression((sum(x#x) + cast(1 as bigint)), true, AGGREGATE) AS result_plus_one#xL] +Aggregate [(sum(x#x) + cast(1 as bigint)) AS result_plus_one#xL] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[x#x,y#x] csv @@ -2551,7 +3368,7 @@ table other |> where a = 1 -- !query analysis Filter (a#x = 1) -+- SubqueryAlias __auto_generated_subquery_name ++- PipeOperator +- Aggregate [a#x], [a#x] +- SubqueryAlias spark_catalog.default.other +- Relation spark_catalog.default.other[a#x,b#x] json @@ -2590,9 +3407,9 @@ select 1 x, 2 y, 3 z |> aggregate avg(z) z group by x |> aggregate count(distinct z) c -- !query analysis -Aggregate [pipeexpression(count(distinct z#x), true, AGGREGATE) AS c#xL] -+- Aggregate [x#x], [x#x, pipeexpression(avg(z#xL), true, AGGREGATE) AS z#x] - +- Aggregate [x#x, y#x], [x#x, y#x, pipeexpression(sum(z#x), true, AGGREGATE) AS z#xL] +Aggregate [count(distinct z#x) AS c#xL] ++- Aggregate [x#x], [x#x, avg(z#xL) AS z#x] + +- Aggregate [x#x, y#x], [x#x, y#x, sum(z#x) AS z#xL] +- Project [1 AS x#x, 2 AS y#x, 3 AS z#x] +- OneRowRelation @@ -2603,7 +3420,7 @@ select 1 x, 3 z |> select x -- !query analysis Project [x#x] -+- Aggregate [x#x, z#x, x#x], [x#x, z#x, x#x, pipeexpression(count(1), true, AGGREGATE) AS pipeexpression(count(1))#xL] ++- Aggregate [x#x, z#x, x#x], [x#x, z#x, x#x, 
count(1) AS count(1)#xL] +- Project [1 AS x#x, 3 AS z#x] +- OneRowRelation @@ -2612,7 +3429,7 @@ Project [x#x] table other |> aggregate a + count(b) group by a -- !query analysis -Aggregate [a#x], [a#x, pipeexpression((cast(a#x as bigint) + count(b#x)), true, AGGREGATE) AS pipeexpression((a + count(b)))#xL] +Aggregate [a#x], [a#x, (cast(a#x as bigint) + count(b#x)) AS (a + count(b))#xL] +- SubqueryAlias spark_catalog.default.other +- Relation spark_catalog.default.other[a#x,b#x] json @@ -2895,7 +3712,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "condition" : "UNSUPPORTED_FEATURE.PIPE_OPERATOR_AGGREGATE_UNSUPPORTED_CASE", "sqlState" : "0A000", "messageParameters" : { - "case" : "window functions" + "case" : "window functions; please update the query to move the window functions to a subsequent |> SELECT operator instead" }, "queryContext" : [ { "objectType" : "", @@ -2991,8 +3808,8 @@ Project [cate#x, val#x, sum_val#xL, first_value(cate) OVER (ORDER BY val ASC NUL +- Window [first_value(cate#x, false) windowspecdefinition(val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value(cate) OVER (ORDER BY val ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)#x], [val#x ASC NULLS FIRST] +- Project [cate#x, val#x, sum_val#xL] +- Project [cate#x, val#x, sum_val#xL] - +- Project [cate#x, val#x, _we0#xL, pipeexpression(_we0#xL, false, SELECT) AS sum_val#xL] - +- Window [sum(val#x) windowspecdefinition(cate#x, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#xL], [cate#x] + +- Project [cate#x, val#x, sum_val#xL, sum_val#xL] + +- Window [sum(val#x) windowspecdefinition(cate#x, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS sum_val#xL], [cate#x] +- Project [cate#x, val#x] +- SubqueryAlias windowtestdata +- View (`windowTestData`, [val#x, val_long#xL, val_double#x, val_date#x, val_timestamp#x, cate#x]) @@ -3159,6 +3976,709 @@ 
org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +with customer_total_return as +(select + sr_customer_sk as ctr_customer_sk, + sr_store_sk as ctr_store_sk, + sum(sr_return_amt) as ctr_total_return + from store_returns, date_dim + where sr_returned_date_sk = d_date_sk and d_year = 2000 + group by sr_customer_sk, sr_store_sk) +select c_customer_id +from customer_total_return ctr1, store, customer +where ctr1.ctr_total_return > + (select avg(ctr_total_return) * 1.2 + from customer_total_return ctr2 + where ctr1.ctr_store_sk = ctr2.ctr_store_sk) + and s_store_sk = ctr1.ctr_store_sk + and s_state = 'tn' + and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`store_returns`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 161, + "stopIndex" : 173, + "fragment" : "store_returns" + } ] +} + + +-- !query +with customer_total_return as + (from store_returns + |> join date_dim + |> where sr_returned_date_sk = d_date_sk and d_year = 2000 + |> aggregate sum(sr_return_amt) as ctr_total_return + group by sr_customer_sk as ctr_customer_sk, sr_store_sk as ctr_store_sk) +from customer_total_return ctr1 +|> join store +|> join customer +|> where ctr1.ctr_total_return > + (table customer_total_return + |> as ctr2 + |> where ctr1.ctr_store_sk = ctr2.ctr_store_sk + |> aggregate avg(ctr_total_return) * 1.2) + and s_store_sk = ctr1.ctr_store_sk + and s_state = 'tn' + and ctr1.ctr_customer_sk = c_customer_sk +|> order by c_customer_id +|> limit 100 +|> select c_customer_id +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`store_returns`" + }, + "queryContext" : [ { + 
"objectType" : "", + "objectName" : "", + "startIndex" : 39, + "stopIndex" : 51, + "fragment" : "store_returns" + } ] +} + + +-- !query +with wscs as +( select + sold_date_sk, + sales_price + from (select + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + from web_sales) x + union all + (select + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + ( select + d_week_seq, + sum(case when (d_day_name = 'sunday') + then sales_price + else null end) + sun_sales, + sum(case when (d_day_name = 'monday') + then sales_price + else null end) + mon_sales, + sum(case when (d_day_name = 'tuesday') + then sales_price + else null end) + tue_sales, + sum(case when (d_day_name = 'wednesday') + then sales_price + else null end) + wed_sales, + sum(case when (d_day_name = 'thursday') + then sales_price + else null end) + thu_sales, + sum(case when (d_day_name = 'friday') + then sales_price + else null end) + fri_sales, + sum(case when (d_day_name = 'saturday') + then sales_price + else null end) + sat_sales + from wscs, date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) +select + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2) +from + (select + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + from wswscs, date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and d_year = 2001) y, + (select + wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + from wswscs, date_dim + where date_dim.d_week_seq 
= wswscs.d_week_seq and d_year = 2001 + 1) z +where d_week_seq1 = d_week_seq2 - 53 +order by d_week_seq1 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`web_sales`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 148, + "stopIndex" : 156, + "fragment" : "web_sales" + } ] +} + + +-- !query +with wscs as + (table web_sales + |> select + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + |> as x + |> union all ( + table catalog_sales + |> select + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price) + |> select + sold_date_sk, + sales_price), +wswscs as + (table wscs + |> join date_dim + |> where d_date_sk = sold_date_sk + |> aggregate + sum(case when (d_day_name = 'sunday') + then sales_price + else null end) + sun_sales, + sum(case when (d_day_name = 'monday') + then sales_price + else null end) + mon_sales, + sum(case when (d_day_name = 'tuesday') + then sales_price + else null end) + tue_sales, + sum(case when (d_day_name = 'wednesday') + then sales_price + else null end) + wed_sales, + sum(case when (d_day_name = 'thursday') + then sales_price + else null end) + thu_sales, + sum(case when (d_day_name = 'friday') + then sales_price + else null end) + fri_sales, + sum(case when (d_day_name = 'saturday') + then sales_price + else null end) + sat_sales + group by d_week_seq) +table wswscs +|> join date_dim +|> where date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 +|> select + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 +|> as y +|> join ( + table wswscs + |> join date_dim + |> where date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1 + |> select + wswscs.d_week_seq d_week_seq2, + sun_sales 
sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + |> as z) +|> where d_week_seq1 = d_week_seq2 - 53 +|> order by d_week_seq1 +|> select + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`web_sales`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 23, + "stopIndex" : 31, + "fragment" : "web_sales" + } ] +} + + +-- !query +select + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + sum(ss_ext_sales_price) sum_agg +from date_dim dt, store_sales, item +where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 128 + and dt.d_moy = 11 +group by dt.d_year, item.i_brand, item.i_brand_id +order by dt.d_year, sum_agg desc, brand_id +limit 100 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`date_dim`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 110, + "stopIndex" : 117, + "fragment" : "date_dim" + } ] +} + + +-- !query +table date_dim +|> as dt +|> join store_sales +|> join item +|> where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 128 + and dt.d_moy = 11 +|> aggregate sum(ss_ext_sales_price) sum_agg + group by dt.d_year d_year, item.i_brand_id brand_id, item.i_brand brand +|> order by d_year, 
sum_agg desc, brand_id +|> limit 100 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`date_dim`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 7, + "stopIndex" : 14, + "fragment" : "date_dim" + } ] +} + + +-- !query +select + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ws_ext_sales_price) as itemrevenue, + sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) + over + (partition by i_class) as revenueratio +from + web_sales, item, date_dim +where + ws_item_sk = i_item_sk + and i_category in ('sports', 'books', 'home') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1999-02-22' as date) + and (cast('1999-02-22' as date) + interval 30 days) +group by + i_item_id, i_item_desc, i_category, i_class, i_current_price +order by + i_category, i_class, i_item_id, i_item_desc, revenueratio +limit 100 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`web_sales`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 227, + "stopIndex" : 235, + "fragment" : "web_sales" + } ] +} + + +-- !query +table web_sales +|> join item +|> join date_dim +|> where ws_item_sk = i_item_sk + and i_category in ('sports', 'books', 'home') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1999-02-22' as date) + and (cast('1999-02-22' as date) + interval 30 days) +|> aggregate sum(ws_ext_sales_price) AS itemrevenue + group by i_item_id, i_item_desc, i_category, i_class, i_current_price +|> extend + itemrevenue * 100 / sum(itemrevenue) + over (partition by i_class) as revenueratio +|> order by i_category, i_class, i_item_id, i_item_desc, revenueratio +|> select i_item_desc, i_category, i_class, 
i_current_price, itemrevenue, revenueratio +|> limit 100 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`web_sales`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 7, + "stopIndex" : 15, + "fragment" : "web_sales" + } ] +} + + +-- !query +select + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing +from (select * +from (select + item_sk, + rank() + over ( + order by rank_col asc) rnk +from (select + ss_item_sk item_sk, + avg(ss_net_profit) rank_col +from store_sales ss1 +where ss_store_sk = 4 +group by ss_item_sk +having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit) rank_col +from store_sales +where ss_store_sk = 4 + and ss_addr_sk is null +group by ss_store_sk)) v1) v11 +where rnk < 11) asceding, + (select * + from (select + item_sk, + rank() + over ( + order by rank_col desc) rnk + from (select + ss_item_sk item_sk, + avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_addr_sk is null + group by ss_store_sk)) v2) v21 + where rnk < 11) descending, + item i1, item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk = asceding.item_sk + and i2.i_item_sk = descending.item_sk +order by asceding.rnk +limit 100 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`store_sales`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 256, + "stopIndex" : 266, + "fragment" : "store_sales" + } ] +} + + +-- !query +from store_sales ss1 +|> where ss_store_sk = 4 +|> aggregate avg(ss_net_profit) rank_col + 
group by ss_item_sk as item_sk +|> where rank_col > 0.9 * ( + from store_sales + |> where ss_store_sk = 4 + and ss_addr_sk is null + |> aggregate avg(ss_net_profit) rank_col + group by ss_store_sk + |> select rank_col) +|> as v1 +|> select + item_sk, + rank() over ( + order by rank_col asc) rnk +|> as v11 +|> where rnk < 11 +|> as asceding +|> join ( + from store_sales ss1 + |> where ss_store_sk = 4 + |> aggregate avg(ss_net_profit) rank_col + group by ss_item_sk as item_sk + |> where rank_col > 0.9 * ( + table store_sales + |> where ss_store_sk = 4 + and ss_addr_sk is null + |> aggregate avg(ss_net_profit) rank_col + group by ss_store_sk + |> select rank_col) + |> as v2 + |> select + item_sk, + rank() over ( + order by rank_col asc) rnk + |> as v21 + |> where rnk < 11) descending +|> join item i1 +|> join item i2 +|> where asceding.rnk = descending.rnk + and i1.i_item_sk = asceding.item_sk + and i2.i_item_sk = descending.item_sk +|> order by asceding.rnk +|> select + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`store_sales`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 6, + "stopIndex" : 16, + "fragment" : "store_sales" + } ] +} + + +-- !query +with web_v1 as ( + select + ws_item_sk item_sk, + d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk + order by d_date + rows between unbounded preceding and current row) cume_sales + from web_sales, date_dim + where ws_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ws_item_sk is not null + group by ws_item_sk, d_date), + store_v1 as ( + select + ss_item_sk item_sk, + d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk + order by d_date + rows between unbounded preceding and current row) 
cume_sales + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ss_item_sk is not null + group by ss_item_sk, d_date) +select * +from (select + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) web_cumulative, + max(store_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) store_cumulative +from (select + case when web.item_sk is not null + then web.item_sk + else store.item_sk end item_sk, + case when web.d_date is not null + then web.d_date + else store.d_date end d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + ) x) y +where web_cumulative > store_cumulative +order by item_sk, d_date +limit 100 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`web_sales`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 222, + "stopIndex" : 230, + "fragment" : "web_sales" + } ] +} + + +-- !query +with web_v1 as ( + table web_sales + |> join date_dim + |> where ws_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ws_item_sk is not null + |> aggregate sum(ws_sales_price) as sum_ws_sales_price + group by ws_item_sk as item_sk, d_date + |> extend sum(sum_ws_sales_price) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) + as cume_sales), +store_v1 as ( + table store_sales + |> join date_dim + |> where ss_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ss_item_sk is not null + |> aggregate sum(ss_sales_price) as sum_ss_sales_price + group by 
ss_item_sk as item_sk, d_date + |> extend sum(sum_ss_sales_price) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) + as cume_sales) +table web_v1 +|> as web +|> full outer join store_v1 store + on (web.item_sk = store.item_sk and web.d_date = store.d_date) +|> select + case when web.item_sk is not null + then web.item_sk + else store.item_sk end item_sk, + case when web.d_date is not null + then web.d_date + else store.d_date end d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +|> as x +|> select + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) web_cumulative, + max(store_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) store_cumulative +|> as y +|> where web_cumulative > store_cumulative +|> order by item_sk, d_date +|> limit 100 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "TABLE_OR_VIEW_NOT_FOUND", + "sqlState" : "42P01", + "messageParameters" : { + "relationName" : "`web_sales`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 34, + "fragment" : "web_sales" + } ] +} + + -- !query drop table t -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index 1892741aa4232..6f862b36f9de6 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -1326,7 +1326,7 @@ Aggregate [count(1) AS count(1)#xL] +- Filter unique1#x IN (list#x []) : +- Project [unique1#x] : +- Filter (unique2#x = 42) - : +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, 
thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, ... 7 more fields] + : +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, ... 6 more fields] : +- Join Inner, (unique1#x = unique1#x) : :- SubqueryAlias b : : +- SubqueryAlias spark_catalog.default.tenk1 diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out index 87831f7f30384..db223603c8fd9 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out @@ -99,7 +99,7 @@ WithCTE +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, 1)) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] +- Project [x#xL] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [x#xL], false + +- CTERelationRef xxxx, true, [x#xL], false, false -- !query @@ -121,7 +121,7 @@ WithCTE +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, cast(-1 as bigint), cast(1 as bigint))) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] +- Project [x#xL] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [x#xL], false + +- CTERelationRef xxxx, true, [x#xL], false, false -- !query @@ -154,7 +154,7 @@ WithCTE +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, 1)) AS sum(x) OVER (ORDER 
BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] +- Project [x#xL] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [x#xL], false + +- CTERelationRef xxxx, true, [x#xL], false, false -- !query @@ -187,7 +187,7 @@ WithCTE +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, cast(-1 as bigint), cast(1 as bigint))) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] +- Project [x#xL] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [x#xL], false + +- CTERelationRef xxxx, true, [x#xL], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out index 8582043c1a375..4a220f59ac52a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out @@ -12,10 +12,10 @@ WithCTE +- Project [x#x, y#x, x#x, y#x] +- Join Inner :- SubqueryAlias q1 - : +- CTERelationRef xxxx, true, [x#x, y#x], false + : +- CTERelationRef xxxx, true, [x#x, y#x], false, false +- SubqueryAlias q2 +- SubqueryAlias q1 - +- CTERelationRef xxxx, true, [x#x, y#x], false + +- CTERelationRef xxxx, true, [x#x, y#x], false, false -- !query @@ -194,7 +194,7 @@ WithCTE +- SubqueryAlias q +- Project [foo#x] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [foo#x], false + +- CTERelationRef xxxx, true, [foo#x], false, false -- !query @@ -222,13 +222,13 @@ WithCTE : +- Union false, false : :- Project [2#x] : : +- SubqueryAlias innermost -: : +- CTERelationRef xxxx, true, [2#x], false +: : +- CTERelationRef xxxx, true, [2#x], false, false : +- Project [3 AS 3#x] : +- OneRowRelation +- Sort [x#x ASC NULLS FIRST], true +- Project [x#x] +- SubqueryAlias outermost - +- CTERelationRef xxxx, true, [x#x], false + +- 
CTERelationRef xxxx, true, [x#x], false, false -- !query @@ -418,7 +418,7 @@ WithCTE : +- OneRowRelation +- Project [x#x] +- SubqueryAlias ordinality - +- CTERelationRef xxxx, true, [x#x], false + +- CTERelationRef xxxx, true, [x#x], false, false -- !query @@ -459,7 +459,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d : +- OneRowRelation +- Project [42#x] +- SubqueryAlias test - +- CTERelationRef xxxx, true, [42#x], false + +- CTERelationRef xxxx, true, [42#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/random.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/random.sql.out index 4b945238dddaa..c46708e5e8661 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/random.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/random.sql.out @@ -119,6 +119,18 @@ SELECT uniform(0, 10L, 0) AS result [Analyzer test output redacted due to nondeterminism] +-- !query +SELECT uniform(0, cast(10 as tinyint), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(0, cast(10 as smallint), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + -- !query SELECT uniform(0, 10S, 0) AS result -- !query analysis @@ -137,6 +149,30 @@ SELECT uniform(10.0F, 20.0F, 0) AS result [Analyzer test output redacted due to nondeterminism] +-- !query +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(10, 3)), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(11, 4)), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(10, cast(20 as decimal(10, 3)), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(cast(10 as 
decimal(10, 3)), 20, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + -- !query SELECT uniform(10.0D, 20.0D, CAST(3 / 7 AS LONG)) AS result -- !query analysis @@ -161,24 +197,108 @@ SELECT uniform(10, 20.0F) IS NOT NULL AS result [Analyzer test output redacted due to nondeterminism] +-- !query +SELECT uniform(-10L, 10L, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(-20L, -10L, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(-20L, -10L, -10) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + -- !query SELECT uniform(NULL, 1, 0) AS result -- !query analysis [Analyzer test output redacted due to nondeterminism] +-- !query +SELECT uniform(cast(NULL AS int), 1, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(cast(NULL AS float), 1, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + -- !query SELECT uniform(0, NULL, 0) AS result -- !query analysis [Analyzer test output redacted due to nondeterminism] +-- !query +SELECT uniform(0, cast(NULL AS int), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(0, cast(NULL AS float), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + -- !query SELECT uniform(0, 1, NULL) AS result -- !query analysis [Analyzer test output redacted due to nondeterminism] +-- !query +SELECT uniform(NULL, NULL, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(NULL, NULL, NULL) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(0, 1, cast(NULL as int)) AS result +-- !query analysis 
+[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT uniform(0, 1, cast(NULL as float)) AS result +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"CAST(NULL AS FLOAT)\"", + "inputType" : "\"FLOAT\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(0, 1, CAST(NULL AS FLOAT))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 41, + "fragment" : "uniform(0, 1, cast(NULL as float))" + } ] +} + + -- !query SELECT uniform(10, 20, col) AS result FROM VALUES (0), (1), (2) tab(col) -- !query analysis @@ -272,161 +392,251 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT randstr(1, 0) AS result --- !query analysis -[Analyzer test output redacted due to nondeterminism] - - --- !query -SELECT randstr(5, 0) AS result --- !query analysis -[Analyzer test output redacted due to nondeterminism] - - --- !query -SELECT randstr(10, 0) AS result --- !query analysis -[Analyzer test output redacted due to nondeterminism] - - --- !query -SELECT randstr(10S, 0) AS result +SELECT uniform(10.0F, 20.0F, 0.0F) AS result -- !query analysis -[Analyzer test output redacted due to nondeterminism] - - --- !query -SELECT randstr(10, 0) AS result FROM VALUES (0), (1), (2) tab(col) --- !query analysis -[Analyzer test output redacted due to nondeterminism] - - --- !query -SELECT randstr(10) IS NOT NULL AS result --- !query analysis -[Analyzer test output redacted due to nondeterminism] +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"0.0\"", + "inputType" : "\"FLOAT\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(10.0, 
20.0, 0.0)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 34, + "fragment" : "uniform(10.0F, 20.0F, 0.0F)" + } ] +} -- !query -SELECT randstr(10L, 0) AS result +SELECT uniform(10.0F, 20.0F, 0.0D) AS result -- !query analysis org.apache.spark.sql.catalyst.ExtendedAnalysisException { "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"10\"", - "inputType" : "\"BIGINT\"", - "paramIndex" : "first", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(10, 0)\"" + "inputSql" : "\"0.0\"", + "inputType" : "\"DOUBLE\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(10.0, 20.0, 0.0)\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 22, - "fragment" : "randstr(10L, 0)" + "stopIndex" : 34, + "fragment" : "uniform(10.0F, 20.0F, 0.0D)" } ] } -- !query -SELECT randstr(10.0F, 0) AS result +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(10, 3)), cast(0 as decimal(10, 3))) -- !query analysis org.apache.spark.sql.catalyst.ExtendedAnalysisException { "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"10.0\"", - "inputType" : "\"FLOAT\"", - "paramIndex" : "first", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(10.0, 0)\"" + "inputSql" : "\"CAST(0 AS DECIMAL(10,3))\"", + "inputType" : "\"DECIMAL(10,3)\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(CAST(10 AS DECIMAL(10,3)), CAST(20 AS DECIMAL(10,3)), CAST(0 AS DECIMAL(10,3)))\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 24, - "fragment" : "randstr(10.0F, 0)" + "stopIndex" : 97, + "fragment" : "uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(10, 3)), cast(0 as decimal(10, 3)))" } ] } -- 
!query -SELECT randstr(10.0D, 0) AS result +SELECT uniform('abc', 10, 0) AS result -- !query analysis org.apache.spark.sql.catalyst.ExtendedAnalysisException { "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"10.0\"", - "inputType" : "\"DOUBLE\"", + "inputSql" : "\"abc\"", + "inputType" : "\"STRING\"", "paramIndex" : "first", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(10.0, 0)\"" + "requiredType" : "\"NUMERIC\"", + "sqlExpr" : "\"uniform(abc, 10, 0)\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 24, - "fragment" : "randstr(10.0D, 0)" + "stopIndex" : 28, + "fragment" : "uniform('abc', 10, 0)" } ] } -- !query -SELECT randstr(NULL, 0) AS result +SELECT uniform(0, 'def', 0) AS result -- !query analysis org.apache.spark.sql.catalyst.ExtendedAnalysisException { "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"NULL\"", - "inputType" : "\"VOID\"", - "paramIndex" : "first", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(NULL, 0)\"" + "inputSql" : "\"def\"", + "inputType" : "\"STRING\"", + "paramIndex" : "second", + "requiredType" : "\"NUMERIC\"", + "sqlExpr" : "\"uniform(0, def, 0)\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 23, - "fragment" : "randstr(NULL, 0)" + "stopIndex" : 27, + "fragment" : "uniform(0, 'def', 0)" } ] } -- !query -SELECT randstr(0, NULL) AS result +SELECT uniform(0, 10, 'ghi') AS result -- !query analysis org.apache.spark.sql.catalyst.ExtendedAnalysisException { "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"NULL\"", - "inputType" : "\"VOID\"", - "paramIndex" : "second", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(0, NULL)\"" + "inputSql" : "\"ghi\"", + "inputType" : 
"\"STRING\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(0, 10, ghi)\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 23, - "fragment" : "randstr(0, NULL)" + "stopIndex" : 28, + "fragment" : "uniform(0, 10, 'ghi')" } ] } +-- !query +SELECT randstr(1, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(5, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(10, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(10S, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(CAST(10 AS TINYINT), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(CAST(10 AS BIGINT), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(1.0F, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(1.0D, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(cast(1 AS DECIMAL(10, 2)), 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(10, 0) AS result FROM VALUES (0), (1), (2) tab(col) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(10) IS NOT NULL AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(1, -1) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(10L, 0) AS result +-- !query analysis +[Analyzer test output 
redacted due to nondeterminism] + + +-- !query +SELECT randstr(10.0F, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(10.0D, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(NULL, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(0, NULL) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + -- !query SELECT randstr(col, 0) AS result FROM VALUES (0), (1), (2) tab(col) -- !query analysis @@ -437,7 +647,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "inputExpr" : "\"col\"", "inputName" : "`length`", - "inputType" : "INT or SMALLINT", + "inputType" : "integer", "sqlExpr" : "\"randstr(col, 0)\"" }, "queryContext" : [ { @@ -460,7 +670,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "inputExpr" : "\"col\"", "inputName" : "`seed`", - "inputType" : "INT or SMALLINT", + "inputType" : "integer", "sqlExpr" : "\"randstr(10, col)\"" }, "queryContext" : [ { @@ -494,3 +704,57 @@ org.apache.spark.sql.AnalysisException "fragment" : "randstr(10, 0, 1)" } ] } + + +-- !query +SELECT randstr(-1, 0) AS result +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT randstr(10, "a") AS result FROM VALUES (0) tab(a) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"a\"", + "inputType" : "\"STRING\"", + "paramIndex" : "second", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"randstr(10, a)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 23, + "fragment" : "randstr(10, \"a\")" + } ] +} + + +-- !query +SELECT 
randstr(10, 1.5) AS result FROM VALUES (0) tab(a) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"1.5\"", + "inputType" : "\"DECIMAL(2,1)\"", + "paramIndex" : "second", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"randstr(10, 1.5)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 23, + "fragment" : "randstr(10, 1.5)" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/show-tables.sql.out index bb1d695c4e546..fb985d6b6b0ca 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/show-tables.sql.out @@ -166,10 +166,11 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1) -- !query analysis org.apache.spark.sql.AnalysisException { - "condition" : "_LEGACY_ERROR_TEMP_1231", + "condition" : "PARTITIONS_NOT_FOUND", + "sqlState" : "428FT", "messageParameters" : { - "key" : "a", - "tblName" : "`spark_catalog`.`showdb`.`show_t1`" + "partitionList" : "`a`", + "tableName" : "`spark_catalog`.`showdb`.`show_t1`" } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out index 441034ea65e9f..a70bdc9f05214 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-on-files.sql.out @@ -231,3 +231,25 @@ DROP DATABASE sql_on_files -- !query analysis DropNamespace false, false +- ResolvedNamespace V2SessionCatalog(spark_catalog), [sql_on_files] + + +-- !query +SELECT * FROM 
json.`https://raw.githubusercontent.com/apache/spark/refs/heads/master/examples/src/main/resources/employees.json` +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "FAILED_READ_FILE.UNSUPPORTED_FILE_SYSTEM", + "sqlState" : "KD001", + "messageParameters" : { + "fileSystemClass" : "org.apache.hadoop.fs.http.HttpsFileSystem", + "method" : "listStatus", + "path" : "https://raw.githubusercontent.com/apache/spark/refs/heads/master/examples/src/main/resources/employees.json" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 128, + "fragment" : "json.`https://raw.githubusercontent.com/apache/spark/refs/heads/master/examples/src/main/resources/employees.json`" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out index add7e79a98993..a18a889821500 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out @@ -2050,7 +2050,7 @@ WithCTE : +- OneRowRelation +- Project [c1#x AS 1#x] +- SubqueryAlias v1 - +- CTERelationRef xxxx, true, [c1#x], false + +- CTERelationRef xxxx, true, [c1#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out new file mode 100644 index 0000000000000..3316642de1f8b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-udf.sql.out @@ -0,0 +1,575 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE FUNCTION foo1a0() RETURNS INT RETURN 1 +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + 
"existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo1a0`" + } +} + + +-- !query +SELECT foo1a0() +-- !query analysis +Project [spark_catalog.default.foo1a0() AS spark_catalog.default.foo1a0()#x] ++- Project + +- OneRowRelation + + +-- !query +SELECT foo1a0(1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`spark_catalog`.`default`.`foo1a0`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 16, + "fragment" : "foo1a0(1)" + } ] +} + + +-- !query +CREATE FUNCTION foo1a1(a INT) RETURNS INT RETURN 1 +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo1a1`" + } +} + + +-- !query +SELECT foo1a1(1) +-- !query analysis +Project [spark_catalog.default.foo1a1(a#x) AS spark_catalog.default.foo1a1(1)#x] ++- Project [cast(1 as int) AS a#x] + +- OneRowRelation + + +-- !query +SELECT foo1a1(1, 2) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "2", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`spark_catalog`.`default`.`foo1a1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "foo1a1(1, 2)" + } ] +} + + +-- !query +CREATE FUNCTION foo1a2(a INT, b INT, c INT, d INT) RETURNS INT RETURN 1 +-- !query analysis 
+org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo1a2`" + } +} + + +-- !query +SELECT foo1a2(1, 2, 3, 4) +-- !query analysis +Project [spark_catalog.default.foo1a2(a#x, b#x, c#x, d#x) AS spark_catalog.default.foo1a2(1, 2, 3, 4)#x] ++- Project [cast(1 as int) AS a#x, cast(2 as int) AS b#x, cast(3 as int) AS c#x, cast(4 as int) AS d#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_1a`" + } +} + + +-- !query +SELECT foo2_1a(5) +-- !query analysis +Project [spark_catalog.default.foo2_1a(a#x) AS spark_catalog.default.foo2_1a(5)#x] ++- Project [cast(5 as int) AS a#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_1b(a INT, b INT) RETURNS INT RETURN a + b +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_1b`" + } +} + + +-- !query +SELECT foo2_1b(5, 6) +-- !query analysis +Project [spark_catalog.default.foo2_1b(a#x, b#x) AS spark_catalog.default.foo2_1b(5, 6)#x] ++- Project [cast(5 as int) AS a#x, cast(6 as int) AS b#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_1c(a INT, b INT) RETURNS INT RETURN 10 * (a + b) + 100 * (a -b) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : 
"42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_1c`" + } +} + + +-- !query +SELECT foo2_1c(5, 6) +-- !query analysis +Project [spark_catalog.default.foo2_1c(a#x, b#x) AS spark_catalog.default.foo2_1c(5, 6)#x] ++- Project [cast(5 as int) AS a#x, cast(6 as int) AS b#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_1d(a INT, b INT) RETURNS INT RETURN ABS(a) - LENGTH(CAST(b AS VARCHAR(10))) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_1d`" + } +} + + +-- !query +SELECT foo2_1d(-5, 6) +-- !query analysis +Project [spark_catalog.default.foo2_1d(a#x, b#x) AS spark_catalog.default.foo2_1d(-5, 6)#x] ++- Project [cast(-5 as int) AS a#x, cast(6 as int) AS b#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_2a(a INT) RETURNS INT RETURN SELECT a +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2a`" + } +} + + +-- !query +SELECT foo2_2a(5) +-- !query analysis +Project [spark_catalog.default.foo2_2a(a#x) AS spark_catalog.default.foo2_2a(5)#x] ++- Project [cast(5 as int) AS a#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_2b(a INT) RETURNS INT RETURN 1 + (SELECT a) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2b`" + } +} + + +-- !query 
+SELECT foo2_2b(5) +-- !query analysis +Project [spark_catalog.default.foo2_2b(a#x) AS spark_catalog.default.foo2_2b(5)#x] +: +- Project [outer(a#x)] +: +- OneRowRelation ++- Project [cast(5 as int) AS a#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_2c(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT a)) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 21, + "stopIndex" : 21, + "fragment" : "a" + } ] +} + + +-- !query +CREATE FUNCTION foo2_2d(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT (SELECT (SELECT a)))) +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 37, + "stopIndex" : 37, + "fragment" : "a" + } ] +} + + +-- !query +CREATE FUNCTION foo2_2e(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) WHERE c1 = 2 +UNION ALL +SELECT a + 1 FROM (VALUES 1) AS V(c1) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2e`" + } +} + + +-- !query +CREATE FUNCTION foo2_2f(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +EXCEPT +SELECT a + 1 FROM (VALUES 1) AS V(a) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + 
"routineName" : "`default`.`foo2_2f`" + } +} + + +-- !query +CREATE FUNCTION foo2_2g(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +INTERSECT +SELECT a FROM (VALUES 1) AS V(a) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_2g`" + } +} + + +-- !query +DROP TABLE IF EXISTS t1 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1 + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t2 + + +-- !query +DROP TABLE IF EXISTS ts +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.ts + + +-- !query +DROP TABLE IF EXISTS tm +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.tm + + +-- !query +DROP TABLE IF EXISTS ta +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.ta + + +-- !query +DROP TABLE IF EXISTS V1 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.V1 + + +-- !query +DROP TABLE IF EXISTS V2 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.V2 + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`t1`, true, true, false + + +-- !query +DROP VIEW IF EXISTS t2 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`t2`, true, true, false + + +-- !query +DROP VIEW IF EXISTS ts +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`ts`, true, true, false + + +-- !query +DROP VIEW IF EXISTS tm +-- !query analysis 
+DropTableCommand `spark_catalog`.`default`.`tm`, true, true, false + + +-- !query +DROP VIEW IF EXISTS ta +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`ta`, true, true, false + + +-- !query +DROP VIEW IF EXISTS V1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`V1`, true, true, false + + +-- !query +DROP VIEW IF EXISTS V2 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`V2`, true, true, false + + +-- !query +CREATE FUNCTION foo2_3(a INT, b INT) RETURNS INT RETURN a + b +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_3`" + } +} + + +-- !query +CREATE VIEW V1(c1, c2) AS VALUES (1, 2), (3, 4), (5, 6) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`V1`, [(c1,None), (c2,None)], VALUES (1, 2), (3, 4), (5, 6), false, false, PersistedView, COMPENSATION, true + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE VIEW V2(c1, c2) AS VALUES (-1, -2), (-3, -4), (-5, -6) +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`V2`, [(c1,None), (c2,None)], VALUES (-1, -2), (-3, -4), (-5, -6), false, false, PersistedView, COMPENSATION, true + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT foo2_3(c1, c2), foo2_3(c2, 1), foo2_3(c1, c2) - foo2_3(c2, c1 - 1) FROM V1 ORDER BY 1, 2, 3 +-- !query analysis +Sort [spark_catalog.default.foo2_3(c1, c2)#x ASC NULLS FIRST, spark_catalog.default.foo2_3(c2, 1)#x ASC NULLS FIRST, (spark_catalog.default.foo2_3(c1, c2) - spark_catalog.default.foo2_3(c2, (c1 - 1)))#x ASC NULLS FIRST], true ++- Project [spark_catalog.default.foo2_3(a#x, b#x) AS spark_catalog.default.foo2_3(c1, c2)#x, spark_catalog.default.foo2_3(a#x, b#x) AS spark_catalog.default.foo2_3(c2, 1)#x, (spark_catalog.default.foo2_3(a#x, b#x) - 
spark_catalog.default.foo2_3(a#x, b#x)) AS (spark_catalog.default.foo2_3(c1, c2) - spark_catalog.default.foo2_3(c2, (c1 - 1)))#x] + +- Project [c1#x, c2#x, cast(c1#x as int) AS a#x, cast(c2#x as int) AS b#x, cast(c2#x as int) AS a#x, cast(1 as int) AS b#x, cast(c1#x as int) AS a#x, cast(c2#x as int) AS b#x, cast(c2#x as int) AS a#x, cast((c1#x - 1) as int) AS b#x] + +- SubqueryAlias spark_catalog.default.v1 + +- View (`spark_catalog`.`default`.`v1`, [c1#x, c2#x]) + +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM V1 WHERE foo2_3(c1, 0) = c1 AND foo2_3(c1, c2) < 8 +-- !query analysis +Project [c1#x, c2#x] ++- Project [c1#x, c2#x] + +- Filter ((spark_catalog.default.foo2_3(a#x, b#x) = c1#x) AND (spark_catalog.default.foo2_3(a#x, b#x) < 8)) + +- Project [c1#x, c2#x, cast(c1#x as int) AS a#x, cast(0 as int) AS b#x, cast(c1#x as int) AS a#x, cast(c2#x as int) AS b#x] + +- SubqueryAlias spark_catalog.default.v1 + +- View (`spark_catalog`.`default`.`v1`, [c1#x, c2#x]) + +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT foo2_3(SUM(c1), SUM(c2)), SUM(c1) + SUM(c2), SUM(foo2_3(c1, c2) + foo2_3(c2, c1) - foo2_3(c2, c1)) +FROM V1 +-- !query analysis +Project [spark_catalog.default.foo2_3(a#x, b#x) AS spark_catalog.default.foo2_3(sum(c1), sum(c2))#x, (sum(c1) + sum(c2))#xL, sum(((spark_catalog.default.foo2_3(c1, c2) + spark_catalog.default.foo2_3(c2, c1)) - spark_catalog.default.foo2_3(c2, c1)))#xL] ++- Project [sum(c1)#xL, sum(c2)#xL, (sum(c1) + sum(c2))#xL, sum(((spark_catalog.default.foo2_3(c1, c2) + spark_catalog.default.foo2_3(c2, c1)) - spark_catalog.default.foo2_3(c2, c1)))#xL, cast(sum(c1)#xL as int) AS a#x, cast(sum(c2)#xL as int) AS b#x] + +- Aggregate [sum(c1#x) AS sum(c1)#xL, sum(c2#x) AS sum(c2)#xL, (sum(c1#x) + sum(c2#x)) AS (sum(c1) + sum(c2))#xL, sum(((spark_catalog.default.foo2_3(a#x, b#x) + 
spark_catalog.default.foo2_3(a#x, b#x)) - spark_catalog.default.foo2_3(a#x, b#x))) AS sum(((spark_catalog.default.foo2_3(c1, c2) + spark_catalog.default.foo2_3(c2, c1)) - spark_catalog.default.foo2_3(c2, c1)))#xL] + +- Project [c1#x, c2#x, cast(c1#x as int) AS a#x, cast(c2#x as int) AS b#x, cast(c2#x as int) AS a#x, cast(c1#x as int) AS b#x, cast(c2#x as int) AS a#x, cast(c1#x as int) AS b#x] + +- SubqueryAlias spark_catalog.default.v1 + +- View (`spark_catalog`.`default`.`v1`, [c1#x, c2#x]) + +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE FUNCTION foo2_4a(a ARRAY) RETURNS STRING RETURN +SELECT array_sort(a, (i, j) -> rank[i] - rank[j])[0] FROM (SELECT MAP('a', 1, 'b', 2) rank) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : "42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_4a`" + } +} + + +-- !query +SELECT foo2_4a(ARRAY('a', 'b')) +-- !query analysis +Project [spark_catalog.default.foo2_4a(a#x) AS spark_catalog.default.foo2_4a(array(a, b))#x] +: +- Project [array_sort(outer(a#x), lambdafunction((rank#x[lambda i#x] - rank#x[lambda j#x]), lambda i#x, lambda j#x, false), false)[0] AS array_sort(outer(foo2_4a.a), lambdafunction((rank[namedlambdavariable()] - rank[namedlambdavariable()]), namedlambdavariable(), namedlambdavariable()))[0]#x] +: +- SubqueryAlias __auto_generated_subquery_name +: +- Project [map(a, 1, b, 2) AS rank#x] +: +- OneRowRelation ++- Project [cast(array(a, b) as array) AS a#x] + +- OneRowRelation + + +-- !query +CREATE FUNCTION foo2_4b(m MAP, k STRING) RETURNS STRING RETURN +SELECT v || ' ' || v FROM (SELECT upper(m[k]) AS v) +-- !query analysis +org.apache.spark.sql.catalyst.analysis.FunctionAlreadyExistsException +{ + "condition" : "ROUTINE_ALREADY_EXISTS", + "sqlState" : 
"42723", + "messageParameters" : { + "existingRoutineType" : "routine", + "newRoutineType" : "routine", + "routineName" : "`default`.`foo2_4b`" + } +} + + +-- !query +SELECT foo2_4b(map('a', 'hello', 'b', 'world'), 'a') +-- !query analysis +Project [spark_catalog.default.foo2_4b(m#x, k#x) AS spark_catalog.default.foo2_4b(map(a, hello, b, world), a)#x] +: +- Project [concat(concat(v#x, ), v#x) AS concat(concat(v, ), v)#x] +: +- SubqueryAlias __auto_generated_subquery_name +: +- Project [upper(outer(m#x)[outer(k#x)]) AS v#x] +: +- OneRowRelation ++- Project [cast(map(a, hello, b, world) as map) AS m#x, cast(a as string) AS k#x] + +- OneRowRelation + + +-- !query +DROP VIEW V2 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`V2`, false, true, false + + +-- !query +DROP VIEW V1 +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`V1`, false, true, false diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out index 7c3678c66c117..abaf6a2432251 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out @@ -133,7 +133,7 @@ WithCTE : +- Filter (outer(emp_name#x) = emp_name#x) : +- SubqueryAlias b : +- SubqueryAlias bonus_cte - : +- CTERelationRef xxxx, true, [emp_name#x, bonus_amt#x], false + : +- CTERelationRef xxxx, true, [emp_name#x, bonus_amt#x], false, false +- SubqueryAlias a +- SubqueryAlias bonus +- View (`BONUS`, [emp_name#x, bonus_amt#x]) @@ -189,10 +189,10 @@ WithCTE : +- Join Inner, (dept_id#x = dept_id#x) : :- SubqueryAlias a : : +- SubqueryAlias emp_cte - : : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x], false + : : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, 
dept_id#x], false, false : +- SubqueryAlias b : +- SubqueryAlias dept_cte - : +- CTERelationRef xxxx, true, [dept_id#x, dept_name#x, state#x], false + : +- CTERelationRef xxxx, true, [dept_id#x, dept_name#x, state#x], false, false +- SubqueryAlias bonus +- View (`BONUS`, [emp_name#x, bonus_amt#x]) +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] @@ -253,10 +253,10 @@ WithCTE : +- Join LeftOuter, (dept_id#x = dept_id#x) : :- SubqueryAlias a : : +- SubqueryAlias emp_cte - : : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x], false + : : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x], false, false : +- SubqueryAlias b : +- SubqueryAlias dept_cte - : +- CTERelationRef xxxx, true, [dept_id#x, dept_name#x, state#x], false + : +- CTERelationRef xxxx, true, [dept_id#x, dept_name#x, state#x], false, false +- Join Inner :- Join Inner : :- SubqueryAlias b @@ -268,7 +268,7 @@ WithCTE : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- SubqueryAlias e : +- SubqueryAlias emp_cte - : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x], false + : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x], false, false +- SubqueryAlias d +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x, dept_name#x, state#x]) @@ -322,7 +322,7 @@ WithCTE : +- Filter (count(1)#xL > cast(1 as bigint)) : +- Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x, count(1) AS count(1)#xL] : +- SubqueryAlias empdept - : +- CTERelationRef xxxx, true, [id#x, salary#x, emp_name#x, dept_id#x], false + : +- CTERelationRef xxxx, true, [id#x, salary#x, emp_name#x, dept_id#x], false, false +- SubqueryAlias bonus +- View (`BONUS`, [emp_name#x, bonus_amt#x]) +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] @@ -375,7 +375,7 @@ WithCTE : +- Filter (count(1)#xL < cast(1 as bigint)) : +- 
Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x, count(1) AS count(1)#xL] : +- SubqueryAlias empdept - : +- CTERelationRef xxxx, true, [id#x, salary#x, emp_name#x, dept_id#x], false + : +- CTERelationRef xxxx, true, [id#x, salary#x, emp_name#x, dept_id#x], false, false +- SubqueryAlias bonus +- View (`BONUS`, [emp_name#x, bonus_amt#x]) +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out index 39748a324e527..230ffc005e90d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out @@ -330,7 +330,7 @@ WithCTE +- Project [t1a#x, t1b#x, t1a#x, t1b#x] +- Join Inner, (t1b#x = t1b#x) :- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false +- SubqueryAlias cte2 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out index 0074991b4ea6a..199b876fb9a86 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out @@ -138,7 +138,7 @@ WithCTE : +- Project [t1b#x] : +- Filter (cast(t1b#x as int) > 0) : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : +- CTERelationRef xxxx, true, [t1a#x, 
t1b#x], false, false +- SubqueryAlias t1 +- View (`t1`, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] @@ -197,21 +197,21 @@ WithCTE : : : :- Project [t1b#x] : : : : +- Filter (cast(t1b#x as int) > 0) : : : : +- SubqueryAlias cte1 - : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false : : : +- Project [t1b#x] : : : +- Filter (cast(t1b#x as int) > 5) : : : +- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false : : +- Intersect false : : :- Project [t1b#x] : : : +- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false : : +- Project [t1b#x] : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false : +- Project [t1b#x] : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false +- SubqueryAlias t1 +- View (`t1`, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] @@ -268,22 +268,22 @@ WithCTE : : : :- Join FullOuter, (t1c#x = t1c#x) : : : : :- Join Inner, (t1b#x > t1b#x) : : : : : :- SubqueryAlias cte1 - : : : : : : 
+- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false + : : : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false, false : : : : : +- SubqueryAlias cte2 : : : : : +- SubqueryAlias cte1 - : : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false + : : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false, false : : : : +- SubqueryAlias cte3 : : : : +- SubqueryAlias cte1 - : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false + : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false, false : : : +- SubqueryAlias cte4 : : : +- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false, false : : +- SubqueryAlias cte5 : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false, false : +- SubqueryAlias cte6 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false, false +- SubqueryAlias t1 +- View (`t1`, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] @@ -354,16 +354,16 @@ WithCTE :- Join FullOuter, (t1a#x = t1a#x) : :- Join Inner, ((cast(t1b#x as int) > 5) AND (t1a#x = t1a#x)) : : :- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false : : +- 
SubqueryAlias cte2 : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false : +- SubqueryAlias cte3 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false +- SubqueryAlias cte4 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false -- !query @@ -424,10 +424,10 @@ WithCTE +- Project [t1a#x, t1b#x] +- Join Inner, (t1h#x >= t1h#x) :- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1h#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1h#x], false, false +- SubqueryAlias cte2 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1h#x], false + +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1h#x], false, false -- !query @@ -485,16 +485,16 @@ WithCTE :- Join RightOuter, (t1b#x = t1b#x) : :- Join Inner, (t1a#x = t1a#x) : : :- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false, false : : +- SubqueryAlias cte2 : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false, false : +- SubqueryAlias cte3 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false, false +- SubqueryAlias cte4 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false + +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false, false -- !query @@ -538,10 +538,10 @@ WithCTE +- Project [t1a#x, t1b#x] +- Join RightOuter, (t1a#x = t1a#x) :- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false +- SubqueryAlias 
cte2 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false -- !query @@ -599,15 +599,15 @@ WithCTE : : +- SubqueryAlias t1 : : +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false +- SubqueryAlias s +- Project [t1b#x] +- Join LeftOuter, (t1b#x = t1b#x) :- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false +- SubqueryAlias cte2 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false -- !query @@ -642,7 +642,7 @@ WithCTE : +- Project [t1b#x] : +- Filter (cast(t1b#x as int) < 0) : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false, false +- SubqueryAlias t1 +- View (`t1`, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] @@ -722,16 +722,16 @@ WithCTE : :- Join RightOuter, (t1b#x = t1b#x) : : :- Join Inner, (t1a#x = t1a#x) : : : :- SubqueryAlias cte1 - : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false + : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false, false : : : +- SubqueryAlias cte2 : : : +- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false, false : : +- 
SubqueryAlias cte3 : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false, false : +- SubqueryAlias cte4 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false, false +- SubqueryAlias t1 +- View (`t1`, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out index 72e230f9bb881..2a3a87e5cab81 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out @@ -623,7 +623,7 @@ Project [c1#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#x] : : +- OneRowRelation : +- Project [(a#x + outer(c1#x)) AS (a + outer(t1.c1))#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [a#x], false +: +- CTERelationRef xxxx, true, [a#x], false, false +- SubqueryAlias t1 +- View (`t1`, [c1#x, c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -647,7 +647,7 @@ Project [c1#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#xL] : : +- LocalRelation [c1#x, c2#x] : +- Aggregate [sum(c2#x) AS sum(c2)#xL] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [c1#x, c2#x], false +: +- CTERelationRef 
xxxx, true, [c1#x, c2#x], false, false +- SubqueryAlias t1 +- View (`t1`, [c1#x, c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -677,10 +677,10 @@ Project [c1#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#xL] : : +- Project [c1#x, c2#x] : : +- Filter (outer(c1#x) = c1#x) : : +- SubqueryAlias t3 -: : +- CTERelationRef xxxx, true, [c1#x, c2#x], false +: : +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false : +- Aggregate [sum(c2#x) AS sum(c2)#xL] : +- SubqueryAlias t4 -: +- CTERelationRef xxxx, true, [c1#x, c2#x], false +: +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false +- SubqueryAlias t1 +- View (`t1`, [c1#x, c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -713,10 +713,10 @@ Project [c1#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#xL] : +- Union false, false : :- Project [c1#x, c2#x] : : +- SubqueryAlias t -: : +- CTERelationRef xxxx, true, [c1#x, c2#x], false +: : +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false : +- Project [c2#x, c1#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [c1#x, c2#x], false +: +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false +- SubqueryAlias t1 +- View (`t1`, [c1#x, c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -756,9 +756,9 @@ WithCTE : : +- Aggregate [sum(c2#x) AS sum(c2)#xL] : : +- Filter (c1#x = outer(c1#x)) : : +- SubqueryAlias t - : : +- CTERelationRef xxxx, true, [c1#x, c2#x], false + : : +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false : +- SubqueryAlias v - : +- CTERelationRef xxxx, true, [c1#x, c2#x], false + : +- CTERelationRef xxxx, true, [c1#x, c2#x], false, false +- SubqueryAlias t1 +- View (`t1`, [c1#x, c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -779,7 +779,7 @@ WithCTE : +- Project [a#x] : +- Filter (a#x = outer(c1#x)) : +- SubqueryAlias t - : +- CTERelationRef xxxx, true, [a#x], false + : +- CTERelationRef xxxx, true, [a#x], false, false +- 
SubqueryAlias t1 +- View (`t1`, [c1#x, c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -1027,7 +1027,7 @@ WithCTE : +- Aggregate [sum(1) AS sum(1)#xL] : +- Filter ((a#x = cast(outer(col#x) as int)) OR (upper(cast(outer(col#x) as string)) = Y)) : +- SubqueryAlias T - : +- CTERelationRef xxxx, true, [a#x], false + : +- CTERelationRef xxxx, true, [a#x], false, false +- SubqueryAlias foo +- Project [null AS col#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out index afbdc4293e6a3..bca87e0b5da5f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out @@ -217,3 +217,45 @@ Project [a#x, b#x, c#x, d#x] +- Project [id#x, v2#x] +- SubqueryAlias src2 +- LocalRelation [id#x, v2#x] + + +-- !query +SELECT src1.* FROM src1 a ORDER BY id LIMIT 1 +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "condition" : "CANNOT_RESOLVE_STAR_EXPAND", + "sqlState" : "42704", + "messageParameters" : { + "columns" : "`id`, `v1`", + "targetString" : "`src1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 13, + "fragment" : "src1.*" + } ] +} + + +-- !query +SELECT src1.id FROM (SELECT * FROM src1 ORDER BY id LIMIT 1) a +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`src1`.`id`", + "proposal" : "`a`.`id`, `a`.`v1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 14, + "fragment" : "src1.id" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out index 6acd4e3774f78..3e185decc2578 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out @@ -759,14 +759,14 @@ Project [from_csv(StructField(t,TimestampNTZType,true), (timestampFormat,dd/MMMM -- !query select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query analysis -Project [timestampadd(MONTH, -1, 2022-02-14 01:02:03, Some(America/Los_Angeles)) AS timestampadd(MONTH, -1, TIMESTAMP_NTZ '2022-02-14 01:02:03')#x] +Project [timestampadd(MONTH, cast(-1 as bigint), 2022-02-14 01:02:03, Some(America/Los_Angeles)) AS timestampadd(MONTH, -1, TIMESTAMP_NTZ '2022-02-14 01:02:03')#x] +- OneRowRelation -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query analysis -Project [timestampadd(MINUTE, 58, 2022-02-14 01:02:03, Some(America/Los_Angeles)) AS timestampadd(MINUTE, 58, TIMESTAMP_NTZ '2022-02-14 01:02:03')#x] +Project [timestampadd(MINUTE, cast(58 as bigint), 2022-02-14 01:02:03, Some(America/Los_Angeles)) AS timestampadd(MINUTE, 58, TIMESTAMP_NTZ '2022-02-14 01:02:03')#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out index dd2c2d5032798..0134892e0caab 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out @@ -819,14 +819,14 @@ Project [from_csv(StructField(t,TimestampNTZType,true), (timestampFormat,dd/MMMM -- !query select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query analysis -Project [timestampadd(MONTH, -1, 2022-02-14 01:02:03, Some(America/Los_Angeles)) AS timestampadd(MONTH, 
-1, TIMESTAMP_NTZ '2022-02-14 01:02:03')#x] +Project [timestampadd(MONTH, cast(-1 as bigint), 2022-02-14 01:02:03, Some(America/Los_Angeles)) AS timestampadd(MONTH, -1, TIMESTAMP_NTZ '2022-02-14 01:02:03')#x] +- OneRowRelation -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query analysis -Project [timestampadd(MINUTE, 58, 2022-02-14 01:02:03, Some(America/Los_Angeles)) AS timestampadd(MINUTE, 58, TIMESTAMP_NTZ '2022-02-14 01:02:03')#x] +Project [timestampadd(MINUTE, cast(58 as bigint), 2022-02-14 01:02:03, Some(America/Los_Angeles)) AS timestampadd(MINUTE, 58, TIMESTAMP_NTZ '2022-02-14 01:02:03')#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out index 17c6797545c3d..2675008424872 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out @@ -888,10 +888,10 @@ WithCTE +- Join Inner, (b#x = b#x) :- SubqueryAlias t1 : +- SubqueryAlias temp - : +- CTERelationRef xxxx, true, [b#x], false + : +- CTERelationRef xxxx, true, [b#x], false, false +- SubqueryAlias t2 +- SubqueryAlias temp - +- CTERelationRef xxxx, true, [b#x], false + +- CTERelationRef xxxx, true, [b#x], false, false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/concat.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/concat.sql.out index 6f3bc9ccb66f3..4a35fffe3191b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/concat.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/concat.sql.out @@ -226,9 +226,9 @@ CreateViewCommand `various_arrays`, SELECT * FROM VALUES ( struct_array1, struct_array2, map_array1, map_array2 ), false, false, LocalTempView, UNSUPPORTED, true - +- Project 
[boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 4 more fields] + +- Project [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, struct_array1#x, ... 3 more fields] +- SubqueryAlias various_arrays - +- LocalRelation [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 4 more fields] + +- LocalRelation [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, struct_array1#x, ... 
3 more fields] -- !query @@ -251,11 +251,11 @@ FROM various_arrays -- !query analysis Project [concat(boolean_array1#x, boolean_array2#x) AS boolean_array#x, concat(tinyint_array1#x, tinyint_array2#x) AS tinyint_array#x, concat(smallint_array1#x, smallint_array2#x) AS smallint_array#x, concat(int_array1#x, int_array2#x) AS int_array#x, concat(bigint_array1#x, bigint_array2#x) AS bigint_array#x, concat(decimal_array1#x, decimal_array2#x) AS decimal_array#x, concat(double_array1#x, double_array2#x) AS double_array#x, concat(float_array1#x, float_array2#x) AS float_array#x, concat(date_array1#x, data_array2#x) AS data_array#x, concat(timestamp_array1#x, timestamp_array2#x) AS timestamp_array#x, concat(string_array1#x, string_array2#x) AS string_array#x, concat(array_array1#x, array_array2#x) AS array_array#x, concat(struct_array1#x, struct_array2#x) AS struct_array#x, concat(map_array1#x, map_array2#x) AS map_array#x] +- SubqueryAlias various_arrays - +- View (`various_arrays`, [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 
4 more fields]) - +- Project [cast(boolean_array1#x as array) AS boolean_array1#x, cast(boolean_array2#x as array) AS boolean_array2#x, cast(tinyint_array1#x as array) AS tinyint_array1#x, cast(tinyint_array2#x as array) AS tinyint_array2#x, cast(smallint_array1#x as array) AS smallint_array1#x, cast(smallint_array2#x as array) AS smallint_array2#x, cast(int_array1#x as array) AS int_array1#x, cast(int_array2#x as array) AS int_array2#x, cast(bigint_array1#x as array) AS bigint_array1#x, cast(bigint_array2#x as array) AS bigint_array2#x, cast(decimal_array1#x as array) AS decimal_array1#x, cast(decimal_array2#x as array) AS decimal_array2#x, cast(double_array1#x as array) AS double_array1#x, cast(double_array2#x as array) AS double_array2#x, cast(float_array1#x as array) AS float_array1#x, cast(float_array2#x as array) AS float_array2#x, cast(date_array1#x as array) AS date_array1#x, cast(data_array2#x as array) AS data_array2#x, cast(timestamp_array1#x as array) AS timestamp_array1#x, cast(timestamp_array2#x as array) AS timestamp_array2#x, cast(string_array1#x as array) AS string_array1#x, cast(string_array2#x as array) AS string_array2#x, cast(array_array1#x as array>) AS array_array1#x, cast(array_array2#x as array>) AS array_array2#x, ... 4 more fields] - +- Project [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 
4 more fields] + +- View (`various_arrays`, [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, struct_array1#x, ... 3 more fields]) + +- Project [cast(boolean_array1#x as array) AS boolean_array1#x, cast(boolean_array2#x as array) AS boolean_array2#x, cast(tinyint_array1#x as array) AS tinyint_array1#x, cast(tinyint_array2#x as array) AS tinyint_array2#x, cast(smallint_array1#x as array) AS smallint_array1#x, cast(smallint_array2#x as array) AS smallint_array2#x, cast(int_array1#x as array) AS int_array1#x, cast(int_array2#x as array) AS int_array2#x, cast(bigint_array1#x as array) AS bigint_array1#x, cast(bigint_array2#x as array) AS bigint_array2#x, cast(decimal_array1#x as array) AS decimal_array1#x, cast(decimal_array2#x as array) AS decimal_array2#x, cast(double_array1#x as array) AS double_array1#x, cast(double_array2#x as array) AS double_array2#x, cast(float_array1#x as array) AS float_array1#x, cast(float_array2#x as array) AS float_array2#x, cast(date_array1#x as array) AS date_array1#x, cast(data_array2#x as array) AS data_array2#x, cast(timestamp_array1#x as array) AS timestamp_array1#x, cast(timestamp_array2#x as array) AS timestamp_array2#x, cast(string_array1#x as array) AS string_array1#x, cast(string_array2#x as array) AS string_array2#x, cast(array_array1#x as array>) AS array_array1#x, cast(array_array2#x as array>) AS array_array2#x, cast(struct_array1#x as array>) AS struct_array1#x, ... 
3 more fields] + +- Project [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, struct_array1#x, ... 3 more fields] +- SubqueryAlias various_arrays - +- LocalRelation [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 4 more fields] + +- LocalRelation [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, struct_array1#x, ... 
3 more fields] -- !query @@ -273,8 +273,8 @@ FROM various_arrays -- !query analysis Project [concat(cast(tinyint_array1#x as array), smallint_array2#x) AS ts_array#x, concat(cast(smallint_array1#x as array), int_array2#x) AS si_array#x, concat(cast(int_array1#x as array), bigint_array2#x) AS ib_array#x, concat(cast(bigint_array1#x as array), cast(decimal_array2#x as array)) AS bd_array#x, concat(cast(decimal_array1#x as array), double_array2#x) AS dd_array#x, concat(double_array1#x, cast(float_array2#x as array)) AS df_array#x, concat(cast(string_array1#x as array), data_array2#x) AS std_array#x, concat(timestamp_array1#x, cast(string_array2#x as array)) AS tst_array#x, concat(cast(string_array1#x as array), cast(int_array2#x as array)) AS sti_array#x] +- SubqueryAlias various_arrays - +- View (`various_arrays`, [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 
4 more fields]) - +- Project [cast(boolean_array1#x as array) AS boolean_array1#x, cast(boolean_array2#x as array) AS boolean_array2#x, cast(tinyint_array1#x as array) AS tinyint_array1#x, cast(tinyint_array2#x as array) AS tinyint_array2#x, cast(smallint_array1#x as array) AS smallint_array1#x, cast(smallint_array2#x as array) AS smallint_array2#x, cast(int_array1#x as array) AS int_array1#x, cast(int_array2#x as array) AS int_array2#x, cast(bigint_array1#x as array) AS bigint_array1#x, cast(bigint_array2#x as array) AS bigint_array2#x, cast(decimal_array1#x as array) AS decimal_array1#x, cast(decimal_array2#x as array) AS decimal_array2#x, cast(double_array1#x as array) AS double_array1#x, cast(double_array2#x as array) AS double_array2#x, cast(float_array1#x as array) AS float_array1#x, cast(float_array2#x as array) AS float_array2#x, cast(date_array1#x as array) AS date_array1#x, cast(data_array2#x as array) AS data_array2#x, cast(timestamp_array1#x as array) AS timestamp_array1#x, cast(timestamp_array2#x as array) AS timestamp_array2#x, cast(string_array1#x as array) AS string_array1#x, cast(string_array2#x as array) AS string_array2#x, cast(array_array1#x as array>) AS array_array1#x, cast(array_array2#x as array>) AS array_array2#x, ... 4 more fields] - +- Project [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 
4 more fields] + +- View (`various_arrays`, [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, struct_array1#x, ... 3 more fields]) + +- Project [cast(boolean_array1#x as array) AS boolean_array1#x, cast(boolean_array2#x as array) AS boolean_array2#x, cast(tinyint_array1#x as array) AS tinyint_array1#x, cast(tinyint_array2#x as array) AS tinyint_array2#x, cast(smallint_array1#x as array) AS smallint_array1#x, cast(smallint_array2#x as array) AS smallint_array2#x, cast(int_array1#x as array) AS int_array1#x, cast(int_array2#x as array) AS int_array2#x, cast(bigint_array1#x as array) AS bigint_array1#x, cast(bigint_array2#x as array) AS bigint_array2#x, cast(decimal_array1#x as array) AS decimal_array1#x, cast(decimal_array2#x as array) AS decimal_array2#x, cast(double_array1#x as array) AS double_array1#x, cast(double_array2#x as array) AS double_array2#x, cast(float_array1#x as array) AS float_array1#x, cast(float_array2#x as array) AS float_array2#x, cast(date_array1#x as array) AS date_array1#x, cast(data_array2#x as array) AS data_array2#x, cast(timestamp_array1#x as array) AS timestamp_array1#x, cast(timestamp_array2#x as array) AS timestamp_array2#x, cast(string_array1#x as array) AS string_array1#x, cast(string_array2#x as array) AS string_array2#x, cast(array_array1#x as array>) AS array_array1#x, cast(array_array2#x as array>) AS array_array2#x, cast(struct_array1#x as array>) AS struct_array1#x, ... 
3 more fields] + +- Project [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, struct_array1#x, ... 3 more fields] +- SubqueryAlias various_arrays - +- LocalRelation [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 4 more fields] + +- LocalRelation [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, struct_array1#x, ... 
3 more fields] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapconcat.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapconcat.sql.out index 62bbdeba34c2e..7ae45bc0f241a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapconcat.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapconcat.sql.out @@ -71,9 +71,9 @@ CreateViewCommand `various_maps`, SELECT * FROM VALUES ( string_int_map1, string_int_map2, int_string_map1, int_string_map2 ), false, false, LocalTempView, UNSUPPORTED, true - +- Project [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, ... 6 more fields] + +- Project [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, struct_map1#x, ... 5 more fields] +- SubqueryAlias various_maps - +- LocalRelation [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, ... 
6 more fields] + +- LocalRelation [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, struct_map1#x, ... 5 more fields] -- !query @@ -97,11 +97,11 @@ FROM various_maps -- !query analysis Project [map_concat(boolean_map1#x, boolean_map2#x) AS boolean_map#x, map_concat(tinyint_map1#x, tinyint_map2#x) AS tinyint_map#x, map_concat(smallint_map1#x, smallint_map2#x) AS smallint_map#x, map_concat(int_map1#x, int_map2#x) AS int_map#x, map_concat(bigint_map1#x, bigint_map2#x) AS bigint_map#x, map_concat(decimal_map1#x, decimal_map2#x) AS decimal_map#x, map_concat(float_map1#x, float_map2#x) AS float_map#x, map_concat(double_map1#x, double_map2#x) AS double_map#x, map_concat(date_map1#x, date_map2#x) AS date_map#x, map_concat(timestamp_map1#x, timestamp_map2#x) AS timestamp_map#x, map_concat(string_map1#x, string_map2#x) AS string_map#x, map_concat(array_map1#x, array_map2#x) AS array_map#x, map_concat(struct_map1#x, struct_map2#x) AS struct_map#x, map_concat(string_int_map1#x, string_int_map2#x) AS string_int_map#x, map_concat(int_string_map1#x, int_string_map2#x) AS int_string_map#x] +- SubqueryAlias various_maps - +- View (`various_maps`, [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, ... 
6 more fields]) - +- Project [cast(boolean_map1#x as map) AS boolean_map1#x, cast(boolean_map2#x as map) AS boolean_map2#x, cast(tinyint_map1#x as map) AS tinyint_map1#x, cast(tinyint_map2#x as map) AS tinyint_map2#x, cast(smallint_map1#x as map) AS smallint_map1#x, cast(smallint_map2#x as map) AS smallint_map2#x, cast(int_map1#x as map) AS int_map1#x, cast(int_map2#x as map) AS int_map2#x, cast(bigint_map1#x as map) AS bigint_map1#x, cast(bigint_map2#x as map) AS bigint_map2#x, cast(decimal_map1#x as map) AS decimal_map1#x, cast(decimal_map2#x as map) AS decimal_map2#x, cast(double_map1#x as map) AS double_map1#x, cast(double_map2#x as map) AS double_map2#x, cast(float_map1#x as map) AS float_map1#x, cast(float_map2#x as map) AS float_map2#x, cast(date_map1#x as map) AS date_map1#x, cast(date_map2#x as map) AS date_map2#x, cast(timestamp_map1#x as map) AS timestamp_map1#x, cast(timestamp_map2#x as map) AS timestamp_map2#x, cast(string_map1#x as map) AS string_map1#x, cast(string_map2#x as map) AS string_map2#x, cast(array_map1#x as map,array>) AS array_map1#x, cast(array_map2#x as map,array>) AS array_map2#x, ... 6 more fields] - +- Project [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, ... 6 more fields] + +- View (`various_maps`, [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, struct_map1#x, ... 
5 more fields]) + +- Project [cast(boolean_map1#x as map) AS boolean_map1#x, cast(boolean_map2#x as map) AS boolean_map2#x, cast(tinyint_map1#x as map) AS tinyint_map1#x, cast(tinyint_map2#x as map) AS tinyint_map2#x, cast(smallint_map1#x as map) AS smallint_map1#x, cast(smallint_map2#x as map) AS smallint_map2#x, cast(int_map1#x as map) AS int_map1#x, cast(int_map2#x as map) AS int_map2#x, cast(bigint_map1#x as map) AS bigint_map1#x, cast(bigint_map2#x as map) AS bigint_map2#x, cast(decimal_map1#x as map) AS decimal_map1#x, cast(decimal_map2#x as map) AS decimal_map2#x, cast(double_map1#x as map) AS double_map1#x, cast(double_map2#x as map) AS double_map2#x, cast(float_map1#x as map) AS float_map1#x, cast(float_map2#x as map) AS float_map2#x, cast(date_map1#x as map) AS date_map1#x, cast(date_map2#x as map) AS date_map2#x, cast(timestamp_map1#x as map) AS timestamp_map1#x, cast(timestamp_map2#x as map) AS timestamp_map2#x, cast(string_map1#x as map) AS string_map1#x, cast(string_map2#x as map) AS string_map2#x, cast(array_map1#x as map,array>) AS array_map1#x, cast(array_map2#x as map,array>) AS array_map2#x, cast(struct_map1#x as map,struct>) AS struct_map1#x, ... 5 more fields] + +- Project [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, struct_map1#x, ... 
5 more fields] +- SubqueryAlias various_maps - +- LocalRelation [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, ... 6 more fields] + +- LocalRelation [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, struct_map1#x, ... 5 more fields] -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out index 1d76b9a8be8b9..27f02a3dcdb64 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out @@ -1330,7 +1330,7 @@ Aggregate [cast(udf(cast(cast(udf(cast(count(1) as string)) as bigint) as string +- Filter cast(udf(cast(cast(udf(cast(unique1#x as string)) as int) as string)) as int) IN (list#x []) : +- Project [cast(udf(cast(unique1#x as string)) as int) AS udf(unique1)#x] : +- Filter (cast(udf(cast(cast(udf(cast(unique2#x as string)) as int) as string)) as int) = cast(udf(cast(42 as string)) as int)) - : +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, ... 
7 more fields] + : +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, ... 6 more fields] : +- Join Inner, (unique1#x = unique1#x) : :- SubqueryAlias b : : +- SubqueryAlias spark_catalog.default.tenk1 diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-window.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-window.sql.out index f4e11d7628601..f9ff41cc81a6d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-window.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-window.sql.out @@ -385,11 +385,11 @@ FROM testData WINDOW w AS (PARTITION BY udf(cate) ORDER BY udf(val)) ORDER BY cate, udf(val) -- !query analysis -Project [udf(val)#x, cate#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, cume_dist#x, percent_rank#x, ntile#x, row_number#x, ... 11 more fields] +Project [udf(val)#x, cate#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, cume_dist#x, percent_rank#x, ntile#x, row_number#x, var_pop#x, ... 
10 more fields] +- Sort [cate#x ASC NULLS FIRST, cast(udf(cast(val#x as string)) as int) ASC NULLS FIRST], true - +- Project [udf(val)#x, cate#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, cume_dist#x, percent_rank#x, ntile#x, row_number#x, ... 12 more fields] - +- Project [udf(val)#x, cate#x, _w0#x, _w1#x, _w2#x, _w3#x, _w4#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, ... 50 more fields] - +- Window [max(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS max#x, min(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS min#x, min(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS min#x, count(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS count#xL, sum(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS sum#xL, avg(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS avg#x, stddev(_w2#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS stddev#x, first_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, 
unboundedpreceding$(), currentrow$())) AS first_value#x, first_value(_w0#x, true) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value_ignore_null#x, first_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value_contain_null#x, any_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value#x, any_value(_w0#x, true) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value_ignore_null#x, any_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value_contain_null#x, last_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value#x, last_value(_w0#x, true) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value_ignore_null#x, last_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value_contain_null#x, rank(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank#x, dense_rank(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS dense_rank#x, cume_dist() windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS cume_dist#x, percent_rank(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 
unboundedpreceding$(), currentrow$())) AS percent_rank#x, ntile(2) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ntile#x, row_number() windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS row_number#x, var_pop(_w2#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS var_pop#x, var_samp(_w2#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS var_samp#x, ... 9 more fields], [_w1#x], [_w0#x ASC NULLS FIRST] + +- Project [udf(val)#x, cate#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, cume_dist#x, percent_rank#x, ntile#x, row_number#x, var_pop#x, ... 11 more fields] + +- Project [udf(val)#x, cate#x, _w0#x, _w1#x, _w2#x, _w3#x, _w4#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, ... 
49 more fields] + +- Window [max(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS max#x, min(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS min#x, min(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS min#x, count(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS count#xL, sum(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS sum#xL, avg(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS avg#x, stddev(_w2#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS stddev#x, first_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value#x, first_value(_w0#x, true) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value_ignore_null#x, first_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value_contain_null#x, any_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value#x, any_value(_w0#x, true) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value_ignore_null#x, any_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, 
specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value_contain_null#x, last_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value#x, last_value(_w0#x, true) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value_ignore_null#x, last_value(_w0#x, false) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value_contain_null#x, rank(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank#x, dense_rank(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS dense_rank#x, cume_dist() windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS cume_dist#x, percent_rank(_w0#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS percent_rank#x, ntile(2) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ntile#x, row_number() windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS row_number#x, var_pop(_w2#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS var_pop#x, var_samp(_w2#x) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS var_samp#x, approx_count_distinct(_w0#x, 0.05, 0, 0) windowspecdefinition(_w1#x, _w0#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS 
approx_count_distinct#xL, ... 8 more fields], [_w1#x], [_w0#x ASC NULLS FIRST] +- Project [cast(udf(cast(val#x as string)) as int) AS udf(val)#x, cate#x, cast(udf(cast(val#x as string)) as int) AS _w0#x, cast(udf(cast(cate#x as string)) as string) AS _w1#x, cast(cast(udf(cast(val#x as string)) as int) as double) AS _w2#x, cast(cast(udf(cast(val_long#xL as string)) as bigint) as double) AS _w3#x, cast(udf(cast(val_double#x as string)) as double) AS _w4#x, val#x] +- SubqueryAlias testdata +- View (`testData`, [val#x, val_long#xL, val_double#x, val_date#x, val_timestamp#x, cate#x]) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out index 5a74c4be107e3..d26c5ba4430da 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out @@ -833,6 +833,6 @@ WithCTE +- Project [coalesce(key#x, key#x) AS key#x, key#x, key#x, key#x] +- Join FullOuter, (key#x = key#x) :- SubqueryAlias t1 - : +- CTERelationRef xxxx, true, [key#x], false + : +- CTERelationRef xxxx, true, [key#x], false, false +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [key#x], false + +- CTERelationRef xxxx, true, [key#x], false, false diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out index c1638096312bd..f8a03652c02b6 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out @@ -583,9 +583,9 @@ WINDOW w AS (PARTITION BY cate ORDER BY val) ORDER BY cate, val -- !query analysis Sort [cate#x ASC NULLS FIRST, val#x ASC NULLS FIRST], true -+- Project [val#x, cate#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, 
any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, cume_dist#x, percent_rank#x, ntile#x, row_number#x, ... 11 more fields] - +- Project [val#x, cate#x, _w0#x, _w1#x, val_double#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, cume_dist#x, ... 47 more fields] - +- Window [max(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS max#x, min(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS min#x, min(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS min#x, count(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS count#xL, sum(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS sum#xL, avg(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS avg#x, stddev(_w0#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS stddev#x, first_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value#x, first_value(val#x, true) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value_ignore_null#x, first_value(val#x, 
false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value_contain_null#x, any_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value#x, any_value(val#x, true) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value_ignore_null#x, any_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value_contain_null#x, last_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value#x, last_value(val#x, true) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value_ignore_null#x, last_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value_contain_null#x, rank(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank#x, dense_rank(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS dense_rank#x, cume_dist() windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS cume_dist#x, percent_rank(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS percent_rank#x, ntile(2) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ntile#x, row_number() windowspecdefinition(cate#x, 
val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS row_number#x, var_pop(_w0#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS var_pop#x, var_samp(_w0#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS var_samp#x, ... 9 more fields], [cate#x], [val#x ASC NULLS FIRST] ++- Project [val#x, cate#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, cume_dist#x, percent_rank#x, ntile#x, row_number#x, var_pop#x, ... 10 more fields] + +- Project [val#x, cate#x, _w0#x, _w1#x, val_double#x, max#x, min#x, min#x, count#xL, sum#xL, avg#x, stddev#x, first_value#x, first_value_ignore_null#x, first_value_contain_null#x, any_value#x, any_value_ignore_null#x, any_value_contain_null#x, last_value#x, last_value_ignore_null#x, last_value_contain_null#x, rank#x, dense_rank#x, cume_dist#x, percent_rank#x, ... 
46 more fields] + +- Window [max(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS max#x, min(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS min#x, min(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS min#x, count(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS count#xL, sum(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS sum#xL, avg(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS avg#x, stddev(_w0#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS stddev#x, first_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value#x, first_value(val#x, true) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value_ignore_null#x, first_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value_contain_null#x, any_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value#x, any_value(val#x, true) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value_ignore_null#x, any_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, 
specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value_contain_null#x, last_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value#x, last_value(val#x, true) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value_ignore_null#x, last_value(val#x, false) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value_contain_null#x, rank(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank#x, dense_rank(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS dense_rank#x, cume_dist() windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS cume_dist#x, percent_rank(val#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS percent_rank#x, ntile(2) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ntile#x, row_number() windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS row_number#x, var_pop(_w0#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS var_pop#x, var_samp(_w0#x) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS var_samp#x, approx_count_distinct(val#x, 0.05, 0, 0) windowspecdefinition(cate#x, val#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), 
currentrow$())) AS approx_count_distinct#xL, ... 8 more fields], [cate#x], [val#x ASC NULLS FIRST] +- Project [val#x, cate#x, cast(val#x as double) AS _w0#x, cast(val_long#xL as double) AS _w1#x, val_double#x] +- SubqueryAlias testdata +- View (`testData`, [val#x, val_long#xL, val_double#x, val_date#x, val_timestamp#x, cate#x]) @@ -1009,7 +1009,7 @@ ORDER BY id -- !query analysis Sort [id#x ASC NULLS FIRST], true +- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, lag_3#x, lag_plus_3#x, nth_value_1#x, nth_value_2#x, nth_value_3#x, first_value#x, any_value#x, last_value#x] - +- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, lag_3#x, lag_plus_3#x, nth_value_1#x, nth_value_2#x, nth_value_3#x, first_value#x, any_value#x, last_value#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, ... 9 more fields] + +- Project [content#x, id#x, v#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, lag_3#x, lag_plus_3#x, nth_value_1#x, nth_value_2#x, nth_value_3#x, first_value#x, any_value#x, last_value#x, lead_0#x, lead_1#x, lead_2#x, lead_3#x, lag_0#x, lag_1#x, lag_2#x, ... 
8 more fields] +- Window [lead(v#x, 0, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 0, 0)) AS lead_0#x, lead(v#x, 1, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 1, 1)) AS lead_1#x, lead(v#x, 2, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 2, 2)) AS lead_2#x, lead(v#x, 3, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 3, 3)) AS lead_3#x, lag(v#x, 0, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, 0, 0)) AS lag_0#x, lag(v#x, -1, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS lag_1#x, lag(v#x, -2, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS lag_2#x, lag(v#x, -3, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -3, -3)) AS lag_3#x, lag(v#x, -3, null) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RowFrame, -3, -3)) AS lag_plus_3#x, nth_value(v#x, 1, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_1#x, nth_value(v#x, 2, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_2#x, nth_value(v#x, 3, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS nth_value_3#x, first(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS first_value#x, any_value(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS any_value#x, last(v#x, true) windowspecdefinition(id#x ASC NULLS FIRST, specifiedwindowframe(RangeFrame, unboundedpreceding$(), currentrow$())) AS last_value#x], [id#x ASC NULLS FIRST] +- 
Project [content#x, id#x, v#x] +- SubqueryAlias test_ignore_null diff --git a/sql/core/src/test/resources/sql-tests/inputs/collations.sql b/sql/core/src/test/resources/sql-tests/inputs/collations.sql index b4d33bb0196c9..df15adf2f8fe4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/collations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/collations.sql @@ -328,6 +328,7 @@ select bit_length(utf8_binary), bit_length(utf8_lcase) from t5; select bit_length(utf8_binary collate utf8_lcase), bit_length(utf8_lcase collate utf8_binary) from t5; select octet_length(utf8_binary), octet_length(utf8_lcase) from t5; select octet_length(utf8_binary collate utf8_lcase), octet_length(utf8_lcase collate utf8_binary) from t5; +select octet_length(utf8_binary collate utf8_lcase_rtrim), octet_length(utf8_lcase collate utf8_binary_rtrim) from t5; -- Luhncheck select luhn_check(num) from t9; @@ -344,18 +345,22 @@ select levenshtein(utf8_binary, 'AaAA' collate utf8_lcase, 3), levenshtein(utf8_ -- IsValidUTF8 select is_valid_utf8(utf8_binary), is_valid_utf8(utf8_lcase) from t5; select is_valid_utf8(utf8_binary collate utf8_lcase), is_valid_utf8(utf8_lcase collate utf8_binary) from t5; +select is_valid_utf8(utf8_binary collate utf8_lcase_rtrim), is_valid_utf8(utf8_lcase collate utf8_binary_rtrim) from t5; -- MakeValidUTF8 select make_valid_utf8(utf8_binary), make_valid_utf8(utf8_lcase) from t5; select make_valid_utf8(utf8_binary collate utf8_lcase), make_valid_utf8(utf8_lcase collate utf8_binary) from t5; +select make_valid_utf8(utf8_binary collate utf8_lcase_rtrim), make_valid_utf8(utf8_lcase collate utf8_binary_rtrim) from t5; -- ValidateUTF8 select validate_utf8(utf8_binary), validate_utf8(utf8_lcase) from t5; select validate_utf8(utf8_binary collate utf8_lcase), validate_utf8(utf8_lcase collate utf8_binary) from t5; +select validate_utf8(utf8_binary collate utf8_lcase_rtrim), validate_utf8(utf8_lcase collate utf8_binary_rtrim) from t5; -- TryValidateUTF8 select 
try_validate_utf8(utf8_binary), try_validate_utf8(utf8_lcase) from t5; select try_validate_utf8(utf8_binary collate utf8_lcase), try_validate_utf8(utf8_lcase collate utf8_binary) from t5; +select try_validate_utf8(utf8_binary collate utf8_lcase_rtrim), try_validate_utf8(utf8_lcase collate utf8_binary_rtrim) from t5; -- Left/Right/Substr select substr(utf8_binary, 2, 2), substr(utf8_lcase, 2, 2) from t5; diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql index 67a94ce61617d..1e17529d545bf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -1,5 +1,6 @@ create temporary view t as select * from values 0, 1, 2 as t(id); create temporary view t2 as select * from values 0, 1 as t(id); +create temporary view t3 as select * from t; -- WITH clause should not fall into infinite loop by referencing self WITH s AS (SELECT 1 FROM s) SELECT * FROM s; @@ -10,6 +11,9 @@ SELECT * FROM r; -- WITH clause should reference the base table WITH t AS (SELECT 1 FROM t) SELECT * FROM t; +-- Table `t` referenced by a view should take precedence over the top CTE `t` +WITH t AS (SELECT 1) SELECT * FROM t3; + -- WITH clause should not allow cross reference WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2; @@ -175,3 +179,4 @@ with cte as (select * from cte) select * from cte; -- Clean up DROP VIEW IF EXISTS t; DROP VIEW IF EXISTS t2; +DROP VIEW IF EXISTS t3; diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe.sql b/sql/core/src/test/resources/sql-tests/inputs/describe.sql index b37931456d00c..aa6f38defdecc 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/describe.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/describe.sql @@ -21,6 +21,14 @@ ALTER TABLE t ADD PARTITION (c='Us', d=1); DESCRIBE t; +DESCRIBE EXTENDED t AS JSON; + +-- AnalysisException: describe table as json must be extended 
+DESCRIBE t AS JSON; + +-- AnalysisException: describe col as json unsupported +DESC FORMATTED t a AS JSON; + DESC default.t; DESC TABLE t; @@ -39,6 +47,8 @@ DESC EXTENDED t; DESC t PARTITION (c='Us', d=1); +DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON; + DESC EXTENDED t PARTITION (c='Us', d=1); DESC FORMATTED t PARTITION (c='Us', d=1); @@ -88,6 +98,7 @@ EXPLAIN DESC EXTENDED t; EXPLAIN EXTENDED DESC t; EXPLAIN DESCRIBE t b; EXPLAIN DESCRIBE t PARTITION (c='Us', d=2); +EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON; -- DROP TEST TABLES/VIEWS DROP TABLE t; @@ -119,3 +130,4 @@ DESC EXTENDED e; DESC TABLE EXTENDED e; DESC FORMATTED e; + diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 6dd0adbc87221..0cc1f62b0583a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -221,6 +221,8 @@ SELECT histogram_numeric(col, 3) FROM VALUES (CAST(1 AS SMALLINT)), (CAST(2 AS SMALLINT)), (CAST(3 AS SMALLINT)) AS tab(col); SELECT histogram_numeric(col, 3) FROM VALUES (CAST(1 AS BIGINT)), (CAST(2 AS BIGINT)), (CAST(3 AS BIGINT)) AS tab(col); +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS DECIMAL(4, 2))), (CAST(2 AS DECIMAL(4, 2))), (CAST(3 AS DECIMAL(4, 2))) AS tab(col); SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'), (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col); SELECT histogram_numeric(col, 3) FROM VALUES (INTERVAL '100-00' YEAR TO MONTH), diff --git a/sql/core/src/test/resources/sql-tests/inputs/listagg-collations.sql b/sql/core/src/test/resources/sql-tests/inputs/listagg-collations.sql new file mode 100644 index 0000000000000..35f86183c37b3 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/listagg-collations.sql @@ -0,0 +1,12 @@ +-- Test cases with collations +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 
COLLATE utf8_binary) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1); +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1); +SELECT listagg(DISTINCT c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1); +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1); +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('B'), ('b'), ('A')) AS t(c1); +SELECT listagg(DISTINCT c1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc '), ('x'), ('abc')) AS t(c1); +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1) FROM (VALUES ('abc '), ('abc '), ('abc\n'), ('abc'), ('x')) AS t(c1); +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc '), ('abc\n'), ('abc'), ('x')) AS t(c1); + +-- Error case with collations +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('b'), ('A'), ('B')) AS t(c1); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/listagg.sql b/sql/core/src/test/resources/sql-tests/inputs/listagg.sql new file mode 100644 index 0000000000000..15c8cfa823e9b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/listagg.sql @@ -0,0 +1,38 @@ +-- Create temporary views +CREATE TEMP VIEW df AS +SELECT * FROM (VALUES ('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), (NULL, NULL)) AS t(a, b); + +CREATE TEMP VIEW df2 AS +SELECT * FROM (VALUES (1, true), (2, false), (3, false)) AS t(a, b); + +-- Test cases for listagg function +SELECT listagg(b) FROM df GROUP BY a; +SELECT string_agg(b) FROM df GROUP BY a; +SELECT listagg(b, NULL) FROM df GROUP BY a; +SELECT listagg(b) FROM df WHERE 1 != 1; +SELECT listagg(b, '|') FROM df GROUP BY a; +SELECT listagg(a) FROM df; +SELECT listagg(DISTINCT a) FROM df; +SELECT listagg(a) WITHIN GROUP (ORDER BY a) FROM df; 
+SELECT listagg(a) WITHIN GROUP (ORDER BY a DESC) FROM df; +SELECT listagg(a) WITHIN GROUP (ORDER BY a DESC) OVER (PARTITION BY b) FROM df; +SELECT listagg(a) WITHIN GROUP (ORDER BY b) FROM df; +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC) FROM df; +SELECT listagg(a, '|') WITHIN GROUP (ORDER BY b DESC) FROM df; +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC, a ASC) FROM df; +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC, a DESC) FROM df; +SELECT listagg(c1) FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1); +SELECT listagg(c1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1); +SELECT listagg(c1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1); +SELECT listagg(a), listagg(b, ',') FROM df2; + +-- Error cases +SELECT listagg(c1) FROM (VALUES (ARRAY('a', 'b'))) AS t(c1); +SELECT listagg(c1, ', ') FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1); +SELECT listagg(b, a) FROM df GROUP BY a; +SELECT listagg(a) OVER (ORDER BY a) FROM df; +SELECT listagg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a) FROM df; +SELECT string_agg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a) FROM df; +SELECT listagg(DISTINCT a) OVER (ORDER BY a) FROM df; +SELECT listagg(DISTINCT a) WITHIN GROUP (ORDER BY b) FROM df; +SELECT listagg(DISTINCT a) WITHIN GROUP (ORDER BY a, b) FROM df; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql b/sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql index b9224db129ea4..0cae29d722a8b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql @@ -71,6 +71,60 @@ create temporary view windowTestData as select * from values (3, 1L, 1.0D, date("2017-08-01"), timestamp_seconds(1501545600), null) AS testData(val, val_long, val_double, val_date, val_timestamp, cate); +-- FROM operators: positive tests. +---------------------------------- + +-- FromClause alone. +from t; + +-- Table alone. 
+table t; + +-- Selecting from a constant. +from t +|> select 1 as x; + +-- Selecting using a table alias. +from t as t_alias +|> select t_alias.x; + +-- Selecting using a table alias. +from t as t_alias +|> select t_alias.x as tx, t_alias.y as ty +|> where ty = 'def' +|> select tx; + +-- Selecting from multiple relations. +from t, other +|> select t.x + other.a as z; + +-- Selecting from multiple relations with join. +from t join other on (t.x = other.a) +|> select t.x + other.a as z; + +-- Selecting from lateral view. +from t lateral view explode(array(100, 101)) as ly +|> select t.x + ly as z; + +-- Selecting struct fields. +from st +|> select col.i1; + +-- Selecting struct fields using a table alias. +from st as st_alias +|> select st_alias.col.i1; + +-- Selecting from a VALUES list. +from values (0), (1) tab(col) +|> select col as x; + +-- FROM operators: negative tests. +---------------------------------- + +-- It is not possible to use the FROM operator accepting an input relation. +from t +|> from t; + -- SELECT operators: positive tests. --------------------------------------- @@ -241,6 +295,190 @@ table t table t |> extend *; +-- SET operators: positive tests. +--------------------------------- + +-- Setting with a constant. +-- The indicated column is not the last column in the table, and the SET operator will replace it +-- with the new value in its existing position. +table t +|> set x = 1; + +-- Setting with an attribute. +table t +|> set y = x; + +-- Setting with an expression. +table t +|> extend 1 as z +|> set z = x + length(y); + +-- Setting two times. +table t +|> extend 1 as z +|> extend 2 as zz +|> set z = x + length(y), zz = x + 1; + +table other +|> extend 3 as c +|> set a = b, b = c; + +-- Setting two times with a lateral reference. +table t +|> extend 1 as z +|> extend 2 as zz +|> set z = x + length(y), zz = z + 1; + +-- Setting two times in sequence. 
+table t +|> extend 1 as z +|> set z = x + length(y) +|> set z = z + 1; + +-- SET assignments with duplicate keys. This is supported, and we can update the column as we go. +table t +|> extend 1 as z +|> set z = x + length(y), z = z + 1; + +-- Setting with a struct field. +select col from st +|> extend 1 as z +|> set z = col.i1; + +-- Setting with a subquery. +table t +|> set y = (select a from other where x = a limit 1); + +-- Setting with a backquoted column name with a dot inside. +table t +|> extend 1 as `x.y.z` +|> set `x.y.z` = x + length(y); + +-- Window functions are allowed in the pipe operator SET list. +table t +|> extend 1 as z +|> set z = first_value(x) over (partition by y); + +-- Any prior table aliases remain visible after a SET operator. +values (0), (1) lhs(a) +|> inner join values (1), (2) rhs(a) using (a) +|> extend lhs.a + rhs.a as z1 +|> extend lhs.a - rhs.a as z2 +|> drop z1 +|> where z2 = 0 +|> order by lhs.a, rhs.a, z2 +|> set z2 = 4 +|> limit 2 +|> select lhs.a, rhs.a, z2; + +-- SET operators: negative tests. +--------------------------------- + +-- SET with a column name that does not exist in the input relation. +table t +|> set z = 1; + +-- SET with an alias. +table t +|> set x = 1 as z; + +-- Setting nested fields in structs is not supported. +select col from st +|> set col.i1 = 42; + +-- DROP operators: positive tests. +------------------------------------ + +-- Dropping a column. +table t +|> drop y; + +-- Dropping two times. +select 1 as x, 2 as y, 3 as z +|> drop z, y; + +-- Dropping two times in sequence. +select 1 as x, 2 as y, 3 as z +|> drop z +|> drop y; + +-- Dropping all columns in the input relation. +select x from t +|> drop x; + +-- Dropping a backquoted column name with a dot inside. +table t +|> extend 1 as `x.y.z` +|> drop `x.y.z`; + +-- DROP operators: negative tests. +---------------------------------- + +-- Dropping a column that is not present in the input relation. 
+table t +|> drop z; + +-- Attempting to drop a struct field. +table st +|> drop col.i1; + +table st +|> drop `col.i1`; + +-- Duplicate fields in the drop list. +select 1 as x, 2 as y, 3 as z +|> drop z, y, z; + +-- AS operators: positive tests. +-------------------------------- + +-- Renaming a table. +table t +|> as u +|> select u.x, u.y; + +-- Renaming an input relation that is not a table. +select 1 as x, 2 as y +|> as u +|> select u.x, u.y; + +-- Renaming as a backquoted name including a period. +table t +|> as `u.v` +|> select `u.v`.x, `u.v`.y; + +-- Renaming two times. +table t +|> as u +|> as v +|> select v.x, v.y; + +-- Filtering by referring to the table or table subquery alias. +table t +|> as u +|> where u.x = 1; + +-- AS operators: negative tests. +-------------------------------- + +-- Multiple aliases are not supported. +table t +|> as u, v; + +-- Expressions are not supported. +table t +|> as 1 + 2; + +-- Renaming as an invalid name. +table t +|> as u-v; + +table t +|> as u@v; + +table t +|> as u#######v; + -- WHERE operators: positive tests. ----------------------------------- @@ -316,6 +554,21 @@ table t |> select x, length(y) as z |> where x + length(y) < 4; +table t +|> select x, length(y) as z +|> limit 1000 +|> where x + length(y) < 4; + +table t +|> select x, length(y) as z +|> limit 1000 offset 1 +|> where x + length(y) < 4; + +table t +|> select x, length(y) as z +|> order by x, y +|> where x + length(y) < 4; + -- If the WHERE clause wants to filter rows produced by an aggregation, it is not valid to try to -- refer to the aggregate functions directly; it is necessary to use aliases instead. (select x, sum(length(y)) as sum_len from t group by x) @@ -617,10 +870,17 @@ values (0, 'abc') tab(x, y) |> union all table t; -- Union distinct with a VALUES list. -values (0, 1) tab(x, y) +-- The |> WHERE operator applies to the result of the |> UNION operator, not to the "table t" input. 
+values (2, 'xyz') tab(x, y) |> union table t |> where x = 0; +-- Union distinct with a VALUES list. +-- The |> DROP operator applies to the result of the |> UNION operator, not to the "table t" input. +values (2, 'xyz') tab(x, y) +|> union table t +|> drop x; + -- Union all with a table subquery on both the source and target sides. (select * from t) |> union all (select * from t); @@ -772,6 +1032,36 @@ select 1 as x, 2 as y select 3 as x, 4 as y |> aggregate group by 1, 2; +values (3, 4) as tab(x, y) +|> aggregate sum(y) group by 1; + +values (3, 4), (5, 4) as tab(x, y) +|> aggregate sum(y) group by 1; + +select 3 as x, 4 as y +|> aggregate sum(y) group by 1, 1; + +select 1 as `1`, 2 as `2` +|> aggregate sum(`2`) group by `1`; + +select 3 as x, 4 as y +|> aggregate sum(y) group by 2; + +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 2; + +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 3; + +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 2, 3; + +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 1, 2, 3; + +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by x, 2, 3; + -- Basic table aggregation. table t |> aggregate sum(x); @@ -961,6 +1251,502 @@ table windowTestData |> select cate, val, sum(val) over w as sum_val window w as (order by val); +-- Exercise SQL compilation using a subset of TPC-DS table schemas. 
+------------------------------------------------------------------- + +-- Q1 +with customer_total_return as +(select + sr_customer_sk as ctr_customer_sk, + sr_store_sk as ctr_store_sk, + sum(sr_return_amt) as ctr_total_return + from store_returns, date_dim + where sr_returned_date_sk = d_date_sk and d_year = 2000 + group by sr_customer_sk, sr_store_sk) +select c_customer_id +from customer_total_return ctr1, store, customer +where ctr1.ctr_total_return > + (select avg(ctr_total_return) * 1.2 + from customer_total_return ctr2 + where ctr1.ctr_store_sk = ctr2.ctr_store_sk) + and s_store_sk = ctr1.ctr_store_sk + and s_state = 'tn' + and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100; + +with customer_total_return as + (from store_returns + |> join date_dim + |> where sr_returned_date_sk = d_date_sk and d_year = 2000 + |> aggregate sum(sr_return_amt) as ctr_total_return + group by sr_customer_sk as ctr_customer_sk, sr_store_sk as ctr_store_sk) +from customer_total_return ctr1 +|> join store +|> join customer +|> where ctr1.ctr_total_return > + (table customer_total_return + |> as ctr2 + |> where ctr1.ctr_store_sk = ctr2.ctr_store_sk + |> aggregate avg(ctr_total_return) * 1.2) + and s_store_sk = ctr1.ctr_store_sk + and s_state = 'tn' + and ctr1.ctr_customer_sk = c_customer_sk +|> order by c_customer_id +|> limit 100 +|> select c_customer_id; + +-- Q2 +with wscs as +( select + sold_date_sk, + sales_price + from (select + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + from web_sales) x + union all + (select + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + ( select + d_week_seq, + sum(case when (d_day_name = 'sunday') + then sales_price + else null end) + sun_sales, + sum(case when (d_day_name = 'monday') + then sales_price + else null end) + mon_sales, + sum(case when (d_day_name = 'tuesday') + then sales_price + else null end) + tue_sales, + sum(case when (d_day_name = 
'wednesday') + then sales_price + else null end) + wed_sales, + sum(case when (d_day_name = 'thursday') + then sales_price + else null end) + thu_sales, + sum(case when (d_day_name = 'friday') + then sales_price + else null end) + fri_sales, + sum(case when (d_day_name = 'saturday') + then sales_price + else null end) + sat_sales + from wscs, date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) +select + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2) +from + (select + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + from wswscs, date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and d_year = 2001) y, + (select + wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + from wswscs, date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and d_year = 2001 + 1) z +where d_week_seq1 = d_week_seq2 - 53 +order by d_week_seq1; + +with wscs as + (table web_sales + |> select + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + |> as x + |> union all ( + table catalog_sales + |> select + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price) + |> select + sold_date_sk, + sales_price), +wswscs as + (table wscs + |> join date_dim + |> where d_date_sk = sold_date_sk + |> aggregate + sum(case when (d_day_name = 'sunday') + then sales_price + else null end) + sun_sales, + sum(case when (d_day_name = 'monday') + then sales_price + else null end) + mon_sales, + sum(case when (d_day_name = 'tuesday') + then sales_price + else null 
end) + tue_sales, + sum(case when (d_day_name = 'wednesday') + then sales_price + else null end) + wed_sales, + sum(case when (d_day_name = 'thursday') + then sales_price + else null end) + thu_sales, + sum(case when (d_day_name = 'friday') + then sales_price + else null end) + fri_sales, + sum(case when (d_day_name = 'saturday') + then sales_price + else null end) + sat_sales + group by d_week_seq) +table wswscs +|> join date_dim +|> where date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 +|> select + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 +|> as y +|> join ( + table wswscs + |> join date_dim + |> where date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1 + |> select + wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + |> as z) +|> where d_week_seq1 = d_week_seq2 - 53 +|> order by d_week_seq1 +|> select + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2); + +-- Q3 +select + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + sum(ss_ext_sales_price) sum_agg +from date_dim dt, store_sales, item +where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 128 + and dt.d_moy = 11 +group by dt.d_year, item.i_brand, item.i_brand_id +order by dt.d_year, sum_agg desc, brand_id +limit 100; + +table date_dim +|> as dt +|> join store_sales +|> join item +|> where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and 
item.i_manufact_id = 128 + and dt.d_moy = 11 +|> aggregate sum(ss_ext_sales_price) sum_agg + group by dt.d_year d_year, item.i_brand_id brand_id, item.i_brand brand +|> order by d_year, sum_agg desc, brand_id +|> limit 100; + +-- Q12 +select + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ws_ext_sales_price) as itemrevenue, + sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) + over + (partition by i_class) as revenueratio +from + web_sales, item, date_dim +where + ws_item_sk = i_item_sk + and i_category in ('sports', 'books', 'home') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1999-02-22' as date) + and (cast('1999-02-22' as date) + interval 30 days) +group by + i_item_id, i_item_desc, i_category, i_class, i_current_price +order by + i_category, i_class, i_item_id, i_item_desc, revenueratio +limit 100; + +table web_sales +|> join item +|> join date_dim +|> where ws_item_sk = i_item_sk + and i_category in ('sports', 'books', 'home') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1999-02-22' as date) + and (cast('1999-02-22' as date) + interval 30 days) +|> aggregate sum(ws_ext_sales_price) AS itemrevenue + group by i_item_id, i_item_desc, i_category, i_class, i_current_price +|> extend + itemrevenue * 100 / sum(itemrevenue) + over (partition by i_class) as revenueratio +|> order by i_category, i_class, i_item_id, i_item_desc, revenueratio +|> select i_item_desc, i_category, i_class, i_current_price, itemrevenue, revenueratio +|> limit 100; + +-- Q44 +select + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing +from (select * +from (select + item_sk, + rank() + over ( + order by rank_col asc) rnk +from (select + ss_item_sk item_sk, + avg(ss_net_profit) rank_col +from store_sales ss1 +where ss_store_sk = 4 +group by ss_item_sk +having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit) rank_col +from store_sales +where ss_store_sk = 4 + and ss_addr_sk is null +group by 
ss_store_sk)) v1) v11 +where rnk < 11) asceding, + (select * + from (select + item_sk, + rank() + over ( + order by rank_col desc) rnk + from (select + ss_item_sk item_sk, + avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_addr_sk is null + group by ss_store_sk)) v2) v21 + where rnk < 11) descending, + item i1, item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk = asceding.item_sk + and i2.i_item_sk = descending.item_sk +order by asceding.rnk +limit 100; + +from store_sales ss1 +|> where ss_store_sk = 4 +|> aggregate avg(ss_net_profit) rank_col + group by ss_item_sk as item_sk +|> where rank_col > 0.9 * ( + from store_sales + |> where ss_store_sk = 4 + and ss_addr_sk is null + |> aggregate avg(ss_net_profit) rank_col + group by ss_store_sk + |> select rank_col) +|> as v1 +|> select + item_sk, + rank() over ( + order by rank_col asc) rnk +|> as v11 +|> where rnk < 11 +|> as asceding +|> join ( + from store_sales ss1 + |> where ss_store_sk = 4 + |> aggregate avg(ss_net_profit) rank_col + group by ss_item_sk as item_sk + |> where rank_col > 0.9 * ( + table store_sales + |> where ss_store_sk = 4 + and ss_addr_sk is null + |> aggregate avg(ss_net_profit) rank_col + group by ss_store_sk + |> select rank_col) + |> as v2 + |> select + item_sk, + rank() over ( + order by rank_col asc) rnk + |> as v21 + |> where rnk < 11) descending +|> join item i1 +|> join item i2 +|> where asceding.rnk = descending.rnk + and i1.i_item_sk = asceding.item_sk + and i2.i_item_sk = descending.item_sk +|> order by asceding.rnk +|> select + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing; + +-- Q51 +with web_v1 as ( + select + ws_item_sk item_sk, + d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk + order by d_date + rows between unbounded preceding and 
current row) cume_sales + from web_sales, date_dim + where ws_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ws_item_sk is not null + group by ws_item_sk, d_date), + store_v1 as ( + select + ss_item_sk item_sk, + d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk + order by d_date + rows between unbounded preceding and current row) cume_sales + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ss_item_sk is not null + group by ss_item_sk, d_date) +select * +from (select + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) web_cumulative, + max(store_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) store_cumulative +from (select + case when web.item_sk is not null + then web.item_sk + else store.item_sk end item_sk, + case when web.d_date is not null + then web.d_date + else store.d_date end d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + ) x) y +where web_cumulative > store_cumulative +order by item_sk, d_date +limit 100; + +with web_v1 as ( + table web_sales + |> join date_dim + |> where ws_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ws_item_sk is not null + |> aggregate sum(ws_sales_price) as sum_ws_sales_price + group by ws_item_sk as item_sk, d_date + |> extend sum(sum_ws_sales_price) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) + as cume_sales), +store_v1 as ( + table store_sales + |> join date_dim + |> where ss_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ss_item_sk is not null + |> aggregate sum(ss_sales_price) as sum_ss_sales_price + 
group by ss_item_sk as item_sk, d_date + |> extend sum(sum_ss_sales_price) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) + as cume_sales) +table web_v1 +|> as web +|> full outer join store_v1 store + on (web.item_sk = store.item_sk and web.d_date = store.d_date) +|> select + case when web.item_sk is not null + then web.item_sk + else store.item_sk end item_sk, + case when web.d_date is not null + then web.d_date + else store.d_date end d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +|> as x +|> select + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) web_cumulative, + max(store_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) store_cumulative +|> as y +|> where web_cumulative > store_cumulative +|> order by item_sk, d_date +|> limit 100; + -- Cleanup. 
----------- drop table t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/random.sql b/sql/core/src/test/resources/sql-tests/inputs/random.sql index a71b0293295fc..95be99595cc8c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/random.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/random.sql @@ -22,30 +22,59 @@ SELECT uniform(0, 1, 0) AS result; SELECT uniform(0, 10, 0) AS result; SELECT uniform(0L, 10L, 0) AS result; SELECT uniform(0, 10L, 0) AS result; +SELECT uniform(0, cast(10 as tinyint), 0) AS result; +SELECT uniform(0, cast(10 as smallint), 0) AS result; SELECT uniform(0, 10S, 0) AS result; SELECT uniform(10, 20, 0) AS result; SELECT uniform(10.0F, 20.0F, 0) AS result; +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(10, 3)), 0) AS result; +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(11, 4)), 0) AS result; +SELECT uniform(10, cast(20 as decimal(10, 3)), 0) AS result; +SELECT uniform(cast(10 as decimal(10, 3)), 20, 0) AS result; SELECT uniform(10.0D, 20.0D, CAST(3 / 7 AS LONG)) AS result; SELECT uniform(10, 20.0F, 0) AS result; SELECT uniform(10, 20, 0) AS result FROM VALUES (0), (1), (2) tab(col); SELECT uniform(10, 20.0F) IS NOT NULL AS result; --- Negative test cases for the uniform random number generator. +SELECT uniform(-10L, 10L, 0) AS result; +SELECT uniform(-20L, -10L, 0) AS result; +SELECT uniform(-20L, -10L, -10) AS result; SELECT uniform(NULL, 1, 0) AS result; +SELECT uniform(cast(NULL AS int), 1, 0) AS result; +SELECT uniform(cast(NULL AS float), 1, 0) AS result; SELECT uniform(0, NULL, 0) AS result; +SELECT uniform(0, cast(NULL AS int), 0) AS result; +SELECT uniform(0, cast(NULL AS float), 0) AS result; SELECT uniform(0, 1, NULL) AS result; +SELECT uniform(NULL, NULL, 0) AS result; +SELECT uniform(NULL, NULL, NULL) AS result; +-- Negative test cases for the uniform random number generator. 
+SELECT uniform(0, 1, cast(NULL as int)) AS result; +SELECT uniform(0, 1, cast(NULL as float)) AS result; SELECT uniform(10, 20, col) AS result FROM VALUES (0), (1), (2) tab(col); SELECT uniform(col, 10, 0) AS result FROM VALUES (0), (1), (2) tab(col); SELECT uniform(10) AS result; SELECT uniform(10, 20, 30, 40) AS result; +SELECT uniform(10.0F, 20.0F, 0.0F) AS result; +SELECT uniform(10.0F, 20.0F, 0.0D) AS result; +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(10, 3)), cast(0 as decimal(10, 3))); +SELECT uniform('abc', 10, 0) AS result; +SELECT uniform(0, 'def', 0) AS result; +SELECT uniform(0, 10, 'ghi') AS result; -- The randstr random string generation function supports generating random strings within a --- specified length. We use a seed of zero for these queries to keep tests deterministic. +-- specified length. We use a seed of zero for most queries to keep tests deterministic. SELECT randstr(1, 0) AS result; SELECT randstr(5, 0) AS result; SELECT randstr(10, 0) AS result; SELECT randstr(10S, 0) AS result; +SELECT randstr(CAST(10 AS TINYINT), 0) AS result; +SELECT randstr(CAST(10 AS BIGINT), 0) AS result; +SELECT randstr(1.0F, 0) AS result; +SELECT randstr(1.0D, 0) AS result; +SELECT randstr(cast(1 AS DECIMAL(10, 2)), 0) AS result; SELECT randstr(10, 0) AS result FROM VALUES (0), (1), (2) tab(col); SELECT randstr(10) IS NOT NULL AS result; +SELECT randstr(1, -1) AS result; -- Negative test cases for the randstr random number generator. 
SELECT randstr(10L, 0) AS result; SELECT randstr(10.0F, 0) AS result; @@ -55,3 +84,6 @@ SELECT randstr(0, NULL) AS result; SELECT randstr(col, 0) AS result FROM VALUES (0), (1), (2) tab(col); SELECT randstr(10, col) AS result FROM VALUES (0), (1), (2) tab(col); SELECT randstr(10, 0, 1) AS result; +SELECT randstr(-1, 0) AS result; +SELECT randstr(10, "a") AS result FROM VALUES (0) tab(a); +SELECT randstr(10, 1.5) AS result FROM VALUES (0) tab(a); diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql index 8a00e4400e6b0..c3a16ca577ee9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-on-files.sql @@ -28,3 +28,5 @@ SELECT * FROM json.`${spark.sql.warehouse.dir}/sql_on_files.db/test_json`; DROP TABLE sql_on_files.test_json; DROP DATABASE sql_on_files; + +SELECT * FROM json.`https://raw.githubusercontent.com/apache/spark/refs/heads/master/examples/src/main/resources/employees.json`; diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql new file mode 100644 index 0000000000000..34cb41d726766 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-udf.sql @@ -0,0 +1,122 @@ +-- test cases for SQL User Defined Functions + +-- 1. CREATE FUNCTION +-- 1.1 Parameter +-- 1.1.a A scalar function with various numbers of parameter +-- Expect success +CREATE FUNCTION foo1a0() RETURNS INT RETURN 1; +-- Expect: 1 +SELECT foo1a0(); +-- Expect failure +SELECT foo1a0(1); + +CREATE FUNCTION foo1a1(a INT) RETURNS INT RETURN 1; +-- Expect: 1 +SELECT foo1a1(1); +-- Expect failure +SELECT foo1a1(1, 2); + +CREATE FUNCTION foo1a2(a INT, b INT, c INT, d INT) RETURNS INT RETURN 1; +-- Expect: 1 +SELECT foo1a2(1, 2, 3, 4); + +------------------------------- +-- 2. 
Scalar SQL UDF +-- 2.1 deterministic simple expressions +CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a; +SELECT foo2_1a(5); + +CREATE FUNCTION foo2_1b(a INT, b INT) RETURNS INT RETURN a + b; +SELECT foo2_1b(5, 6); + +CREATE FUNCTION foo2_1c(a INT, b INT) RETURNS INT RETURN 10 * (a + b) + 100 * (a -b); +SELECT foo2_1c(5, 6); + +CREATE FUNCTION foo2_1d(a INT, b INT) RETURNS INT RETURN ABS(a) - LENGTH(CAST(b AS VARCHAR(10))); +SELECT foo2_1d(-5, 6); + +-- 2.2 deterministic complex expression with subqueries +-- 2.2.1 Nested Scalar subqueries +CREATE FUNCTION foo2_2a(a INT) RETURNS INT RETURN SELECT a; +SELECT foo2_2a(5); + +CREATE FUNCTION foo2_2b(a INT) RETURNS INT RETURN 1 + (SELECT a); +SELECT foo2_2b(5); + +-- Expect error: deep correlation is not yet supported +CREATE FUNCTION foo2_2c(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT a)); +-- SELECT foo2_2c(5); + +-- Expect error: deep correlation is not yet supported +CREATE FUNCTION foo2_2d(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT (SELECT (SELECT a)))); +-- SELECT foo2_2d(5); + +-- 2.2.2 Set operations +-- Expect error: correlated scalar subquery must be aggregated. +CREATE FUNCTION foo2_2e(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) WHERE c1 = 2 +UNION ALL +SELECT a + 1 FROM (VALUES 1) AS V(c1); +-- SELECT foo2_2e(5); + +-- Expect error: correlated scalar subquery must be aggregated. +CREATE FUNCTION foo2_2f(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +EXCEPT +SELECT a + 1 FROM (VALUES 1) AS V(a); +-- SELECT foo2_2f(5); + +-- Expect error: correlated scalar subquery must be aggregated. +CREATE FUNCTION foo2_2g(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +INTERSECT +SELECT a FROM (VALUES 1) AS V(a); +-- SELECT foo2_2g(5); + +-- Prepare by dropping views or tables if they already exist. 
+DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS ts; +DROP TABLE IF EXISTS tm; +DROP TABLE IF EXISTS ta; +DROP TABLE IF EXISTS V1; +DROP TABLE IF EXISTS V2; +DROP VIEW IF EXISTS t1; +DROP VIEW IF EXISTS t2; +DROP VIEW IF EXISTS ts; +DROP VIEW IF EXISTS tm; +DROP VIEW IF EXISTS ta; +DROP VIEW IF EXISTS V1; +DROP VIEW IF EXISTS V2; + +-- 2.3 Calling Scalar UDF from various places +CREATE FUNCTION foo2_3(a INT, b INT) RETURNS INT RETURN a + b; +CREATE VIEW V1(c1, c2) AS VALUES (1, 2), (3, 4), (5, 6); +CREATE VIEW V2(c1, c2) AS VALUES (-1, -2), (-3, -4), (-5, -6); + +-- 2.3.1 Multiple times in the select list +SELECT foo2_3(c1, c2), foo2_3(c2, 1), foo2_3(c1, c2) - foo2_3(c2, c1 - 1) FROM V1 ORDER BY 1, 2, 3; + +-- 2.3.2 In the WHERE clause +SELECT * FROM V1 WHERE foo2_3(c1, 0) = c1 AND foo2_3(c1, c2) < 8; + +-- 2.3.3 Different places around an aggregate +SELECT foo2_3(SUM(c1), SUM(c2)), SUM(c1) + SUM(c2), SUM(foo2_3(c1, c2) + foo2_3(c2, c1) - foo2_3(c2, c1)) +FROM V1; + +-- 2.4 Scalar UDF with complex one row relation subquery +-- 2.4.1 higher order functions +CREATE FUNCTION foo2_4a(a ARRAY) RETURNS STRING RETURN +SELECT array_sort(a, (i, j) -> rank[i] - rank[j])[0] FROM (SELECT MAP('a', 1, 'b', 2) rank); + +SELECT foo2_4a(ARRAY('a', 'b')); + +-- 2.4.2 built-in functions +CREATE FUNCTION foo2_4b(m MAP, k STRING) RETURNS STRING RETURN +SELECT v || ' ' || v FROM (SELECT upper(m[k]) AS v); + +SELECT foo2_4b(map('a', 'hello', 'b', 'world'), 'a'); + +-- Clean up +DROP VIEW V2; +DROP VIEW V1; diff --git a/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql index 5b98f056ebc5a..d2aef1f83863b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/table-aliases.sql @@ -33,3 +33,8 @@ CREATE OR REPLACE TEMPORARY VIEW src2 AS SELECT * FROM VALUES (2, 1.0), (3, 3.2) SELECT * FROM (src1 s1 INNER JOIN src2 s2 ON 
s1.id = s2.id) dst(a, b, c, d); SELECT dst.* FROM (src1 s1 INNER JOIN src2 s2 ON s1.id = s2.id) dst(a, b, c, d); + +-- Negative examples after aliasing +SELECT src1.* FROM src1 a ORDER BY id LIMIT 1; + +SELECT src1.id FROM (SELECT * FROM src1 ORDER BY id LIMIT 1) a; diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 70de8585ef782..0c141c08d436f 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -185,7 +185,6 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException "condition" : "INVALID_ARRAY_INDEX_IN_ELEMENT_AT", "sqlState" : "22003", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", "arraySize" : "3", "indexValue" : "5" }, @@ -209,7 +208,6 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException "condition" : "INVALID_ARRAY_INDEX_IN_ELEMENT_AT", "sqlState" : "22003", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", "arraySize" : "3", "indexValue" : "-5" }, @@ -252,7 +250,6 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException "condition" : "INVALID_ARRAY_INDEX", "sqlState" : "22003", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", "arraySize" : "2", "indexValue" : "4" }, @@ -276,7 +273,6 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException "condition" : "INVALID_ARRAY_INDEX", "sqlState" : "22003", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", "arraySize" : "2", "indexValue" : "0" }, @@ -300,7 +296,6 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException "condition" : "INVALID_ARRAY_INDEX", "sqlState" : "22003", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", "arraySize" : "2", "indexValue" : "-1" }, @@ -356,7 +351,6 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException "condition" : "INVALID_ARRAY_INDEX", "sqlState" : "22003", "messageParameters" : { - "ansiConfig" : 
"\"spark.sql.ansi.enabled\"", "arraySize" : "3", "indexValue" : "5" }, @@ -380,7 +374,6 @@ org.apache.spark.SparkArrayIndexOutOfBoundsException "condition" : "INVALID_ARRAY_INDEX", "sqlState" : "22003", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", "arraySize" : "3", "indexValue" : "-1" }, diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index ef084a8ce47d1..93ff8dd4b320b 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -195,7 +195,7 @@ View Text select * from char_tbl View Original Text select * from char_tbl View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c, v] +View Query Output Columns [`c`, `v`] -- !query @@ -366,7 +366,7 @@ View Text select * from char_tbl2 View Original Text select * from char_tbl2 View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c, v] +View Query Output Columns [`c`, `v`] -- !query @@ -427,7 +427,7 @@ View Text select * from char_tbl2 View Original Text select * from char_tbl2 View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c, v] +View Query Output Columns [`c`, `v`] Table Properties [yes=no] @@ -488,7 +488,7 @@ View Text select * from char_tbl2 View Original Text select * from char_tbl2 View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c, v] +View Query Output Columns [`c`, `v`] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/collations.sql.out b/sql/core/src/test/resources/sql-tests/results/collations.sql.out index 245e1dd0b56de..8c150b1de03e9 100644 --- a/sql/core/src/test/resources/sql-tests/results/collations.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/collations.sql.out @@ -479,7 +479,7 @@ struct +struct<(collate(a, unicode) < 'A' collate UNICODE):boolean> -- !query output true @@ -487,7 +487,7 @@ true -- !query select 'a' collate unicode_ci = 'A' -- !query schema -struct<(collate(a, unicode_ci) = A):boolean> +struct<(collate(a, unicode_ci) = 'A' collate UNICODE_CI):boolean> -- !query output true @@ -495,7 +495,7 @@ true -- !query select 'a' collate unicode_ai = 'å' -- !query schema -struct<(collate(a, unicode_ai) = å):boolean> +struct<(collate(a, unicode_ai) = 'å' collate UNICODE_AI):boolean> -- !query output true @@ -503,7 +503,7 @@ true -- !query select 'a' collate unicode_ci_ai = 'Å' -- !query schema -struct<(collate(a, unicode_ci_ai) = Å):boolean> +struct<(collate(a, unicode_ci_ai) = 'Å' collate UNICODE_CI_AI):boolean> -- !query output true @@ -511,7 +511,7 @@ true -- !query select 'a' collate en < 'A' -- !query schema -struct<(collate(a, en) < A):boolean> +struct<(collate(a, en) < 'A' collate en):boolean> -- !query output true @@ -519,7 +519,7 @@ true -- !query select 'a' collate en_ci = 'A' -- !query schema -struct<(collate(a, en_ci) = A):boolean> +struct<(collate(a, en_ci) = 'A' collate en_CI):boolean> -- !query output true @@ -527,7 +527,7 @@ true -- !query select 'a' collate en_ai = 'å' -- !query schema -struct<(collate(a, en_ai) = å):boolean> +struct<(collate(a, en_ai) = 'å' collate en_AI):boolean> -- !query output true @@ -535,7 +535,7 @@ true -- !query select 'a' collate en_ci_ai = 'Å' -- !query schema -struct<(collate(a, en_ci_ai) = Å):boolean> +struct<(collate(a, en_ci_ai) = 'Å' collate en_CI_AI):boolean> -- !query output true @@ -543,7 +543,7 @@ true -- !query select 'Kypper' collate sv < 'Köpfe' -- !query schema -struct<(collate(Kypper, sv) < Köpfe):boolean> +struct<(collate(Kypper, sv) < 'Köpfe' collate sv):boolean> -- !query output true @@ -551,7 +551,7 @@ true -- !query select 'Kypper' collate de > 'Köpfe' -- !query schema 
-struct<(collate(Kypper, de) > Köpfe):boolean> +struct<(collate(Kypper, de) > 'Köpfe' collate de):boolean> -- !query output true @@ -559,7 +559,7 @@ true -- !query select 'I' collate tr_ci = 'ı' -- !query schema -struct<(collate(I, tr_ci) = ı):boolean> +struct<(collate(I, tr_ci) = 'ı' collate tr_CI):boolean> -- !query output true @@ -1109,7 +1109,7 @@ kitten -- !query select elt(1, utf8_binary, 'word'), elt(1, utf8_lcase, 'word') from t5 -- !query schema -struct +struct -- !query output Hello, world! Nice day. Hello, world! Nice day. Something else. Nothing here. Something else. Nothing here. @@ -2492,7 +2492,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "inputType" : "\"STRING COLLATE UNICODE_AI\"", "paramIndex" : "first", "requiredType" : "\"STRING\"", - "sqlExpr" : "\"replace(collate(utf8_binary, unicode_ai), collate(utf8_lcase, unicode_ai), abc)\"" + "sqlExpr" : "\"replace(collate(utf8_binary, unicode_ai), collate(utf8_lcase, unicode_ai), 'abc' collate UNICODE_AI)\"" }, "queryContext" : [ { "objectType" : "", @@ -3342,7 +3342,7 @@ ksitTing -- !query select overlay(utf8_binary, 'a', 2), overlay(utf8_lcase, 'a', 2) from t5 -- !query schema -struct +struct -- !query output Hallo, world! Nice day. Hallo, world! Nice day. 
Saark SaL @@ -3583,6 +3583,28 @@ struct +-- !query output +23 23 +29 29 +3 3 +3 3 +3 4 +3 4 +4 3 +4 4 +5 3 +6 7 +7 7 +8 1 +8 24 +8 8 +8 8 + + -- !query select luhn_check(num) from t9 -- !query schema @@ -3776,6 +3798,28 @@ true true true true +-- !query +select is_valid_utf8(utf8_binary collate utf8_lcase_rtrim), is_valid_utf8(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query schema +struct +-- !query output +true true +true true +true true +true true +true true +true true +true true +true true +true true +true true +true true +true true +true true +true true +true true + + -- !query select make_valid_utf8(utf8_binary), make_valid_utf8(utf8_lcase) from t5 -- !query schema @@ -3820,6 +3864,28 @@ kitten sitTing İo İo +-- !query +select make_valid_utf8(utf8_binary collate utf8_lcase_rtrim), make_valid_utf8(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query schema +struct +-- !query output +Hello, world! Nice day. Hello, world! Nice day. +Something else. Nothing here. Something else. Nothing here. +Spark SQL +aaAaAAaA aaAaAAaA +aaAaAAaA aaAaaAaA +aaAaAAaA aaAaaAaAaaAaaAaAaaAaaAaA +abc abc +abcdcba aBcDCbA +bbAbAAbA a +efd2 efd2 +kitten sitTing +İo i̇o +İo İo +İo İo +İo İo + + -- !query select validate_utf8(utf8_binary), validate_utf8(utf8_lcase) from t5 -- !query schema @@ -3864,6 +3930,28 @@ kitten sitTing İo İo +-- !query +select validate_utf8(utf8_binary collate utf8_lcase_rtrim), validate_utf8(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query schema +struct +-- !query output +Hello, world! Nice day. Hello, world! Nice day. +Something else. Nothing here. Something else. Nothing here. 
+Spark SQL +aaAaAAaA aaAaAAaA +aaAaAAaA aaAaaAaA +aaAaAAaA aaAaaAaAaaAaaAaAaaAaaAaA +abc abc +abcdcba aBcDCbA +bbAbAAbA a +efd2 efd2 +kitten sitTing +İo i̇o +İo İo +İo İo +İo İo + + -- !query select try_validate_utf8(utf8_binary), try_validate_utf8(utf8_lcase) from t5 -- !query schema @@ -3908,6 +3996,28 @@ kitten sitTing İo İo +-- !query +select try_validate_utf8(utf8_binary collate utf8_lcase_rtrim), try_validate_utf8(utf8_lcase collate utf8_binary_rtrim) from t5 +-- !query schema +struct +-- !query output +Hello, world! Nice day. Hello, world! Nice day. +Something else. Nothing here. Something else. Nothing here. +Spark SQL +aaAaAAaA aaAaAAaA +aaAaAAaA aaAaaAaA +aaAaAAaA aaAaaAaAaaAaaAaAaaAaaAaA +abc abc +abcdcba aBcDCbA +bbAbAAbA a +efd2 efd2 +kitten sitTing +İo i̇o +İo İo +İo İo +İo İo + + -- !query select substr(utf8_binary, 2, 2), substr(utf8_lcase, 2, 2) from t5 -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index 97ed7e2c4f06a..8b316207250ec 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -15,6 +15,14 @@ struct<> +-- !query +create temporary view t3 as select * from t +-- !query schema +struct<> +-- !query output + + + -- !query WITH s AS (SELECT 1 FROM s) SELECT * FROM s -- !query schema @@ -70,6 +78,16 @@ struct<1:int> 1 +-- !query +WITH t AS (SELECT 1) SELECT * FROM t3 +-- !query schema +struct +-- !query output +0 +1 +2 + + -- !query WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2 -- !query schema @@ -580,3 +598,11 @@ DROP VIEW IF EXISTS t2 struct<> -- !query output + + +-- !query +DROP VIEW IF EXISTS t3 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out index 6dc33b1f853e4..66d9e5419dd36 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out @@ -207,7 +207,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_date`", "message" : "Invalid date 'February 29' as '1970' is not a leap year" } } diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 22f98512ca5d9..9f68bb87776ab 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -207,7 +207,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_date`", "message" : "Unparseable date: \"02-29\"" } } @@ -1585,7 +1585,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.\"" } } @@ -1601,7 +1601,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.0\"" } } @@ -1617,7 +1617,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.1\"" } } @@ -1633,7 +1633,7 @@ org.apache.spark.SparkDateTimeException "condition" : 
"CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.12\"" } } @@ -1649,7 +1649,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.123UTC\"" } } @@ -1665,7 +1665,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.1234\"" } } @@ -1681,7 +1681,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.12345CST\"" } } @@ -1697,7 +1697,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.123456PST\"" } } @@ -1713,7 +1713,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.1234567PST\"" } } @@ -1729,7 +1729,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"123456 2019-10-06 10:11:12.123456PST\"" } 
} @@ -1745,7 +1745,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"223456 2019-10-06 10:11:12.123456PST\"" } } @@ -1761,7 +1761,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.1234\"" } } @@ -1777,7 +1777,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.123\"" } } @@ -1793,7 +1793,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12\"" } } @@ -1809,7 +1809,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11:12.12\"" } } @@ -1825,7 +1825,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 10:11\"" } } @@ -1841,7 +1841,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : 
"Unparseable date: \"2019-10-06S10:11:12.12345\"" } } @@ -1857,7 +1857,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"12.12342019-10-06S10:11\"" } } @@ -1873,7 +1873,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"12.1232019-10-06S10:11\"" } } @@ -1889,7 +1889,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"12.1232019-10-06S10:11\"" } } @@ -1905,7 +1905,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"12.1234019-10-06S10:11\"" } } @@ -1977,7 +1977,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"02-29\"" } } @@ -2208,7 +2208,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Unparseable date: \"2019-10-06 A\"" } } diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out index 736eba0adf713..3a7537221d98f 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-parsing-invalid.sql.out @@ -18,7 +18,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '1' could not be parsed at index 0" } } @@ -34,7 +34,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '-12' could not be parsed at index 0" } } @@ -50,7 +50,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '123' could not be parsed, unparsed text found at index 2" } } @@ -66,7 +66,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '1' could not be parsed at index 0" } } @@ -99,7 +99,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Invalid date 'DayOfYear 366' as '1970' is not a leap year" } } @@ -115,7 +115,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '9' could not be parsed at index 0" } } @@ -131,7 +131,7 @@ org.apache.spark.SparkDateTimeException "condition" : 
"CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Invalid date 'DayOfYear 366' as '1970' is not a leap year" } } @@ -147,7 +147,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '9' could not be parsed at index 0" } } @@ -163,7 +163,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '99' could not be parsed at index 0" } } @@ -179,7 +179,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Conflict found: Field DayOfMonth 30 differs from DayOfMonth 31 derived from 1970-12-31." } } @@ -195,7 +195,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Conflict found: Field MonthOfYear 11 differs from MonthOfYear 12 derived from 1970-12-31." 
} } @@ -211,7 +211,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2019-366' could not be parsed: Invalid date 'DayOfYear 366' as '2019' is not a leap year" } } @@ -227,7 +227,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Conflict found: Field DayOfMonth 30 differs from DayOfMonth 31 derived from 1970-12-31." } } @@ -243,7 +243,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2020-01-365' could not be parsed: Conflict found: Field DayOfMonth 30 differs from DayOfMonth 1 derived from 2020-12-30" } } @@ -259,7 +259,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2020-10-350' could not be parsed: Conflict found: Field MonthOfYear 12 differs from MonthOfYear 10 derived from 2020-12-15" } } @@ -275,7 +275,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2020-11-31-366' could not be parsed: Invalid date 'NOVEMBER 31'" } } @@ -299,7 +299,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_date`", "message" : "Text '2020-01-27T20:06:11.847' 
could not be parsed at index 10" } } @@ -315,7 +315,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_date`", "message" : "Text 'Unparseable' could not be parsed at index 0" } } @@ -331,7 +331,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2020-01-27T20:06:11.847' could not be parsed at index 10" } } @@ -347,7 +347,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text 'Unparseable' could not be parsed at index 0" } } @@ -363,7 +363,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2020-01-27T20:06:11.847' could not be parsed at index 10" } } @@ -379,7 +379,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text 'Unparseable' could not be parsed at index 0" } } @@ -395,7 +395,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2020-01-27T20:06:11.847' could not be parsed at index 10" } } @@ -411,7 +411,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : 
"\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text 'Unparseable' could not be parsed at index 0" } } diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 39bf681d25a96..d945823191026 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -71,6 +71,41 @@ c string d string +-- !query +DESCRIBE EXTENDED t AS JSON +-- !query schema +struct +-- !query output +{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"location":"file:[not included in comparison]/{warehouse_dir}/t","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","comment":"table_comment","table_properties":{"e":"3","password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"]} + + +-- !query +DESCRIBE t AS JSON +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "DESCRIBE_JSON_NOT_EXTENDED", + "sqlState" : "0A000", + "messageParameters" : { + "tableName" : "t" + } +} + + +-- !query +DESC FORMATTED t a AS JSON +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON", + "sqlState" : "0A000" +} + + -- !query DESC default.t -- !query schema @@ -263,6 +298,14 
@@ c string d string +-- !query +DESC EXTENDED t PARTITION (c='Us', d=1) AS JSON +-- !query schema +struct +-- !query output +{"table_name":"t","catalog_name":"spark_catalog","namespace":["default"],"schema_name":"default","columns":[{"name":"a","type":{"name":"string"},"nullable":true},{"name":"b","type":{"name":"int"},"nullable":true},{"name":"c","type":{"name":"string"},"nullable":true},{"name":"d","type":{"name":"string"},"nullable":true}],"partition_values":{"c":"Us","d":"1"},"location":"file:[not included in comparison]/{warehouse_dir}/t/c=Us/d=1","storage_properties":{"a":"1","b":"2","password":"*********(redacted)"},"created_time [not included in comparison]":"None","last_access [not included in comparison]":"None","created_by [not included in comparison]":"None","type":"MANAGED","provider":"parquet","num_buckets":2,"bucket_columns":["a"],"sort_columns":["b"],"table_properties":{"password":"*********(redacted)","t":"test"},"partition_provider":"Catalog","partition_columns":["c","d"]} + + -- !query DESC EXTENDED t PARTITION (c='Us', d=1) -- !query schema @@ -538,7 +581,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [a, b, c, d] +View Query Output Columns [`a`, `b`, `c`, `d`] -- !query @@ -563,7 +606,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [a, b, c, d] +View Query Output Columns [`a`, `b`, `c`, `d`] -- !query @@ -644,6 +687,17 @@ Execute DescribeTableCommand +- DescribeTableCommand `spark_catalog`.`default`.`t`, [c=Us, d=2], false, [col_name#x, data_type#x, comment#x] +-- !query +EXPLAIN DESCRIBE EXTENDED t PARTITION (c='Us', d=2) AS JSON +-- !query schema +struct +-- !query output +== Physical Plan == +Execute DescribeRelationJsonCommand + +- DescribeRelationJsonCommand [c=Us, d=2], true, [json_metadata#x] 
+ +- ResolvedTable V2SessionCatalog(spark_catalog), default.t, V1Table(default.t), [a#x, b#x, c#x, d#x] + + -- !query DROP TABLE t -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 2a53427b57900..0f61924aa425e 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -987,6 +987,15 @@ struct>> [{"x":1,"y":1.0},{"x":2,"y":1.0},{"x":3,"y":1.0}] +-- !query +SELECT histogram_numeric(col, 3) FROM VALUES + (CAST(1 AS DECIMAL(4, 2))), (CAST(2 AS DECIMAL(4, 2))), (CAST(3 AS DECIMAL(4, 2))) AS tab(col) +-- !query schema +struct>> +-- !query output +[{"x":1.00,"y":1.0},{"x":2.00,"y":1.0},{"x":3.00,"y":1.0}] + + -- !query SELECT histogram_numeric(col, 3) FROM VALUES (TIMESTAMP '2017-03-01 00:00:00'), (TIMESTAMP '2017-04-01 00:00:00'), (TIMESTAMP '2017-05-01 00:00:00') AS tab(col) diff --git a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out index 7d96a3e98c832..521b0afe19264 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords-enforced.sql.out @@ -170,6 +170,7 @@ IS true ITEMS false ITERATE false JOIN true +JSON false KEYS false LANGUAGE false LAST false @@ -252,6 +253,7 @@ REAL false RECORDREADER false RECORDWRITER false RECOVER false +RECURSIVE true REDUCE false REFERENCES true REFRESH false @@ -432,6 +434,7 @@ ORDER OUTER OVERLAPS PRIMARY +RECURSIVE REFERENCES RIGHT SELECT diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out index 6cbfe519a76f6..4d702588ad2b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out @@ -170,6 +170,7 @@ IS false ITEMS false ITERATE false JOIN false +JSON false KEYS false LANGUAGE false LAST false @@ -252,6 +253,7 @@ REAL false RECORDREADER false RECORDWRITER false RECOVER false +RECURSIVE false REDUCE false REFERENCES false REFRESH false diff --git a/sql/core/src/test/resources/sql-tests/results/listagg-collations.sql.out b/sql/core/src/test/resources/sql-tests/results/listagg-collations.sql.out new file mode 100644 index 0000000000000..a21c0ced7a124 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/listagg-collations.sql.out @@ -0,0 +1,82 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query schema +struct +-- !query output +ABab + + +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query schema +struct +-- !query output +aAbB + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query schema +struct +-- !query output +aAbB + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('A'), ('b'), ('B')) AS t(c1) +-- !query schema +struct +-- !query output +ab + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_lcase) FROM (VALUES ('a'), ('B'), ('b'), ('A')) AS t(c1) +-- !query schema +struct +-- !query output +aB + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc '), ('x'), ('abc')) AS t(c1) +-- !query schema +struct +-- !query output +abc x + + +-- !query +SELECT listagg(c1) WITHIN GROUP (ORDER BY c1) FROM (VALUES ('abc '), ('abc '), ('abc\n'), ('abc'), ('x')) AS t(c1) +-- !query schema +struct +-- !query output +abcabc +abc abc x + + +-- !query +SELECT listagg(c1) 
WITHIN GROUP (ORDER BY c1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc '), ('abc\n'), ('abc'), ('x')) AS t(c1) +-- !query schema +struct +-- !query output +abc abc abcabc +x + + +-- !query +SELECT listagg(DISTINCT c1 COLLATE utf8_lcase) WITHIN GROUP (ORDER BY c1 COLLATE utf8_binary) FROM (VALUES ('a'), ('b'), ('A'), ('B')) AS t(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT", + "sqlState" : "42K0K", + "messageParameters" : { + "funcArg" : "\"collate(c1, utf8_lcase)\"", + "funcName" : "`listagg`", + "orderingExpr" : "\"collate(c1, utf8_binary)\"" + } +} diff --git a/sql/core/src/test/resources/sql-tests/results/listagg.sql.out b/sql/core/src/test/resources/sql-tests/results/listagg.sql.out new file mode 100644 index 0000000000000..4dce4cfc858d7 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/listagg.sql.out @@ -0,0 +1,368 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMP VIEW df AS +SELECT * FROM (VALUES ('a', 'b'), ('a', 'c'), ('b', 'c'), ('b', 'd'), (NULL, NULL)) AS t(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TEMP VIEW df2 AS +SELECT * FROM (VALUES (1, true), (2, false), (3, false)) AS t(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT listagg(b) FROM df GROUP BY a +-- !query schema +struct +-- !query output +NULL +bc +cd + + +-- !query +SELECT string_agg(b) FROM df GROUP BY a +-- !query schema +struct +-- !query output +NULL +bc +cd + + +-- !query +SELECT listagg(b, NULL) FROM df GROUP BY a +-- !query schema +struct +-- !query output +NULL +bc +cd + + +-- !query +SELECT listagg(b) FROM df WHERE 1 != 1 +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT listagg(b, '|') FROM df GROUP BY a +-- !query schema +struct +-- !query output +NULL +b|c +c|d + + +-- !query +SELECT listagg(a) FROM df +-- 
!query schema +struct +-- !query output +aabb + + +-- !query +SELECT listagg(DISTINCT a) FROM df +-- !query schema +struct +-- !query output +ab + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a) FROM df +-- !query schema +struct +-- !query output +aabb + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a DESC) FROM df +-- !query schema +struct +-- !query output +bbaa + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a DESC) OVER (PARTITION BY b) FROM df +-- !query schema +struct +-- !query output +NULL +a +b +ba +ba + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b) FROM df +-- !query schema +struct +-- !query output +aabb + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC) FROM df +-- !query schema +struct +-- !query output +baba + + +-- !query +SELECT listagg(a, '|') WITHIN GROUP (ORDER BY b DESC) FROM df +-- !query schema +struct +-- !query output +b|a|b|a + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC, a ASC) FROM df +-- !query schema +struct +-- !query output +baba + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY b DESC, a DESC) FROM df +-- !query schema +struct +-- !query output +bbaa + + +-- !query +SELECT listagg(c1) FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query schema +struct +-- !query output +ޭ�� + + +-- !query +SELECT listagg(c1, NULL) FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query schema +struct +-- !query output +ޭ�� + + +-- !query +SELECT listagg(c1, X'42') FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query schema +struct +-- !query output +ޭB�� + + +-- !query +SELECT listagg(a), listagg(b, ',') FROM df2 +-- !query schema +struct +-- !query output +123 true,false,false + + +-- !query +SELECT listagg(c1) FROM (VALUES (ARRAY('a', 'b'))) AS t(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + 
"inputSql" : "\"c1\"", + "inputType" : "\"ARRAY\"", + "paramIndex" : "first", + "requiredType" : "(\"STRING\" or \"BINARY\")", + "sqlExpr" : "\"listagg(c1, NULL)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 18, + "fragment" : "listagg(c1)" + } ] +} + + +-- !query +SELECT listagg(c1, ', ') FROM (VALUES (X'DEAD'), (X'BEEF')) AS t(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.DATA_DIFF_TYPES", + "sqlState" : "42K09", + "messageParameters" : { + "dataType" : "(\"BINARY\" or \"STRING\")", + "functionName" : "`listagg`", + "sqlExpr" : "\"listagg(c1, , )\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 24, + "fragment" : "listagg(c1, ', ')" + } ] +} + + +-- !query +SELECT listagg(b, a) FROM df GROUP BY a +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + "sqlState" : "42K09", + "messageParameters" : { + "inputExpr" : "\"a\"", + "inputName" : "`delimiter`", + "inputType" : "\"STRING\"", + "sqlExpr" : "\"listagg(b, a)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 20, + "fragment" : "listagg(b, a)" + } ] +} + + +-- !query +SELECT listagg(a) OVER (ORDER BY a) FROM df +-- !query schema +struct +-- !query output +NULL +aa +aa +aabb +aabb + + +-- !query +SELECT listagg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC", + "sqlState" : "42601", + "messageParameters" : { + "aggFunc" : "\"listagg(a, NULL, a ASC NULLS FIRST)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + 
"stopIndex" : 61, + "fragment" : "listagg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT string_agg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC", + "sqlState" : "42601", + "messageParameters" : { + "aggFunc" : "\"listagg(a, NULL, a ASC NULLS FIRST)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 64, + "fragment" : "string_agg(a) WITHIN GROUP (ORDER BY a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT listagg(DISTINCT a) OVER (ORDER BY a) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DISTINCT_WINDOW_FUNCTION_UNSUPPORTED", + "sqlState" : "0A000", + "messageParameters" : { + "windowExpr" : "\"listagg(DISTINCT a, NULL) OVER (ORDER BY a ASC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 44, + "fragment" : "listagg(DISTINCT a) OVER (ORDER BY a)" + } ] +} + + +-- !query +SELECT listagg(DISTINCT a) WITHIN GROUP (ORDER BY b) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT", + "sqlState" : "42K0K", + "messageParameters" : { + "funcArg" : "\"a\"", + "funcName" : "`listagg`", + "orderingExpr" : "\"b\"" + } +} + + +-- !query +SELECT listagg(DISTINCT a) WITHIN GROUP (ORDER BY a, b) FROM df +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.MISMATCH_WITH_DISTINCT_INPUT", + "sqlState" : "42K0K", + "messageParameters" : { + "funcArg" : "\"a\"", + "funcName" : 
"`listagg`", + "orderingExpr" : "\"a\", \"b\"" + } +} diff --git a/sql/core/src/test/resources/sql-tests/results/mode.sql.out b/sql/core/src/test/resources/sql-tests/results/mode.sql.out index 77f008b6b0204..d5ab4509102b9 100644 --- a/sql/core/src/test/resources/sql-tests/results/mode.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/mode.sql.out @@ -51,7 +51,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`mode`" @@ -373,7 +373,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`mode`" @@ -397,7 +397,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WITHIN_GROUP_MISSING", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`mode`" @@ -421,7 +421,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WRONG_NUM_ORDERINGS", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WRONG_NUM_ORDERINGS", "sqlState" : "42K0K", "messageParameters" : { "actualNum" : "1", diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out index 6cbfe519a76f6..4d702588ad2b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/keywords.sql.out @@ -170,6 +170,7 @@ IS false ITEMS false ITERATE false JOIN false +JSON 
false KEYS false LANGUAGE false LAST false @@ -252,6 +253,7 @@ REAL false RECORDREADER false RECORDWRITER false RECOVER false +RECURSIVE false REDUCE false REFERENCES false REFRESH false diff --git a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out index 6f73e928e2345..5f052c8ff22c5 100644 --- a/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/percentiles.sql.out @@ -222,7 +222,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`percentile_cont`" @@ -246,7 +246,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.DISTINCT_UNSUPPORTED", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.DISTINCT_UNSUPPORTED", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`percentile_cont`" @@ -324,7 +324,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WITHIN_GROUP_MISSING", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`percentile_cont`" @@ -348,7 +348,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WITHIN_GROUP_MISSING", + "condition" : "INVALID_WITHIN_GROUP_EXPRESSION.WITHIN_GROUP_MISSING", "sqlState" : "42K0K", "messageParameters" : { "funcName" : "`percentile_cont`" @@ -372,7 +372,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "INVALID_INVERSE_DISTRIBUTION_FUNCTION.WRONG_NUM_ORDERINGS", + "condition" : 
"INVALID_WITHIN_GROUP_EXPRESSION.WRONG_NUM_ORDERINGS", "sqlState" : "42K0K", "messageParameters" : { "actualNum" : "2", diff --git a/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out b/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out index 7ac81c6671a1c..8473fe0cec8ca 100644 --- a/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out @@ -232,6 +232,136 @@ struct<> +-- !query +from t +-- !query schema +struct +-- !query output +0 abc +1 def + + +-- !query +table t +-- !query schema +struct +-- !query output +0 abc +1 def + + +-- !query +from t +|> select 1 as x +-- !query schema +struct +-- !query output +1 +1 + + +-- !query +from t as t_alias +|> select t_alias.x +-- !query schema +struct +-- !query output +0 +1 + + +-- !query +from t as t_alias +|> select t_alias.x as tx, t_alias.y as ty +|> where ty = 'def' +|> select tx +-- !query schema +struct +-- !query output +1 + + +-- !query +from t, other +|> select t.x + other.a as z +-- !query schema +struct +-- !query output +1 +1 +2 +2 +2 +3 + + +-- !query +from t join other on (t.x = other.a) +|> select t.x + other.a as z +-- !query schema +struct +-- !query output +2 +2 + + +-- !query +from t lateral view explode(array(100, 101)) as ly +|> select t.x + ly as z +-- !query schema +struct +-- !query output +100 +101 +101 +102 + + +-- !query +from st +|> select col.i1 +-- !query schema +struct +-- !query output +2 + + +-- !query +from st as st_alias +|> select st_alias.col.i1 +-- !query schema +struct +-- !query output +2 + + +-- !query +from values (0), (1) tab(col) +|> select col as x +-- !query schema +struct +-- !query output +0 +1 + + +-- !query +from t +|> from t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'from'", + "hint" : "" + 
} +} + + -- !query table t |> select 1 as x @@ -511,7 +641,7 @@ struct table t |> extend 1 -- !query schema -struct +struct -- !query output 0 abc 1 1 def 1 @@ -538,51 +668,532 @@ struct -- !query -table t -|> extend x + length(y) as z, x + 1 as zz +table t +|> extend x + length(y) as z, x + 1 as zz +-- !query schema +struct +-- !query output +0 abc 3 1 +1 def 4 2 + + +-- !query +table t +|> extend x + length(y) as z +|> extend z + 1 as zz +-- !query schema +struct +-- !query output +0 abc 3 4 +1 def 4 5 + + +-- !query +select col from st +|> extend col.i1 as z +-- !query schema +struct,z:int> +-- !query output +{"i1":2,"i2":3} 2 + + +-- !query +table t +|> extend (select a from other where x = a limit 1) as z +-- !query schema +struct +-- !query output +0 abc NULL +1 def 1 + + +-- !query +table t +|> where exists ( + table other + |> extend t.x + |> select * except (a, b)) +-- !query schema +struct +-- !query output +0 abc +1 def + + +-- !query +table t +|> extend 1 as x +-- !query schema +struct +-- !query output +0 abc 1 +1 def 1 + + +-- !query +table t +|> extend first_value(x) over (partition by y) as result +-- !query schema +struct +-- !query output +0 abc 0 +1 def 1 + + +-- !query +table t +|> extend x + length(y) as z, z + 1 as plus_one +-- !query schema +struct +-- !query output +0 abc 3 4 +1 def 4 5 + + +-- !query +table t +|> extend sum(x) as z +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION", + "sqlState" : "0A000", + "messageParameters" : { + "clause" : "EXTEND", + "expr" : "sum(x#x)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 19, + "stopIndex" : 24, + "fragment" : "sum(x)" + } ] +} + + +-- !query +table t +|> extend distinct x as z +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + 
"messageParameters" : { + "error" : "'as'", + "hint" : "" + } +} + + +-- !query +table t +|> extend * +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "INVALID_USAGE_OF_STAR_OR_REGEX", + "sqlState" : "42000", + "messageParameters" : { + "elem" : "'*'", + "prettyName" : "expression `pipeexpression`" + } +} + + +-- !query +table t +|> set x = 1 +-- !query schema +struct +-- !query output +1 abc +1 def + + +-- !query +table t +|> set y = x +-- !query schema +struct +-- !query output +0 0 +1 1 + + +-- !query +table t +|> extend 1 as z +|> set z = x + length(y) +-- !query schema +struct +-- !query output +0 abc 3 +1 def 4 + + +-- !query +table t +|> extend 1 as z +|> extend 2 as zz +|> set z = x + length(y), zz = x + 1 +-- !query schema +struct +-- !query output +0 abc 3 1 +1 def 4 2 + + +-- !query +table other +|> extend 3 as c +|> set a = b, b = c +-- !query schema +struct +-- !query output +1 3 3 +2 3 3 +4 3 3 + + +-- !query +table t +|> extend 1 as z +|> extend 2 as zz +|> set z = x + length(y), zz = z + 1 +-- !query schema +struct +-- !query output +0 abc 3 4 +1 def 4 5 + + +-- !query +table t +|> extend 1 as z +|> set z = x + length(y) +|> set z = z + 1 +-- !query schema +struct +-- !query output +0 abc 4 +1 def 5 + + +-- !query +table t +|> extend 1 as z +|> set z = x + length(y), z = z + 1 +-- !query schema +struct +-- !query output +0 abc 4 +1 def 5 + + +-- !query +select col from st +|> extend 1 as z +|> set z = col.i1 +-- !query schema +struct,z:int> +-- !query output +{"i1":2,"i2":3} 2 + + +-- !query +table t +|> set y = (select a from other where x = a limit 1) +-- !query schema +struct +-- !query output +0 NULL +1 1 + + +-- !query +table t +|> extend 1 as `x.y.z` +|> set `x.y.z` = x + length(y) +-- !query schema +struct +-- !query output +0 abc 3 +1 def 4 + + +-- !query +table t +|> extend 1 as z +|> set z = first_value(x) over (partition by y) +-- !query schema +struct +-- !query output +0 abc 0 +1 
def 1 + + +-- !query +values (0), (1) lhs(a) +|> inner join values (1), (2) rhs(a) using (a) +|> extend lhs.a + rhs.a as z1 +|> extend lhs.a - rhs.a as z2 +|> drop z1 +|> where z2 = 0 +|> order by lhs.a, rhs.a, z2 +|> set z2 = 4 +|> limit 2 +|> select lhs.a, rhs.a, z2 +-- !query schema +struct +-- !query output +1 1 4 + + +-- !query +table t +|> set z = 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`z`", + "proposal" : "`x`, `y`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 20, + "fragment" : "table t\n|> set z = 1" + } ] +} + + +-- !query +table t +|> set x = 1 as z +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'as'", + "hint" : "" + } +} + + +-- !query +select col from st +|> set col.i1 = 42 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "_LEGACY_ERROR_TEMP_0035", + "messageParameters" : { + "message" : "SQL pipe syntax |> SET operator with multi-part assignment key (only single-part keys are allowed)" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 27, + "stopIndex" : 37, + "fragment" : "col.i1 = 42" + } ] +} + + +-- !query +table t +|> drop y +-- !query schema +struct +-- !query output +0 +1 + + +-- !query +select 1 as x, 2 as y, 3 as z +|> drop z, y +-- !query schema +struct +-- !query output +1 + + +-- !query +select 1 as x, 2 as y, 3 as z +|> drop z +|> drop y +-- !query schema +struct +-- !query output +1 + + +-- !query +select x from t +|> drop x +-- !query schema +struct<> +-- !query output + + + +-- !query +table t +|> extend 1 as `x.y.z` +|> drop `x.y.z` +-- !query 
schema +struct +-- !query output +0 abc +1 def + + +-- !query +table t +|> drop z +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`z`", + "proposal" : "`x`, `y`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 17, + "fragment" : "table t\n|> drop z" + } ] +} + + +-- !query +table st +|> drop col.i1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'.'", + "hint" : "" + } +} + + +-- !query +table st +|> drop `col.i1` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`col.i1`", + "proposal" : "`col`, `x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 25, + "fragment" : "table st\n|> drop `col.i1`" + } ] +} + + +-- !query +select 1 as x, 2 as y, 3 as z +|> drop z, y, z -- !query schema -struct +struct<> -- !query output -0 abc 3 1 -1 def 4 2 +org.apache.spark.sql.AnalysisException +{ + "condition" : "EXCEPT_OVERLAPPING_COLUMNS", + "sqlState" : "42702", + "messageParameters" : { + "columns" : "z, y, z" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 1, + "stopIndex" : 45, + "fragment" : "select 1 as x, 2 as y, 3 as z\n|> drop z, y, z" + } ] +} -- !query table t -|> extend x + length(y) as z -|> extend z + 1 as zz +|> as u +|> select u.x, u.y -- !query schema -struct +struct -- !query output -0 abc 3 4 -1 def 4 5 +0 abc +1 def -- !query -select col from st -|> extend col.i1 as z +select 1 as x, 2 as y +|> as u +|> select u.x, u.y -- !query schema 
-struct,z:int> +struct -- !query output -{"i1":2,"i2":3} 2 +1 2 -- !query table t -|> extend (select a from other where x = a limit 1) as z +|> as `u.v` +|> select `u.v`.x, `u.v`.y -- !query schema -struct +struct -- !query output -0 abc NULL -1 def 1 +0 abc +1 def -- !query table t -|> where exists ( - table other - |> extend t.x - |> select * except (a, b)) +|> as u +|> as v +|> select v.x, v.y -- !query schema struct -- !query output @@ -592,61 +1203,67 @@ struct -- !query table t -|> extend 1 as x +|> as u +|> where u.x = 1 -- !query schema -struct +struct -- !query output -0 abc 1 -1 def 1 +1 def -- !query table t -|> extend first_value(x) over (partition by y) as result +|> as u, v -- !query schema -struct +struct<> -- !query output -0 abc 0 -1 def 1 +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "','", + "hint" : "" + } +} -- !query table t -|> extend x + length(y) as z, z + 1 as plus_one +|> as 1 + 2 -- !query schema -struct +struct<> -- !query output -0 abc 3 4 -1 def 4 5 +org.apache.spark.sql.catalyst.parser.ParseException +{ + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'1'", + "hint" : "" + } +} -- !query table t -|> extend sum(x) as z +|> as u-v -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException +org.apache.spark.sql.catalyst.parser.ParseException { - "condition" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION", - "sqlState" : "0A000", + "condition" : "INVALID_IDENTIFIER", + "sqlState" : "42602", "messageParameters" : { - "clause" : "EXTEND", - "expr" : "sum(x#x)" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 19, - "stopIndex" : 24, - "fragment" : "sum(x)" - } ] + "ident" : "u-v" + } } -- !query table t -|> extend distinct x as z +|> as u@v -- !query schema struct<> -- !query output @@ -655,7 +1272,7 @@ 
org.apache.spark.sql.catalyst.parser.ParseException "condition" : "PARSE_SYNTAX_ERROR", "sqlState" : "42601", "messageParameters" : { - "error" : "'as'", + "error" : "'@'", "hint" : "" } } @@ -663,17 +1280,17 @@ org.apache.spark.sql.catalyst.parser.ParseException -- !query table t -|> extend * +|> as u#######v -- !query schema struct<> -- !query output -org.apache.spark.sql.AnalysisException +org.apache.spark.sql.catalyst.parser.ParseException { - "condition" : "INVALID_USAGE_OF_STAR_OR_REGEX", - "sqlState" : "42000", + "condition" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", "messageParameters" : { - "elem" : "'*'", - "prettyName" : "expression `pipeexpression`" + "error" : "'#'", + "hint" : "" } } @@ -881,7 +1498,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "sqlState" : "42703", "messageParameters" : { "objectName" : "`y`", - "proposal" : "`x`, `z`" + "proposal" : "`z`, `spark_catalog`.`default`.`t`.`x`" }, "queryContext" : [ { "objectType" : "", @@ -893,6 +1510,84 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +table t +|> select x, length(y) as z +|> limit 1000 +|> where x + length(y) < 4 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`y`", + "proposal" : "`z`, `spark_catalog`.`default`.`t`.`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 71, + "stopIndex" : 71, + "fragment" : "y" + } ] +} + + +-- !query +table t +|> select x, length(y) as z +|> limit 1000 offset 1 +|> where x + length(y) < 4 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`y`", + "proposal" : "`z`, `spark_catalog`.`default`.`t`.`x`" + }, + "queryContext" : 
[ { + "objectType" : "", + "objectName" : "", + "startIndex" : 80, + "stopIndex" : 80, + "fragment" : "y" + } ] +} + + +-- !query +table t +|> select x, length(y) as z +|> order by x, y +|> where x + length(y) < 4 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`y`", + "proposal" : "`z`, `spark_catalog`.`default`.`t`.`x`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 52, + "stopIndex" : 52, + "fragment" : "y" + } ] +} + + -- !query (select x, sum(length(y)) as sum_len from t group by x) |> where sum(length(y)) = 3 @@ -905,7 +1600,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "sqlState" : "42703", "messageParameters" : { "objectName" : "`y`", - "proposal" : "`x`, `sum_len`" + "proposal" : "`sum_len`, `spark_catalog`.`default`.`t`.`x`" }, "queryContext" : [ { "objectType" : "", @@ -1762,29 +2457,25 @@ struct -- !query -values (0, 1) tab(x, y) +values (2, 'xyz') tab(x, y) |> union table t |> where x = 0 -- !query schema -struct<> +struct -- !query output -org.apache.spark.SparkNumberFormatException -{ - "condition" : "CAST_INVALID_INPUT", - "sqlState" : "22018", - "messageParameters" : { - "expression" : "'abc'", - "sourceType" : "\"STRING\"", - "targetType" : "\"BIGINT\"" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 1, - "stopIndex" : 55, - "fragment" : "values (0, 1) tab(x, y)\n|> union table t\n|> where x = 0" - } ] -} +0 abc + + +-- !query +values (2, 'xyz') tab(x, y) +|> union table t +|> drop x +-- !query schema +struct +-- !query output +abc +def +xyz -- !query @@ -2179,16 +2870,107 @@ struct select 3 as x, 4 as y |> aggregate group by 1, 2 -- !query schema -struct<1:int,2:int> +struct +-- !query output +3 4 + + +-- !query +values (3, 4) as tab(x, y) +|> aggregate sum(y) group by 1 +-- 
!query schema +struct +-- !query output +3 4 + + +-- !query +values (3, 4), (5, 4) as tab(x, y) +|> aggregate sum(y) group by 1 +-- !query schema +struct +-- !query output +3 4 +5 4 + + +-- !query +select 3 as x, 4 as y +|> aggregate sum(y) group by 1, 1 +-- !query schema +struct +-- !query output +3 3 4 + + +-- !query +select 1 as `1`, 2 as `2` +|> aggregate sum(`2`) group by `1` +-- !query schema +struct<1:int,sum(2):bigint> -- !query output 1 2 +-- !query +select 3 as x, 4 as y +|> aggregate sum(y) group by 2 +-- !query schema +struct +-- !query output +4 4 + + +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 2 +-- !query schema +struct +-- !query output +4 4 + + +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 3 +-- !query schema +struct +-- !query output +5 4 + + +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 2, 3 +-- !query schema +struct +-- !query output +4 5 4 + + +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by 1, 2, 3 +-- !query schema +struct +-- !query output +3 4 5 4 + + +-- !query +select 3 as x, 4 as y, 5 as z +|> aggregate sum(y) group by x, 2, 3 +-- !query schema +struct +-- !query output +3 4 5 4 + + -- !query table t |> aggregate sum(x) -- !query schema -struct +struct -- !query output 1 @@ -2264,7 +3046,7 @@ struct table other |> aggregate a + count(b) group by a -- !query schema -struct +struct -- !query output 1 3 2 3 @@ -2576,7 +3358,7 @@ org.apache.spark.sql.catalyst.parser.ParseException "condition" : "UNSUPPORTED_FEATURE.PIPE_OPERATOR_AGGREGATE_UNSUPPORTED_CASE", "sqlState" : "0A000", "messageParameters" : { - "case" : "window functions" + "case" : "window functions; please update the query to move the window functions to a subsequent |> SELECT operator instead" }, "queryContext" : [ { "objectType" : "", @@ -2846,6 +3628,565 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +with customer_total_return as +(select + 
sr_customer_sk as ctr_customer_sk, + sr_store_sk as ctr_store_sk, + sum(sr_return_amt) as ctr_total_return + from store_returns, date_dim + where sr_returned_date_sk = d_date_sk and d_year = 2000 + group by sr_customer_sk, sr_store_sk) +select c_customer_id +from customer_total_return ctr1, store, customer +where ctr1.ctr_total_return > + (select avg(ctr_total_return) * 1.2 + from customer_total_return ctr2 + where ctr1.ctr_store_sk = ctr2.ctr_store_sk) + and s_store_sk = ctr1.ctr_store_sk + and s_state = 'tn' + and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id +limit 100 +-- !query schema +struct +-- !query output + + + +-- !query +with customer_total_return as + (from store_returns + |> join date_dim + |> where sr_returned_date_sk = d_date_sk and d_year = 2000 + |> aggregate sum(sr_return_amt) as ctr_total_return + group by sr_customer_sk as ctr_customer_sk, sr_store_sk as ctr_store_sk) +from customer_total_return ctr1 +|> join store +|> join customer +|> where ctr1.ctr_total_return > + (table customer_total_return + |> as ctr2 + |> where ctr1.ctr_store_sk = ctr2.ctr_store_sk + |> aggregate avg(ctr_total_return) * 1.2) + and s_store_sk = ctr1.ctr_store_sk + and s_state = 'tn' + and ctr1.ctr_customer_sk = c_customer_sk +|> order by c_customer_id +|> limit 100 +|> select c_customer_id +-- !query schema +struct +-- !query output + + + +-- !query +with wscs as +( select + sold_date_sk, + sales_price + from (select + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + from web_sales) x + union all + (select + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price + from catalog_sales)), + wswscs as + ( select + d_week_seq, + sum(case when (d_day_name = 'sunday') + then sales_price + else null end) + sun_sales, + sum(case when (d_day_name = 'monday') + then sales_price + else null end) + mon_sales, + sum(case when (d_day_name = 'tuesday') + then sales_price + else null end) + tue_sales, + sum(case when (d_day_name = 'wednesday') + 
then sales_price + else null end) + wed_sales, + sum(case when (d_day_name = 'thursday') + then sales_price + else null end) + thu_sales, + sum(case when (d_day_name = 'friday') + then sales_price + else null end) + fri_sales, + sum(case when (d_day_name = 'saturday') + then sales_price + else null end) + sat_sales + from wscs, date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) +select + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2) +from + (select + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 + from wswscs, date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and d_year = 2001) y, + (select + wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + from wswscs, date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and d_year = 2001 + 1) z +where d_week_seq1 = d_week_seq2 - 53 +order by d_week_seq1 +-- !query schema +struct +-- !query output + + + +-- !query +with wscs as + (table web_sales + |> select + ws_sold_date_sk sold_date_sk, + ws_ext_sales_price sales_price + |> as x + |> union all ( + table catalog_sales + |> select + cs_sold_date_sk sold_date_sk, + cs_ext_sales_price sales_price) + |> select + sold_date_sk, + sales_price), +wswscs as + (table wscs + |> join date_dim + |> where d_date_sk = sold_date_sk + |> aggregate + sum(case when (d_day_name = 'sunday') + then sales_price + else null end) + sun_sales, + sum(case when (d_day_name = 'monday') + then sales_price + else null end) + mon_sales, + sum(case when (d_day_name = 
'tuesday') + then sales_price + else null end) + tue_sales, + sum(case when (d_day_name = 'wednesday') + then sales_price + else null end) + wed_sales, + sum(case when (d_day_name = 'thursday') + then sales_price + else null end) + thu_sales, + sum(case when (d_day_name = 'friday') + then sales_price + else null end) + fri_sales, + sum(case when (d_day_name = 'saturday') + then sales_price + else null end) + sat_sales + group by d_week_seq) +table wswscs +|> join date_dim +|> where date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 +|> select + wswscs.d_week_seq d_week_seq1, + sun_sales sun_sales1, + mon_sales mon_sales1, + tue_sales tue_sales1, + wed_sales wed_sales1, + thu_sales thu_sales1, + fri_sales fri_sales1, + sat_sales sat_sales1 +|> as y +|> join ( + table wswscs + |> join date_dim + |> where date_dim.d_week_seq = wswscs.d_week_seq AND d_year = 2001 + 1 + |> select + wswscs.d_week_seq d_week_seq2, + sun_sales sun_sales2, + mon_sales mon_sales2, + tue_sales tue_sales2, + wed_sales wed_sales2, + thu_sales thu_sales2, + fri_sales fri_sales2, + sat_sales sat_sales2 + |> as z) +|> where d_week_seq1 = d_week_seq2 - 53 +|> order by d_week_seq1 +|> select + d_week_seq1, + round(sun_sales1 / sun_sales2, 2), + round(mon_sales1 / mon_sales2, 2), + round(tue_sales1 / tue_sales2, 2), + round(wed_sales1 / wed_sales2, 2), + round(thu_sales1 / thu_sales2, 2), + round(fri_sales1 / fri_sales2, 2), + round(sat_sales1 / sat_sales2, 2) +-- !query schema +struct +-- !query output + + + +-- !query +select + dt.d_year, + item.i_brand_id brand_id, + item.i_brand brand, + sum(ss_ext_sales_price) sum_agg +from date_dim dt, store_sales, item +where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 128 + and dt.d_moy = 11 +group by dt.d_year, item.i_brand, item.i_brand_id +order by dt.d_year, sum_agg desc, brand_id +limit 100 +-- !query schema +struct +-- !query output + + + +-- !query +table date_dim +|> as dt 
+|> join store_sales +|> join item +|> where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 128 + and dt.d_moy = 11 +|> aggregate sum(ss_ext_sales_price) sum_agg + group by dt.d_year d_year, item.i_brand_id brand_id, item.i_brand brand +|> order by d_year, sum_agg desc, brand_id +|> limit 100 +-- !query schema +struct +-- !query output + + + +-- !query +select + i_item_desc, + i_category, + i_class, + i_current_price, + sum(ws_ext_sales_price) as itemrevenue, + sum(ws_ext_sales_price) * 100 / sum(sum(ws_ext_sales_price)) + over + (partition by i_class) as revenueratio +from + web_sales, item, date_dim +where + ws_item_sk = i_item_sk + and i_category in ('sports', 'books', 'home') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1999-02-22' as date) + and (cast('1999-02-22' as date) + interval 30 days) +group by + i_item_id, i_item_desc, i_category, i_class, i_current_price +order by + i_category, i_class, i_item_id, i_item_desc, revenueratio +limit 100 +-- !query schema +struct +-- !query output + + + +-- !query +table web_sales +|> join item +|> join date_dim +|> where ws_item_sk = i_item_sk + and i_category in ('sports', 'books', 'home') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1999-02-22' as date) + and (cast('1999-02-22' as date) + interval 30 days) +|> aggregate sum(ws_ext_sales_price) AS itemrevenue + group by i_item_id, i_item_desc, i_category, i_class, i_current_price +|> extend + itemrevenue * 100 / sum(itemrevenue) + over (partition by i_class) as revenueratio +|> order by i_category, i_class, i_item_id, i_item_desc, revenueratio +|> select i_item_desc, i_category, i_class, i_current_price, itemrevenue, revenueratio +|> limit 100 +-- !query schema +struct +-- !query output + + + +-- !query +select + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing +from (select * +from (select + item_sk, + rank() + over ( + order by 
rank_col asc) rnk +from (select + ss_item_sk item_sk, + avg(ss_net_profit) rank_col +from store_sales ss1 +where ss_store_sk = 4 +group by ss_item_sk +having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit) rank_col +from store_sales +where ss_store_sk = 4 + and ss_addr_sk is null +group by ss_store_sk)) v1) v11 +where rnk < 11) asceding, + (select * + from (select + item_sk, + rank() + over ( + order by rank_col desc) rnk + from (select + ss_item_sk item_sk, + avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9 * (select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_addr_sk is null + group by ss_store_sk)) v2) v21 + where rnk < 11) descending, + item i1, item i2 +where asceding.rnk = descending.rnk + and i1.i_item_sk = asceding.item_sk + and i2.i_item_sk = descending.item_sk +order by asceding.rnk +limit 100 +-- !query schema +struct +-- !query output + + + +-- !query +from store_sales ss1 +|> where ss_store_sk = 4 +|> aggregate avg(ss_net_profit) rank_col + group by ss_item_sk as item_sk +|> where rank_col > 0.9 * ( + from store_sales + |> where ss_store_sk = 4 + and ss_addr_sk is null + |> aggregate avg(ss_net_profit) rank_col + group by ss_store_sk + |> select rank_col) +|> as v1 +|> select + item_sk, + rank() over ( + order by rank_col asc) rnk +|> as v11 +|> where rnk < 11 +|> as asceding +|> join ( + from store_sales ss1 + |> where ss_store_sk = 4 + |> aggregate avg(ss_net_profit) rank_col + group by ss_item_sk as item_sk + |> where rank_col > 0.9 * ( + table store_sales + |> where ss_store_sk = 4 + and ss_addr_sk is null + |> aggregate avg(ss_net_profit) rank_col + group by ss_store_sk + |> select rank_col) + |> as v2 + |> select + item_sk, + rank() over ( + order by rank_col asc) rnk + |> as v21 + |> where rnk < 11) descending +|> join item i1 +|> join item i2 +|> where asceding.rnk = descending.rnk + and i1.i_item_sk = asceding.item_sk + 
and i2.i_item_sk = descending.item_sk +|> order by asceding.rnk +|> select + asceding.rnk, + i1.i_product_name best_performing, + i2.i_product_name worst_performing +-- !query schema +struct +-- !query output + + + +-- !query +with web_v1 as ( + select + ws_item_sk item_sk, + d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk + order by d_date + rows between unbounded preceding and current row) cume_sales + from web_sales, date_dim + where ws_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ws_item_sk is not null + group by ws_item_sk, d_date), + store_v1 as ( + select + ss_item_sk item_sk, + d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk + order by d_date + rows between unbounded preceding and current row) cume_sales + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ss_item_sk is not null + group by ss_item_sk, d_date) +select * +from (select + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) web_cumulative, + max(store_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) store_cumulative +from (select + case when web.item_sk is not null + then web.item_sk + else store.item_sk end item_sk, + case when web.d_date is not null + then web.d_date + else store.d_date end d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + ) x) y +where web_cumulative > store_cumulative +order by item_sk, d_date +limit 100 +-- !query schema +struct +-- !query output + + + +-- !query +with web_v1 as ( + table web_sales + |> join date_dim + |> where ws_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ws_item_sk is not null + |> aggregate 
sum(ws_sales_price) as sum_ws_sales_price + group by ws_item_sk as item_sk, d_date + |> extend sum(sum_ws_sales_price) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) + as cume_sales), +store_v1 as ( + table store_sales + |> join date_dim + |> where ss_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 + and ss_item_sk is not null + |> aggregate sum(ss_sales_price) as sum_ss_sales_price + group by ss_item_sk as item_sk, d_date + |> extend sum(sum_ss_sales_price) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) + as cume_sales) +table web_v1 +|> as web +|> full outer join store_v1 store + on (web.item_sk = store.item_sk and web.d_date = store.d_date) +|> select + case when web.item_sk is not null + then web.item_sk + else store.item_sk end item_sk, + case when web.d_date is not null + then web.d_date + else store.d_date end d_date, + web.cume_sales web_sales, + store.cume_sales store_sales +|> as x +|> select + item_sk, + d_date, + web_sales, + store_sales, + max(web_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) web_cumulative, + max(store_sales) + over (partition by item_sk + order by d_date + rows between unbounded preceding and current row) store_cumulative +|> as y +|> where web_cumulative > store_cumulative +|> order by item_sk, d_date +|> limit 100 +-- !query schema +struct +-- !query output + + + -- !query drop table t -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index d14161e93a9f0..2583d14b512ba 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -269,7 +269,7 @@ View Text SELECT * FROM base_table View Original Text 
SELECT * FROM base_table View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] +View Query Output Columns [`a`, `id`] -- !query @@ -335,7 +335,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] +View Query Output Columns [`a`, `id`] -- !query @@ -391,7 +391,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a WHERE t1.id = t2.id View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [t1_a, t2_a] +View Query Output Columns [`t1_a`, `t2_a`] -- !query @@ -464,7 +464,7 @@ View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_t View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] +View Query Output Columns [`a`, `id`] -- !query @@ -495,7 +495,7 @@ View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_ View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [id, a] +View Query Output Columns [`id`, `a`] -- !query @@ -526,7 +526,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] +View Query Output Columns [`a`, `id`] -- !query @@ -557,7 +557,7 @@ View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Schema Mode COMPENSATION View Catalog 
and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] +View Query Output Columns [`a`, `id`] -- !query @@ -588,7 +588,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test -View Query Output Columns [a, id] +View Query Output Columns [`a`, `id`] -- !query @@ -800,7 +800,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [num, name, num2, value] +View Query Output Columns [`num`, `name`, `num2`, `value`] -- !query @@ -851,7 +851,7 @@ View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [num, name, num2, value] +View Query Output Columns [`num`, `name`, `num2`, `value`] -- !query @@ -902,7 +902,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [num, name, num2, value] +View Query Output Columns [`num`, `name`, `num2`, `value`] -- !query @@ -953,7 +953,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [num, name, num2, value] +View Query Output Columns [`num`, `name`, `num2`, `value`] -- !query @@ -1074,7 +1074,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT 
JOIN tbl3 ON tbl4.h = tbl3.f) View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [a, b] +View Query Output Columns [`a`, `b`] -- !query @@ -1114,7 +1114,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 -View Query Output Columns [a, b] +View Query Output Columns [`a`, `b`] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/random.sql.out b/sql/core/src/test/resources/sql-tests/results/random.sql.out index d0bc5afe463dd..0f6f8dcb47561 100644 --- a/sql/core/src/test/resources/sql-tests/results/random.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/random.sql.out @@ -147,6 +147,22 @@ struct 7 +-- !query +SELECT uniform(0, cast(10 as tinyint), 0) AS result +-- !query schema +struct +-- !query output +7 + + +-- !query +SELECT uniform(0, cast(10 as smallint), 0) AS result +-- !query schema +struct +-- !query output +7 + + -- !query SELECT uniform(0, 10S, 0) AS result -- !query schema @@ -171,6 +187,38 @@ struct 17.604954 +-- !query +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(10, 3)), 0) AS result +-- !query schema +struct +-- !query output +17.605 + + +-- !query +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(11, 4)), 0) AS result +-- !query schema +struct +-- !query output +17.6050 + + +-- !query +SELECT uniform(10, cast(20 as decimal(10, 3)), 0) AS result +-- !query schema +struct +-- !query output +17.605 + + +-- !query +SELECT uniform(cast(10 as decimal(10, 3)), 20, 0) AS result +-- !query schema +struct +-- !query output +17.605 + + -- !query SELECT uniform(10.0D, 20.0D, CAST(3 / 7 AS LONG)) AS result -- !query schema @@ -205,10 +253,50 @@ struct true +-- !query +SELECT uniform(-10L, 10L, 0) AS result +-- !query schema +struct +-- !query output +5 + + +-- !query 
+SELECT uniform(-20L, -10L, 0) AS result +-- !query schema +struct +-- !query output +-12 + + +-- !query +SELECT uniform(-20L, -10L, -10) AS result +-- !query schema +struct +-- !query output +-17 + + -- !query SELECT uniform(NULL, 1, 0) AS result -- !query schema -struct +struct +-- !query output +NULL + + +-- !query +SELECT uniform(cast(NULL AS int), 1, 0) AS result +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT uniform(cast(NULL AS float), 1, 0) AS result +-- !query schema +struct -- !query output NULL @@ -216,7 +304,23 @@ NULL -- !query SELECT uniform(0, NULL, 0) AS result -- !query schema -struct +struct +-- !query output +NULL + + +-- !query +SELECT uniform(0, cast(NULL AS int), 0) AS result +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT uniform(0, cast(NULL AS float), 0) AS result +-- !query schema +struct -- !query output NULL @@ -224,11 +328,61 @@ NULL -- !query SELECT uniform(0, 1, NULL) AS result -- !query schema -struct +struct +-- !query output +0 + + +-- !query +SELECT uniform(NULL, NULL, 0) AS result +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT uniform(NULL, NULL, NULL) AS result +-- !query schema +struct -- !query output NULL +-- !query +SELECT uniform(0, 1, cast(NULL as int)) AS result +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT uniform(0, 1, cast(NULL as float)) AS result +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"CAST(NULL AS FLOAT)\"", + "inputType" : "\"FLOAT\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(0, 1, CAST(NULL AS FLOAT))\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 41, + "fragment" : "uniform(0, 1, cast(NULL as float))" + } ] 
+} + + -- !query SELECT uniform(10, 20, col) AS result FROM VALUES (0), (1), (2) tab(col) -- !query schema @@ -330,57 +484,33 @@ org.apache.spark.sql.AnalysisException -- !query -SELECT randstr(1, 0) AS result --- !query schema -struct --- !query output -c - - --- !query -SELECT randstr(5, 0) AS result --- !query schema -struct --- !query output -ceV0P - - --- !query -SELECT randstr(10, 0) AS result --- !query schema -struct --- !query output -ceV0PXaR2I - - --- !query -SELECT randstr(10S, 0) AS result +SELECT uniform(10.0F, 20.0F, 0.0F) AS result -- !query schema -struct --- !query output -ceV0PXaR2I - - --- !query -SELECT randstr(10, 0) AS result FROM VALUES (0), (1), (2) tab(col) --- !query schema -struct --- !query output -ceV0PXaR2I -fYxVfArnv7 -iSIv0VT2XL - - --- !query -SELECT randstr(10) IS NOT NULL AS result --- !query schema -struct +struct<> -- !query output -true +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"0.0\"", + "inputType" : "\"FLOAT\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(10.0, 20.0, 0.0)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 34, + "fragment" : "uniform(10.0F, 20.0F, 0.0F)" + } ] +} -- !query -SELECT randstr(10L, 0) AS result +SELECT uniform(10.0F, 20.0F, 0.0D) AS result -- !query schema struct<> -- !query output @@ -389,24 +519,24 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"10\"", - "inputType" : "\"BIGINT\"", - "paramIndex" : "first", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(10, 0)\"" + "inputSql" : "\"0.0\"", + "inputType" : "\"DOUBLE\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + 
"sqlExpr" : "\"uniform(10.0, 20.0, 0.0)\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 22, - "fragment" : "randstr(10L, 0)" + "stopIndex" : 34, + "fragment" : "uniform(10.0F, 20.0F, 0.0D)" } ] } -- !query -SELECT randstr(10.0F, 0) AS result +SELECT uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(10, 3)), cast(0 as decimal(10, 3))) -- !query schema struct<> -- !query output @@ -415,24 +545,24 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"10.0\"", - "inputType" : "\"FLOAT\"", - "paramIndex" : "first", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(10.0, 0)\"" + "inputSql" : "\"CAST(0 AS DECIMAL(10,3))\"", + "inputType" : "\"DECIMAL(10,3)\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(CAST(10 AS DECIMAL(10,3)), CAST(20 AS DECIMAL(10,3)), CAST(0 AS DECIMAL(10,3)))\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 24, - "fragment" : "randstr(10.0F, 0)" + "stopIndex" : 97, + "fragment" : "uniform(cast(10 as decimal(10, 3)), cast(20 as decimal(10, 3)), cast(0 as decimal(10, 3)))" } ] } -- !query -SELECT randstr(10.0D, 0) AS result +SELECT uniform('abc', 10, 0) AS result -- !query schema struct<> -- !query output @@ -441,24 +571,24 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"10.0\"", - "inputType" : "\"DOUBLE\"", + "inputSql" : "\"abc\"", + "inputType" : "\"STRING\"", "paramIndex" : "first", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(10.0, 0)\"" + "requiredType" : "\"NUMERIC\"", + "sqlExpr" : "\"uniform(abc, 10, 0)\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 
24, - "fragment" : "randstr(10.0D, 0)" + "stopIndex" : 28, + "fragment" : "uniform('abc', 10, 0)" } ] } -- !query -SELECT randstr(NULL, 0) AS result +SELECT uniform(0, 'def', 0) AS result -- !query schema struct<> -- !query output @@ -467,24 +597,24 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"NULL\"", - "inputType" : "\"VOID\"", - "paramIndex" : "first", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(NULL, 0)\"" + "inputSql" : "\"def\"", + "inputType" : "\"STRING\"", + "paramIndex" : "second", + "requiredType" : "\"NUMERIC\"", + "sqlExpr" : "\"uniform(0, def, 0)\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 23, - "fragment" : "randstr(NULL, 0)" + "stopIndex" : 27, + "fragment" : "uniform(0, 'def', 0)" } ] } -- !query -SELECT randstr(0, NULL) AS result +SELECT uniform(0, 10, 'ghi') AS result -- !query schema struct<> -- !query output @@ -493,22 +623,160 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", "sqlState" : "42K09", "messageParameters" : { - "inputSql" : "\"NULL\"", - "inputType" : "\"VOID\"", - "paramIndex" : "second", - "requiredType" : "INT or SMALLINT", - "sqlExpr" : "\"randstr(0, NULL)\"" + "inputSql" : "\"ghi\"", + "inputType" : "\"STRING\"", + "paramIndex" : "third", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"uniform(0, 10, ghi)\"" }, "queryContext" : [ { "objectType" : "", "objectName" : "", "startIndex" : 8, - "stopIndex" : 23, - "fragment" : "randstr(0, NULL)" + "stopIndex" : 28, + "fragment" : "uniform(0, 10, 'ghi')" } ] } +-- !query +SELECT randstr(1, 0) AS result +-- !query schema +struct +-- !query output +c + + +-- !query +SELECT randstr(5, 0) AS result +-- !query schema +struct +-- !query output +ceV0P + + +-- !query +SELECT randstr(10, 0) AS result +-- 
!query schema +struct +-- !query output +ceV0PXaR2I + + +-- !query +SELECT randstr(10S, 0) AS result +-- !query schema +struct +-- !query output +ceV0PXaR2I + + +-- !query +SELECT randstr(CAST(10 AS TINYINT), 0) AS result +-- !query schema +struct +-- !query output +ceV0PXaR2I + + +-- !query +SELECT randstr(CAST(10 AS BIGINT), 0) AS result +-- !query schema +struct +-- !query output +ceV0PXaR2I + + +-- !query +SELECT randstr(1.0F, 0) AS result +-- !query schema +struct +-- !query output +c + + +-- !query +SELECT randstr(1.0D, 0) AS result +-- !query schema +struct +-- !query output +c + + +-- !query +SELECT randstr(cast(1 AS DECIMAL(10, 2)), 0) AS result +-- !query schema +struct +-- !query output +c + + +-- !query +SELECT randstr(10, 0) AS result FROM VALUES (0), (1), (2) tab(col) +-- !query schema +struct +-- !query output +ceV0PXaR2I +fYxVfArnv7 +iSIv0VT2XL + + +-- !query +SELECT randstr(10) IS NOT NULL AS result +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT randstr(1, -1) AS result +-- !query schema +struct +-- !query output +S + + +-- !query +SELECT randstr(10L, 0) AS result +-- !query schema +struct +-- !query output +ceV0PXaR2I + + +-- !query +SELECT randstr(10.0F, 0) AS result +-- !query schema +struct +-- !query output +ceV0PXaR2I + + +-- !query +SELECT randstr(10.0D, 0) AS result +-- !query schema +struct +-- !query output +ceV0PXaR2I + + +-- !query +SELECT randstr(NULL, 0) AS result +-- !query schema +struct +-- !query output + + + +-- !query +SELECT randstr(0, NULL) AS result +-- !query schema +struct +-- !query output + + + -- !query SELECT randstr(col, 0) AS result FROM VALUES (0), (1), (2) tab(col) -- !query schema @@ -521,7 +789,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "inputExpr" : "\"col\"", "inputName" : "`length`", - "inputType" : "INT or SMALLINT", + "inputType" : "integer", "sqlExpr" : "\"randstr(col, 0)\"" }, "queryContext" : [ { @@ -546,7 +814,7 @@ 
org.apache.spark.sql.catalyst.ExtendedAnalysisException "messageParameters" : { "inputExpr" : "\"col\"", "inputName" : "`seed`", - "inputType" : "INT or SMALLINT", + "inputType" : "integer", "sqlExpr" : "\"randstr(10, col)\"" }, "queryContext" : [ { @@ -582,3 +850,72 @@ org.apache.spark.sql.AnalysisException "fragment" : "randstr(10, 0, 1)" } ] } + + +-- !query +SELECT randstr(-1, 0) AS result +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "condition" : "INVALID_PARAMETER_VALUE.LENGTH", + "sqlState" : "22023", + "messageParameters" : { + "functionName" : "`randstr`", + "length" : "-1", + "parameter" : "`length`" + } +} + + +-- !query +SELECT randstr(10, "a") AS result FROM VALUES (0) tab(a) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"a\"", + "inputType" : "\"STRING\"", + "paramIndex" : "second", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"randstr(10, a)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 23, + "fragment" : "randstr(10, \"a\")" + } ] +} + + +-- !query +SELECT randstr(10, 1.5) AS result FROM VALUES (0) tab(a) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"1.5\"", + "inputType" : "\"DECIMAL(2,1)\"", + "paramIndex" : "second", + "requiredType" : "(\"INT\" or \"BIGINT\")", + "sqlExpr" : "\"randstr(10, 1.5)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 23, + "fragment" : "randstr(10, 1.5)" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out 
b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index 4ecf65d0cc51a..0911efe3e09c6 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -249,10 +249,11 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException { - "condition" : "_LEGACY_ERROR_TEMP_1231", + "condition" : "PARTITIONS_NOT_FOUND", + "sqlState" : "428FT", "messageParameters" : { - "key" : "a", - "tblName" : "`spark_catalog`.`showdb`.`show_t1`" + "partitionList" : "`a`", + "tableName" : "`spark_catalog`.`showdb`.`show_t1`" } } diff --git a/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out index 9b9ac5a9edd55..13f43167c4dda 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-on-files.sql.out @@ -257,3 +257,27 @@ DROP DATABASE sql_on_files struct<> -- !query output + + +-- !query +SELECT * FROM json.`https://raw.githubusercontent.com/apache/spark/refs/heads/master/examples/src/main/resources/employees.json` +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "FAILED_READ_FILE.UNSUPPORTED_FILE_SYSTEM", + "sqlState" : "KD001", + "messageParameters" : { + "fileSystemClass" : "org.apache.hadoop.fs.http.HttpsFileSystem", + "method" : "listStatus", + "path" : "https://raw.githubusercontent.com/apache/spark/refs/heads/master/examples/src/main/resources/employees.json" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 128, + "fragment" : "json.`https://raw.githubusercontent.com/apache/spark/refs/heads/master/examples/src/main/resources/employees.json`" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out new file mode 
100644 index 0000000000000..08f2d75cce9d7 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/sql-udf.sql.out @@ -0,0 +1,484 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE FUNCTION foo1a0() RETURNS INT RETURN 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo1a0() +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT foo1a0(1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "1", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "0", + "functionName" : "`spark_catalog`.`default`.`foo1a0`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 16, + "fragment" : "foo1a0(1)" + } ] +} + + +-- !query +CREATE FUNCTION foo1a1(a INT) RETURNS INT RETURN 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo1a1(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT foo1a1(1, 2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "WRONG_NUM_ARGS.WITHOUT_SUGGESTION", + "sqlState" : "42605", + "messageParameters" : { + "actualNum" : "2", + "docroot" : "https://spark.apache.org/docs/latest", + "expectedNum" : "1", + "functionName" : "`spark_catalog`.`default`.`foo1a1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "foo1a1(1, 2)" + } ] +} + + +-- !query +CREATE FUNCTION foo1a2(a INT, b INT, c INT, d INT) RETURNS INT RETURN 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo1a2(1, 2, 3, 4) +-- !query schema +struct +-- !query output +1 + + +-- !query +CREATE FUNCTION foo2_1a(a INT) RETURNS INT RETURN a +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT 
foo2_1a(5) +-- !query schema +struct +-- !query output +5 + + +-- !query +CREATE FUNCTION foo2_1b(a INT, b INT) RETURNS INT RETURN a + b +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_1b(5, 6) +-- !query schema +struct +-- !query output +11 + + +-- !query +CREATE FUNCTION foo2_1c(a INT, b INT) RETURNS INT RETURN 10 * (a + b) + 100 * (a -b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_1c(5, 6) +-- !query schema +struct +-- !query output +10 + + +-- !query +CREATE FUNCTION foo2_1d(a INT, b INT) RETURNS INT RETURN ABS(a) - LENGTH(CAST(b AS VARCHAR(10))) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_1d(-5, 6) +-- !query schema +struct +-- !query output +4 + + +-- !query +CREATE FUNCTION foo2_2a(a INT) RETURNS INT RETURN SELECT a +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_2a(5) +-- !query schema +struct +-- !query output +5 + + +-- !query +CREATE FUNCTION foo2_2b(a INT) RETURNS INT RETURN 1 + (SELECT a) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_2b(5) +-- !query schema +struct +-- !query output +6 + + +-- !query +CREATE FUNCTION foo2_2c(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT a)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`a`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 21, + "stopIndex" : 21, + "fragment" : "a" + } ] +} + + +-- !query +CREATE FUNCTION foo2_2d(a INT) RETURNS INT RETURN 1 + (SELECT (SELECT (SELECT (SELECT a)))) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`a`" + }, + "queryContext" : [ { 
+ "objectType" : "", + "objectName" : "", + "startIndex" : 37, + "stopIndex" : 37, + "fragment" : "a" + } ] +} + + +-- !query +CREATE FUNCTION foo2_2e(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) WHERE c1 = 2 +UNION ALL +SELECT a + 1 FROM (VALUES 1) AS V(c1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION foo2_2f(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +EXCEPT +SELECT a + 1 FROM (VALUES 1) AS V(a) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION foo2_2g(a INT) RETURNS INT RETURN +SELECT a FROM (VALUES 1) AS V(c1) +INTERSECT +SELECT a FROM (VALUES 1) AS V(a) +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS ts +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS tm +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS ta +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS V1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS V2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS t1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS ts +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS tm +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS ta +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS V1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS V2 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION foo2_3(a INT, b INT) RETURNS 
INT RETURN a + b +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW V1(c1, c2) AS VALUES (1, 2), (3, 4), (5, 6) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE VIEW V2(c1, c2) AS VALUES (-1, -2), (-3, -4), (-5, -6) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_3(c1, c2), foo2_3(c2, 1), foo2_3(c1, c2) - foo2_3(c2, c1 - 1) FROM V1 ORDER BY 1, 2, 3 +-- !query schema +struct +-- !query output +3 3 1 +7 5 1 +11 7 1 + + +-- !query +SELECT * FROM V1 WHERE foo2_3(c1, 0) = c1 AND foo2_3(c1, c2) < 8 +-- !query schema +struct +-- !query output +1 2 +3 4 + + +-- !query +SELECT foo2_3(SUM(c1), SUM(c2)), SUM(c1) + SUM(c2), SUM(foo2_3(c1, c2) + foo2_3(c2, c1) - foo2_3(c2, c1)) +FROM V1 +-- !query schema +struct +-- !query output +21 21 21 + + +-- !query +CREATE FUNCTION foo2_4a(a ARRAY) RETURNS STRING RETURN +SELECT array_sort(a, (i, j) -> rank[i] - rank[j])[0] FROM (SELECT MAP('a', 1, 'b', 2) rank) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_4a(ARRAY('a', 'b')) +-- !query schema +struct +-- !query output +a + + +-- !query +CREATE FUNCTION foo2_4b(m MAP, k STRING) RETURNS STRING RETURN +SELECT v || ' ' || v FROM (SELECT upper(m[k]) AS v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT foo2_4b(map('a', 'hello', 'b', 'world'), 'a') +-- !query schema +struct +-- !query output +HELLO HELLO + + +-- !query +DROP VIEW V2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW V1 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out index d6d4a411ef59e..6fe77cd062253 100644 --- a/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/table-aliases.sql.out @@ -168,3 +168,49 @@ struct 1 a 1 8.5 2 b 2 1.0 3 c 3 3.2 + + +-- !query 
+SELECT src1.* FROM src1 a ORDER BY id LIMIT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "condition" : "CANNOT_RESOLVE_STAR_EXPAND", + "sqlState" : "42704", + "messageParameters" : { + "columns" : "`id`, `v1`", + "targetString" : "`src1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 13, + "fragment" : "src1.*" + } ] +} + + +-- !query +SELECT src1.id FROM (SELECT * FROM src1 ORDER BY id LIMIT 1) a +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "condition" : "UNRESOLVED_COLUMN.WITH_SUGGESTION", + "sqlState" : "42703", + "messageParameters" : { + "objectName" : "`src1`.`id`", + "proposal" : "`a`.`id`, `a`.`v1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 14, + "fragment" : "src1.id" + } ] +} diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out index 020b97baa8eee..432f77c93f553 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out @@ -395,7 +395,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2019-10-06 10:11:12.' 
could not be parsed at index 20" } } @@ -467,7 +467,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2019-10-06 10:11:12.1234567PST' could not be parsed, unparsed text found at index 26" } } @@ -491,7 +491,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '223456 2019-10-06 10:11:12.123456PST' could not be parsed at index 27" } } @@ -563,7 +563,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '12.1232019-10-06S10:11' could not be parsed at index 7" } } @@ -579,7 +579,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '12.1232019-10-06S10:11' could not be parsed at index 9" } } @@ -659,7 +659,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Invalid date 'February 29' as '1970' is not a leap year" } } diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index bf1e13a1a0239..b503287804bd4 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out 
@@ -409,7 +409,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2019-10-06 10:11:12.' could not be parsed at index 20" } } @@ -481,7 +481,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '2019-10-06 10:11:12.1234567PST' could not be parsed, unparsed text found at index 26" } } @@ -505,7 +505,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '223456 2019-10-06 10:11:12.123456PST' could not be parsed at index 27" } } @@ -577,7 +577,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '12.1232019-10-06S10:11' could not be parsed at index 7" } } @@ -593,7 +593,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text '12.1232019-10-06S10:11' could not be parsed at index 9" } } @@ -673,7 +673,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Invalid date 'February 29' as '1970' is not a leap year" } } diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out index f5dc87b7266de..01ba71a6a6782 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out @@ -376,7 +376,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text 'aa' could not be parsed at index 0" } } @@ -409,7 +409,7 @@ org.apache.spark.SparkDateTimeException "condition" : "CANNOT_PARSE_TIMESTAMP", "sqlState" : "22007", "messageParameters" : { - "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "func" : "`try_to_timestamp`", "message" : "Text 'aa' could not be parsed at index 0" } } diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out index 30ba31e71cc92..d59fc412d3f53 100644 --- a/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out @@ -137,7 +137,7 @@ Type VIEW View Text SELECT 1 View Original Text SELECT 1 View Catalog and Namespace spark_catalog.default -View Query Output Columns [1] +View Query Output Columns [`1`] -- !query @@ -155,7 +155,7 @@ Type: VIEW View Text: SELECT 1 View Original Text: SELECT 1 View Catalog and Namespace: spark_catalog.default -View Query Output Columns: [1] +View Query Output Columns: [`1`] Schema: root |-- 1: integer (nullable = false) @@ -206,7 +206,7 @@ Created By [not included in comparison] Type: VIEW View Text: SELECT 1 View Catalog and Namespace: spark_catalog.default -View Query Output Columns: [1] +View Query Output Columns: [`1`] Schema: root |-- 1: integer 
(nullable = false) @@ -269,7 +269,7 @@ Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -324,7 +324,7 @@ Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -402,7 +402,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -477,7 +477,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -550,7 +550,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -615,7 +615,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -680,7 +680,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -774,7 +774,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -837,7 +837,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace 
spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -895,7 +895,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -953,7 +953,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -1007,7 +1007,7 @@ View Text SELECT 1 View Original Text SELECT 1 View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [1] +View Query Output Columns [`1`] -- !query @@ -1026,7 +1026,7 @@ View Text: SELECT 1 View Original Text: SELECT 1 View Schema Mode: BINDING View Catalog and Namespace: spark_catalog.default -View Query Output Columns: [1] +View Query Output Columns: [`1`] Schema: root |-- 1: integer (nullable = false) @@ -1069,7 +1069,7 @@ View Text SELECT 1 View Original Text SELECT 1 View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [1] +View Query Output Columns [`1`] -- !query @@ -1088,7 +1088,7 @@ View Text: SELECT 1 View Original Text: SELECT 1 View Schema Mode: BINDING View Catalog and Namespace: spark_catalog.default -View Query Output Columns: [1] +View Query Output Columns: [`1`] Schema: root |-- 1: integer (nullable = false) @@ -1165,7 +1165,7 @@ Type: VIEW View Text: SELECT 1 View Schema Mode: BINDING View Catalog and Namespace: spark_catalog.default -View Query Output Columns: [1] +View Query Output Columns: [`1`] Schema: root |-- 1: integer (nullable = false) @@ -1199,7 +1199,7 @@ Type: VIEW View Text: SELECT 1 View Schema Mode: BINDING View Catalog and Namespace: spark_catalog.default -View Query Output Columns: [1] +View Query Output Columns: [`1`] Schema: 
root |-- 1: integer (nullable = false) diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out index 7dd3d2114d99f..3e76ba1db9d0e 100644 --- a/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out @@ -50,7 +50,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -106,7 +106,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -161,7 +161,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -219,7 +219,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -281,7 +281,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -311,7 +311,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -367,7 +367,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] 
+View Query Output Columns [`c1`] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-compensation.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-compensation.sql.out index a8bfe0891f72f..330c151b051db 100644 --- a/sql/core/src/test/resources/sql-tests/results/view-schema-compensation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-compensation.sql.out @@ -58,7 +58,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -112,7 +112,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -166,7 +166,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -260,7 +260,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -323,7 +323,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -381,7 +381,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -439,7 +439,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and 
Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -493,7 +493,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode BINDING View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -565,7 +565,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out index 92cde7735c96f..0b49aafe04932 100644 --- a/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out @@ -59,7 +59,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -114,7 +114,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c4, c5] +View Query Output Columns [`c4`, `c5`] -- !query @@ -170,7 +170,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c4, c5, c6] +View Query Output Columns [`c4`, `c5`, `c6`] -- !query @@ -233,7 +233,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -279,7 +279,7 @@ View Text SELECT * FROM t View Original 
Text SELECT * FROM t View Schema Mode EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -342,7 +342,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -397,7 +397,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -452,7 +452,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -515,7 +515,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -573,7 +573,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -631,7 +631,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -678,7 +678,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -709,7 +709,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View 
Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -756,7 +756,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -787,7 +787,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -834,7 +834,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -865,7 +865,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -912,7 +912,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -1094,7 +1094,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-type-evolution.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-type-evolution.sql.out index 57193c610c0a9..de0655750d503 100644 --- a/sql/core/src/test/resources/sql-tests/results/view-schema-type-evolution.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/results/view-schema-type-evolution.sql.out @@ -59,7 +59,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -114,7 +114,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -169,7 +169,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -232,7 +232,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -290,7 +290,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -348,7 +348,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -395,7 +395,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -442,7 +442,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output 
Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -473,7 +473,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -520,7 +520,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1, c2] +View Query Output Columns [`c1`, `c2`] -- !query @@ -606,7 +606,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query @@ -644,7 +644,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Schema Mode TYPE EVOLUTION View Catalog and Namespace spark_catalog.default -View Query Output Columns [c1] +View Query Output Columns [`c1`] -- !query diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/.0.crc new file mode 100644 index 0000000000000..dd09db7ad216c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/.1.crc new file mode 100644 index 0000000000000..dd09db7ad216c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/.1.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/0 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/0 new file mode 100644 index 0000000000000..7e7f3b21c4e78 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0,"stateUniqueIds":{}} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/1 new file mode 100644 index 0000000000000..7e7f3b21c4e78 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0,"stateUniqueIds":{}} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/metadata new file mode 100644 index 0000000000000..6888983b0bc5d --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/metadata @@ -0,0 +1 @@ +{"id":"f3f30619-9175-4329-97a7-f5629deaad89"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/.0.crc new file mode 100644 index 0000000000000..400184017c910 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/.1.crc new file mode 100644 index 0000000000000..397dde18c6d5a Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/.1.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/0 new file mode 100644 index 0000000000000..8177241a333b1 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1734074255407,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.stateStore.encodingFormat":"avro","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.databricks.sql.optimizer.pruneFiltersCanPruneStreamingSubplan":"false"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/1 new file mode 100644 index 0000000000000..cf51e39873cd2 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/offsets/1 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1734074257473,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.stateStore.encodingFormat":"avro","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.databricks.sql.optimizer.pruneFiltersCanPruneStreamingSubplan":"false"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.1.changelog.crc new file mode 100644 index 0000000000000..d03e8f51a6f7f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.1.zip.crc new file mode 100644 index 0000000000000..9fe6838ba35e9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.2.changelog.crc new file mode 100644 index 0000000000000..c790833be99a5 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/1.changelog new file mode 100644 index 0000000000000..a579fe940633b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/1.zip new file mode 100644 index 0000000000000..1f2b15b840c20 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/2.changelog new file mode 100644 index 0000000000000..6df7672a3d0ec Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/SSTs/.000008-7b1ee246-6831-4c62-9fd7-7741cb534368.sst.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/SSTs/.000008-7b1ee246-6831-4c62-9fd7-7741cb534368.sst.crc new file mode 100644 index 0000000000000..2026a6dcab3bf Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/SSTs/.000008-7b1ee246-6831-4c62-9fd7-7741cb534368.sst.crc differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/SSTs/000008-7b1ee246-6831-4c62-9fd7-7741cb534368.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/SSTs/000008-7b1ee246-6831-4c62-9fd7-7741cb534368.sst new file mode 100644 index 0000000000000..c4f2175a47371 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/0/SSTs/000008-7b1ee246-6831-4c62-9fd7-7741cb534368.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.1.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.1.zip.crc new file mode 100644 index 0000000000000..0b52e7b4922a0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.2.changelog.crc new file mode 100644 index 0000000000000..889a96a47f0a3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/1.changelog 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/1.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/1.zip new file mode 100644 index 0000000000000..3986c42e7e5ef Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/2.changelog new file mode 100644 index 0000000000000..13ec6c9cdd843 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/1/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.1.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.1.zip.crc new file mode 100644 index 0000000000000..6e5b8098e6e40 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.1.zip.crc differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.2.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/1.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/1.zip new file mode 100644 index 0000000000000..59318e5f734e9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/2.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/2/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.1.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.1.zip.crc new file mode 100644 index 0000000000000..4899bd7e696cb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.2.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/1.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/1.zip new file mode 100644 index 0000000000000..29a07a94b5dbc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/2.changelog 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/2.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/3/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.1.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.1.zip.crc new file mode 100644 index 0000000000000..846c94b6dfcfc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.2.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/1.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/1.zip new file mode 100644 index 0000000000000..38e6a75814585 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/2.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/4/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_metadata/v2/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_metadata/v2/.0.crc new file mode 100644 index 0000000000000..9850f2cdfc9b6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_metadata/v2/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_metadata/v2/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_metadata/v2/0 new file mode 100644 index 0000000000000..5c53036530462 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_metadata/v2/0 @@ -0,0 +1,2 @@ +v2 
+{"operatorInfo":{"operatorId":0,"operatorName":"transformWithStateExec"},"stateStoreInfo":[{"storeName":"default","numColsPrefixKey":0,"numPartitions":5,"stateSchemaFilePath":"file:/Users/anish.shrigondekar/spark/spark/target/tmp/spark-dcaeba6f-ff09-4f91-ba1b-4d14fe53cc9f/state/0/_stateSchema/default/0_6b12d3c5-57e6-4001-8321-3ae63d6be7a0"}],"operatorPropertiesJson":"{\"timeMode\":\"NoTime\",\"outputMode\":\"Update\",\"stateVariables\":[{\"stateName\":\"countState\",\"stateVariableType\":\"ValueState\",\"ttlEnabled\":false}]}"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_stateSchema/default/.0_6b12d3c5-57e6-4001-8321-3ae63d6be7a0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_stateSchema/default/.0_6b12d3c5-57e6-4001-8321-3ae63d6be7a0.crc new file mode 100644 index 0000000000000..3c16c8244a3b7 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_stateSchema/default/.0_6b12d3c5-57e6-4001-8321-3ae63d6be7a0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_stateSchema/default/0_6b12d3c5-57e6-4001-8321-3ae63d6be7a0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_stateSchema/default/0_6b12d3c5-57e6-4001-8321-3ae63d6be7a0 new file mode 100644 index 0000000000000..cd3e8f6d96bf5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-avro/state/0/_stateSchema/default/0_6b12d3c5-57e6-4001-8321-3ae63d6be7a0 differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/.0.crc new file mode 100644 index 0000000000000..dd09db7ad216c Binary files /dev/null 
and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/.1.crc new file mode 100644 index 0000000000000..dd09db7ad216c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/.1.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/0 new file mode 100644 index 0000000000000..7e7f3b21c4e78 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0,"stateUniqueIds":{}} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/1 new file mode 100644 index 0000000000000..7e7f3b21c4e78 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0,"stateUniqueIds":{}} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/metadata new file mode 100644 index 0000000000000..d236981545754 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/metadata @@ -0,0 +1 @@ +{"id":"1341f9d1-5100-4426-876c-2754aeaca02b"} \ No newline at end of file diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/.0.crc new file mode 100644 index 0000000000000..15e5afc45b88a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/.1.crc new file mode 100644 index 0000000000000..de2378b2b16e9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/.1.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/0 new file mode 100644 index 0000000000000..2f2a25bda6322 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1734074067729,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.stateStore.encodingFormat":"unsaferow","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.databricks.sql.optimizer.pruneFiltersCanPruneStreamingSubplan":"false"}} 
+0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/1 new file mode 100644 index 0000000000000..3295f1a1579b9 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1734074071551,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.stateStore.encodingFormat":"unsaferow","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.databricks.sql.optimizer.pruneFiltersCanPruneStreamingSubplan":"false"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.1.changelog.crc new file mode 100644 index 0000000000000..a1ca2c3ed5c1c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.1.zip.crc new file mode 100644 index 0000000000000..ad0d75698608c Binary files 
/dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.2.changelog.crc new file mode 100644 index 0000000000000..4ad793ade5782 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/1.changelog new file mode 100644 index 0000000000000..21cc1e7055f47 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/1.zip new file mode 100644 index 0000000000000..2c6fbb713b436 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/2.changelog new file mode 100644 index 0000000000000..2375a971fdc20 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/2.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/SSTs/.000008-9b6e23ce-e7de-4df8-b320-2b0378b53e52.sst.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/SSTs/.000008-9b6e23ce-e7de-4df8-b320-2b0378b53e52.sst.crc new file mode 100644 index 0000000000000..d72d0acb543e1 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/SSTs/.000008-9b6e23ce-e7de-4df8-b320-2b0378b53e52.sst.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/SSTs/000008-9b6e23ce-e7de-4df8-b320-2b0378b53e52.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/SSTs/000008-9b6e23ce-e7de-4df8-b320-2b0378b53e52.sst new file mode 100644 index 0000000000000..bed4218de7ece Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/0/SSTs/000008-9b6e23ce-e7de-4df8-b320-2b0378b53e52.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.1.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.1.zip.crc new file mode 100644 index 0000000000000..2e4c44069438a Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.2.changelog.crc new file mode 100644 index 0000000000000..57fb3ea9d4a67 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/1.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/1.zip new file mode 100644 index 0000000000000..85388bed9dc40 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/2.changelog new file mode 100644 index 0000000000000..daf1c187e8b70 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/1/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.1.changelog.crc 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.1.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.1.zip.crc new file mode 100644 index 0000000000000..03b4665cea145 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.2.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/1.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/1.zip new file mode 100644 index 0000000000000..bf6c8277bc195 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/2.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/2/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.1.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.1.zip.crc new file mode 100644 index 0000000000000..a28994f96defc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.2.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/.2.changelog.crc differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/1.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/1.zip new file mode 100644 index 0000000000000..f4b734fa36955 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/2.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/3/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.1.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.1.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.1.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.1.zip.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.1.zip.crc new file mode 100644 index 
0000000000000..80ad867da34f3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.1.zip.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.2.changelog.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.2.changelog.crc new file mode 100644 index 0000000000000..22e87bcdbe201 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/.2.changelog.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/1.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/1.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/1.zip new file mode 100644 index 0000000000000..e91c9a6741613 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/1.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/2.changelog new file mode 100644 index 0000000000000..85a6a13b976fc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/4/2.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_metadata/v2/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_metadata/v2/.0.crc new file mode 100644 index 0000000000000..257c878a1611b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_metadata/v2/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_metadata/v2/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_metadata/v2/0 new file mode 100644 index 0000000000000..313c13df69acb --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_metadata/v2/0 @@ -0,0 +1,2 @@ +v2 +{"operatorInfo":{"operatorId":0,"operatorName":"transformWithStateExec"},"stateStoreInfo":[{"storeName":"default","numColsPrefixKey":0,"numPartitions":5,"stateSchemaFilePath":"file:/Users/anish.shrigondekar/spark/spark/target/tmp/spark-ae28252a-e696-4653-a9a5-7a9a0766f4c1/state/0/_stateSchema/default/0_2e8e6b52-e3c3-4184-b8ef-8d391b75d751"}],"operatorPropertiesJson":"{\"timeMode\":\"NoTime\",\"outputMode\":\"Update\",\"stateVariables\":[{\"stateName\":\"countState\",\"stateVariableType\":\"ValueState\",\"ttlEnabled\":false}]}"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_stateSchema/default/.0_2e8e6b52-e3c3-4184-b8ef-8d391b75d751.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_stateSchema/default/.0_2e8e6b52-e3c3-4184-b8ef-8d391b75d751.crc new file mode 100644 index 0000000000000..3c16c8244a3b7 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_stateSchema/default/.0_2e8e6b52-e3c3-4184-b8ef-8d391b75d751.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_stateSchema/default/0_2e8e6b52-e3c3-4184-b8ef-8d391b75d751 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_stateSchema/default/0_2e8e6b52-e3c3-4184-b8ef-8d391b75d751 new file mode 100644 index 0000000000000..cd3e8f6d96bf5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0-tws-unsaferow/state/0/_stateSchema/default/0_2e8e6b52-e3c3-4184-b8ef-8d391b75d751 differ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/resources/testCommitLogV1/testCommitLog b/sql/core/src/test/resources/structured-streaming/testCommitLogV1/testCommitLog similarity index 100% rename from sql/core/src/test/scala/org/apache/spark/sql/streaming/resources/testCommitLogV1/testCommitLog rename to sql/core/src/test/resources/structured-streaming/testCommitLogV1/testCommitLog diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/resources/testCommitLogV2/testCommitLog b/sql/core/src/test/resources/structured-streaming/testCommitLogV2/testCommitLog similarity index 100% rename from sql/core/src/test/scala/org/apache/spark/sql/streaming/resources/testCommitLogV2/testCommitLog rename to sql/core/src/test/resources/structured-streaming/testCommitLogV2/testCommitLog diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AggregateHashMapSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AggregateHashMapSuite.scala index b253c4a70bbf9..e1b0676831549 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AggregateHashMapSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AggregateHashMapSuite.scala @@ -21,7 +21,9 @@ import org.scalatest.BeforeAndAfter import 
org.apache.spark.SparkConf import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.tags.SlowSQLTest +@SlowSQLTest class SingleLevelAggregateHashMapSuite extends DataFrameAggregateSuite with BeforeAndAfter { override protected def sparkConf: SparkConf = super.sparkConf .set(SQLConf.CODEGEN_FALLBACK.key, "false") @@ -37,6 +39,7 @@ class SingleLevelAggregateHashMapSuite extends DataFrameAggregateSuite with Befo } } +@SlowSQLTest class TwoLevelAggregateHashMapSuite extends DataFrameAggregateSuite with BeforeAndAfter { override protected def sparkConf: SparkConf = super.sparkConf .set(SQLConf.CODEGEN_FALLBACK.key, "false") @@ -52,6 +55,7 @@ class TwoLevelAggregateHashMapSuite extends DataFrameAggregateSuite with BeforeA } } +@SlowSQLTest class TwoLevelAggregateHashMapWithVectorizedMapSuite extends DataFrameAggregateSuite with BeforeAndAfter { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala index f22d90d9f35d7..e8b9ffe284940 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.expressions.{And, GreaterThan, LessThan, Literal, Or} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, GreaterThan, LessThan, Literal, Or, Rand} +import org.apache.spark.sql.catalyst.optimizer.InlineCTE import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.adaptive._ import org.apache.spark.sql.execution.exchange.ReusedExchangeExec @@ -715,7 +716,7 @@ abstract class CTEInlineSuiteBase checkAnswer(df, Row(1)) } - test("SPARK-49816: should only update out-going-ref-count for referenced outer CTE relation") { + test("SPARK-49816: detect self-contained WithCTE nodes") { withView("v") { sql( """ @@ -735,6 +736,86 @@ abstract class CTEInlineSuiteBase 
checkAnswer(df, Row(1)) } } + + test("SPARK-49816: complicated reference count") { + // Manually build the logical plan for + // WITH + // r1 AS (SELECT random()), + // r2 AS ( + // WITH + // t1 AS (SELECT * FROM r1), + // t2 AS (SELECT * FROM r1) + // SELECT * FROM t2 + // ) + // SELECT * FROM r2 + // r1 should be inlined as it's only referenced once: main query -> r2 -> t2 -> r1 + val r1 = CTERelationDef(Project(Seq(Alias(Rand(Literal(0)), "r")()), OneRowRelation())) + val r1Ref = CTERelationRef(r1.id, r1.resolved, r1.output, r1.isStreaming) + val t1 = CTERelationDef(Project(r1.output, r1Ref)) + val t2 = CTERelationDef(Project(r1.output, r1Ref)) + val t2Ref = CTERelationRef(t2.id, t2.resolved, t2.output, t2.isStreaming) + val r2 = CTERelationDef(WithCTE(Project(t2.output, t2Ref), Seq(t1, t2))) + val r2Ref = CTERelationRef(r2.id, r2.resolved, r2.output, r2.isStreaming) + val query = WithCTE(Project(r2.output, r2Ref), Seq(r1, r2)) + val inlined = InlineCTE().apply(query) + assert(!inlined.exists(_.isInstanceOf[WithCTE])) + } + + test("SPARK-49816: complicated reference count 2") { + // Manually build the logical plan for + // WITH + // r1 AS (SELECT random()), + // r2 AS ( + // WITH + // t1 AS (SELECT * FROM r1), + // t2 AS (SELECT * FROM t1) + // SELECT * FROM t2 + // ) + // SELECT * FROM r1 + // This is similar to the previous test case, but t2 reference t1 instead of r1, and the main + // query references r1. r1 should be inlined as r2 is not referenced at all. 
+ val r1 = CTERelationDef(Project(Seq(Alias(Rand(Literal(0)), "r")()), OneRowRelation())) + val r1Ref = CTERelationRef(r1.id, r1.resolved, r1.output, r1.isStreaming) + val t1 = CTERelationDef(Project(r1.output, r1Ref)) + val t1Ref = CTERelationRef(t1.id, t1.resolved, t1.output, t1.isStreaming) + val t2 = CTERelationDef(Project(t1.output, t1Ref)) + val t2Ref = CTERelationRef(t2.id, t2.resolved, t2.output, t2.isStreaming) + val r2 = CTERelationDef(WithCTE(Project(t2.output, t2Ref), Seq(t1, t2))) + val query = WithCTE(Project(r1.output, r1Ref), Seq(r1, r2)) + val inlined = InlineCTE().apply(query) + assert(!inlined.exists(_.isInstanceOf[WithCTE])) + } + + test("SPARK-49816: complicated reference count 3") { + // Manually build the logical plan for + // WITH + // r1 AS ( + // WITH + // t1 AS (SELECT random()), + // t2 AS (SELECT * FROM t1) + // SELECT * FROM t2 + // ), + // r2 AS ( + // WITH + // t1 AS (SELECT random()), + // t2 AS (SELECT * FROM r1) + // SELECT * FROM t2 + // ) + // SELECT * FROM r1 UNION ALL SELECT * FROM r2 + // The inner WITH in r1 and r2 should become `SELECT random()` and r1/r2 should be inlined. 
+ val t1 = CTERelationDef(Project(Seq(Alias(Rand(Literal(0)), "r")()), OneRowRelation())) + val t1Ref = CTERelationRef(t1.id, t1.resolved, t1.output, t1.isStreaming) + val t2 = CTERelationDef(Project(t1.output, t1Ref)) + val t2Ref = CTERelationRef(t2.id, t2.resolved, t2.output, t2.isStreaming) + val cte = WithCTE(Project(t2.output, t2Ref), Seq(t1, t2)) + val r1 = CTERelationDef(cte) + val r1Ref = CTERelationRef(r1.id, r1.resolved, r1.output, r1.isStreaming) + val r2 = CTERelationDef(cte) + val r2Ref = CTERelationRef(r2.id, r2.resolved, r2.output, r2.isStreaming) + val query = WithCTE(Union(r1Ref, r2Ref), Seq(r1, r2)) + val inlined = InlineCTE().apply(query) + assert(!inlined.exists(_.isInstanceOf[WithCTE])) + } } class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with DisableAdaptiveExecutionSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index d3b11274fe1c8..47ebd387e89a3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -86,6 +86,27 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { } } + test("preserve char/varchar type info") { + Seq(CharType(5), VarcharType(5)).foreach { typ => + for { + char_varchar_as_string <- Seq(false, true) + preserve_char_varchar <- Seq(false, true) + } { + withSQLConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key -> char_varchar_as_string.toString, + SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> preserve_char_varchar.toString) { + withTable("t") { + val name = typ.typeName + sql(s"CREATE TABLE t(i STRING, c $name) USING $format") + val schema = spark.table("t").schema + assert(schema.fields(0).dataType == StringType) + val expectedType = if (preserve_char_varchar) typ else StringType + assert(schema.fields(1).dataType == expectedType) + } + } + } + } + } + test("char type values should be 
padded or trimmed: partitioned columns") { // via dynamic partitioned columns withTable("t") { @@ -674,6 +695,90 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { } } } + + test(s"insert string literal into char/varchar column when " + + s"${SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key} is true") { + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + withTable("t") { + sql(s"CREATE TABLE t(c1 CHAR(5), c2 VARCHAR(5)) USING $format") + sql("INSERT INTO t VALUES ('1234', '1234')") + checkAnswer(spark.table("t"), Row("1234 ", "1234")) + assertLengthCheckFailure("INSERT INTO t VALUES ('123456', '1')") + assertLengthCheckFailure("INSERT INTO t VALUES ('1', '123456')") + } + } + } + + test(s"insert from string column into char/varchar column when " + + s"${SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key} is true") { + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + withTable("a", "b") { + sql(s"CREATE TABLE a AS SELECT '1234' as c1, '1234' as c2") + sql(s"CREATE TABLE b(c1 CHAR(5), c2 VARCHAR(5)) USING $format") + sql("INSERT INTO b SELECT * FROM a") + checkAnswer(spark.table("b"), Row("1234 ", "1234")) + spark.table("b").show() + } + } + } + + test(s"cast from char/varchar when ${SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key} is true") { + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + Seq("char(5)", "varchar(5)").foreach { typ => + Seq( + "int" -> ("123", 123), + "long" -> ("123 ", 123L), + "boolean" -> ("true ", true), + "boolean" -> ("false", false), + "double" -> ("1.2", 1.2) + ).foreach { case (toType, (from, to)) => + assert(sql(s"select cast($from :: $typ as $toType)").collect() === Array(Row(to))) + } + } + } + } + + test(s"cast to char/varchar when ${SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key} is true") { + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true") { + Seq("char(10)", "varchar(10)").foreach { typ => + Seq( + 123 -> "123", + 123L-> "123", + true -> "true", + 
false -> "false", + 1.2 -> "1.2" + ).foreach { case (from, to) => + val paddedTo = if (typ == "char(10)") { + to.padTo(10, ' ') + } else { + to + } + sql(s"select cast($from as $typ)").collect() === Array(Row(paddedTo)) + } + } + } + } + + test("implicitly cast char/varchar into atomics") { + Seq("char", "varchar").foreach { typ => + withSQLConf(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key -> "true", + SQLConf.ANSI_ENABLED.key -> "true") { + checkAnswer(sql( + s""" + |SELECT + |NOT('false'::$typ(5)), + |1 + ('4'::$typ(5)), + |2L + ('4'::$typ(5)), + |3S + ('4'::$typ(5)), + |4Y - ('4'::$typ(5)), + |1.2 / ('0.6'::$typ(5)), + |MINUTE('2009-07-30 12:58:59'::$typ(30)), + |if(true, '0'::$typ(5), 1), + |if(false, '0'::$typ(5), 1) + """.stripMargin), Row(true, 5, 6, 7, 0, 2.0, 58, 0, 1)) + } + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationExpressionWalkerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationExpressionWalkerSuite.scala index bc62fa5fdd331..1f9589c1c9ce4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationExpressionWalkerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationExpressionWalkerSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.{SparkFunSuite, SparkRuntimeException} import org.apache.spark.sql.catalyst.analysis.ExpressionBuilder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.variant.ParseJson -import org.apache.spark.sql.internal.SqlApiConf +import org.apache.spark.sql.internal.{SqlApiConf, SQLConf} import org.apache.spark.sql.internal.types._ import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -104,6 +104,7 @@ class CollationExpressionWalkerSuite extends SparkFunSuite with SharedSparkSessi Literal.create("DuMmY sTrInG".getBytes) } case BooleanType => Literal(true) + case ByteType => Literal(5.toByte) 
case _: DatetimeType => Literal(Timestamp.valueOf("2009-07-30 12:58:59")) case DecimalType => Literal((new Decimal).set(5)) case _: DecimalType => Literal((new Decimal).set(5)) @@ -183,6 +184,7 @@ class CollationExpressionWalkerSuite extends SparkFunSuite with SharedSparkSessi case Utf8Lcase => "Cast('DuMmY sTrInG' collate utf8_lcase as BINARY)" } case BooleanType => "True" + case ByteType => "cast(5 as tinyint)" case _: DatetimeType => "date'2016-04-08'" case DecimalType => "5.0" case _: DecimalType => "5.0" @@ -243,6 +245,7 @@ class CollationExpressionWalkerSuite extends SparkFunSuite with SharedSparkSessi case AnyTimestampType => "TIMESTAMP" case BinaryType => "BINARY" case BooleanType => "BOOLEAN" + case ByteType => "TINYINT" case _: DatetimeType => "DATE" case DecimalType => "DECIMAL(2, 1)" case _: DecimalType => "DECIMAL(2, 1)" @@ -636,48 +639,49 @@ class CollationExpressionWalkerSuite extends SparkFunSuite with SharedSparkSessi val expr = headConstructor.newInstance(args: _*).asInstanceOf[ExpectsInputTypes] withTable("tbl", "tbl_lcase") { + withSQLConf(SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true") { + val utf8_df = generateTableData(expr.inputTypes.take(2), Utf8Binary) + val utf8_lcase_df = generateTableData(expr.inputTypes.take(2), Utf8Lcase) + + val utf8BinaryResult = try { + val df = utf8_df.selectExpr(transformExpressionToString(expr, Utf8Binary)) + df.getRows(1, 0) + scala.util.Right(df) + } catch { + case e: Throwable => scala.util.Left(e) + } + val utf8LcaseResult = try { + val df = utf8_lcase_df.selectExpr(transformExpressionToString(expr, Utf8Lcase)) + df.getRows(1, 0) + scala.util.Right(df) + } catch { + case e: Throwable => scala.util.Left(e) + } - val utf8_df = generateTableData(expr.inputTypes.take(2), Utf8Binary) - val utf8_lcase_df = generateTableData(expr.inputTypes.take(2), Utf8Lcase) - - val utf8BinaryResult = try { - val df = utf8_df.selectExpr(transformExpressionToString(expr, Utf8Binary)) - df.getRows(1, 0) - scala.util.Right(df) - 
} catch { - case e: Throwable => scala.util.Left(e) - } - val utf8LcaseResult = try { - val df = utf8_lcase_df.selectExpr(transformExpressionToString(expr, Utf8Lcase)) - df.getRows(1, 0) - scala.util.Right(df) - } catch { - case e: Throwable => scala.util.Left(e) - } - - assert(utf8BinaryResult.isLeft === utf8LcaseResult.isLeft) + assert(utf8BinaryResult.isLeft === utf8LcaseResult.isLeft) - if (utf8BinaryResult.isRight) { - val utf8BinaryResultChecked = utf8BinaryResult.getOrElse(null) - val utf8LcaseResultChecked = utf8LcaseResult.getOrElse(null) + if (utf8BinaryResult.isRight) { + val utf8BinaryResultChecked = utf8BinaryResult.getOrElse(null) + val utf8LcaseResultChecked = utf8LcaseResult.getOrElse(null) - val dt = utf8BinaryResultChecked.schema.fields.head.dataType + val dt = utf8BinaryResultChecked.schema.fields.head.dataType - dt match { - case st if utf8BinaryResultChecked != null && utf8LcaseResultChecked != null && - hasStringType(st) => - // scalastyle:off caselocale - assert(utf8BinaryResultChecked.getRows(1, 0).map(_.map(_.toLowerCase))(1) === - utf8LcaseResultChecked.getRows(1, 0).map(_.map(_.toLowerCase))(1)) + dt match { + case st if utf8BinaryResultChecked != null && utf8LcaseResultChecked != null && + hasStringType(st) => + // scalastyle:off caselocale + assert(utf8BinaryResultChecked.getRows(1, 0).map(_.map(_.toLowerCase))(1) === + utf8LcaseResultChecked.getRows(1, 0).map(_.map(_.toLowerCase))(1)) // scalastyle:on caselocale - case _ => - assert(utf8BinaryResultChecked.getRows(1, 0)(1) === - utf8LcaseResultChecked.getRows(1, 0)(1)) + case _ => + assert(utf8BinaryResultChecked.getRows(1, 0)(1) === + utf8LcaseResultChecked.getRows(1, 0)(1)) + } + } + else { + assert(utf8BinaryResult.getOrElse(new Exception()).getClass + == utf8LcaseResult.getOrElse(new Exception()).getClass) } - } - else { - assert(utf8BinaryResult.getOrElse(new Exception()).getClass - == utf8LcaseResult.getOrElse(new Exception()).getClass) } } } @@ -728,6 +732,7 @@ class 
CollationExpressionWalkerSuite extends SparkFunSuite with SharedSparkSessi // other functions which are not yet supported "to_avro", "from_avro", + "schema_of_avro", "to_protobuf", "from_protobuf" ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala index 6feb4587b816f..384411a0fd342 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.{SparkConf, SparkException, SparkIllegalArgumentExceptio import org.apache.spark.sql.catalyst.{ExtendedAnalysisException, InternalRow} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.Mode +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.internal.{SqlApiConf, SQLConf} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -38,8 +39,19 @@ class CollationSQLExpressionsSuite with SharedSparkSession with ExpressionEvalHelper { - private val testSuppCollations = Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI") - private val testAdditionalCollations = Seq("UNICODE", "SR", "SR_CI", "SR_AI", "SR_CI_AI") + private val testSuppCollations = + Seq( + "UTF8_BINARY", + "UTF8_BINARY_RTRIM", + "UTF8_LCASE", + "UTF8_LCASE_RTRIM", + "UNICODE", + "UNICODE_RTRIM", + "UNICODE_CI", + "UNICODE_CI_RTRIM") + private val testAdditionalCollations = Seq("UNICODE", + "SR", "SR_RTRIM", "SR_CI", "SR_AI", "SR_CI_AI") + private val fullyQualifiedPrefix = s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}." 
test("Support Md5 hash expression with collation") { case class Md5TestCase( @@ -262,11 +274,19 @@ class CollationSQLExpressionsSuite val testCases = Seq( UrlEncodeTestCase("https://spark.apache.org", "UTF8_BINARY", "https%3A%2F%2Fspark.apache.org"), + UrlEncodeTestCase("https://spark.apache.org", "UTF8_BINARY_RTRIM", + "https%3A%2F%2Fspark.apache.org"), UrlEncodeTestCase("https://spark.apache.org", "UTF8_LCASE", "https%3A%2F%2Fspark.apache.org"), + UrlEncodeTestCase("https://spark.apache.org", "UTF8_LCASE_RTRIM", + "https%3A%2F%2Fspark.apache.org"), UrlEncodeTestCase("https://spark.apache.org", "UNICODE", "https%3A%2F%2Fspark.apache.org"), + UrlEncodeTestCase("https://spark.apache.org", "UNICODE_RTRIM", + "https%3A%2F%2Fspark.apache.org"), UrlEncodeTestCase("https://spark.apache.org", "UNICODE_CI", + "https%3A%2F%2Fspark.apache.org"), + UrlEncodeTestCase("https://spark.apache.org", "UNICODE_CI_RTRIM", "https%3A%2F%2Fspark.apache.org") ) @@ -296,11 +316,19 @@ class CollationSQLExpressionsSuite val testCases = Seq( UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UTF8_BINARY", "https://spark.apache.org"), + UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UTF8_BINARY_RTRIM", + "https://spark.apache.org"), UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UTF8_LCASE", "https://spark.apache.org"), + UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UTF8_LCASE_RTRIM", + "https://spark.apache.org"), UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UNICODE", "https://spark.apache.org"), + UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UNICODE_RTRIM", + "https://spark.apache.org"), UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UNICODE_CI", + "https://spark.apache.org"), + UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UNICODE_CI_RTRIM", "https://spark.apache.org") ) @@ -331,11 +359,19 @@ class CollationSQLExpressionsSuite val testCases = Seq( ParseUrlTestCase("http://spark.apache.org/path?query=1", "UTF8_BINARY", "HOST", 
"spark.apache.org"), + ParseUrlTestCase("http://spark.apache.org/path?query=1", "UTF8_BINARY_RTRIM", "HOST", + "spark.apache.org"), ParseUrlTestCase("http://spark.apache.org/path?query=2", "UTF8_LCASE", "PATH", "/path"), + ParseUrlTestCase("http://spark.apache.org/path?query=2", "UTF8_LCASE_RTRIM", "PATH", + "/path"), ParseUrlTestCase("http://spark.apache.org/path?query=3", "UNICODE", "QUERY", "query=3"), + ParseUrlTestCase("http://spark.apache.org/path?query=3", "UNICODE_RTRIM", "QUERY", + "query=3"), ParseUrlTestCase("http://spark.apache.org/path?query=4", "UNICODE_CI", "PROTOCOL", + "http"), + ParseUrlTestCase("http://spark.apache.org/path?query=4", "UNICODE_CI_RTRIM", "PROTOCOL", "http") ) @@ -370,15 +406,32 @@ class CollationSQLExpressionsSuite Row(1), Seq( StructField("a", IntegerType, nullable = true) )), + CsvToStructsTestCase("1", "UTF8_BINARY_RTRIM", "'a INT'", "", + Row(1), Seq( + StructField("a", IntegerType, nullable = true) + )), CsvToStructsTestCase("true, 0.8", "UTF8_LCASE", "'A BOOLEAN, B DOUBLE'", "", Row(true, 0.8), Seq( StructField("A", BooleanType, nullable = true), StructField("B", DoubleType, nullable = true) )), + CsvToStructsTestCase("true, 0.8", "UTF8_LCASE_RTRIM", "'A BOOLEAN, B DOUBLE'", "", + Row(true, 0.8), Seq( + StructField("A", BooleanType, nullable = true), + StructField("B", DoubleType, nullable = true) + )), CsvToStructsTestCase("\"Spark\"", "UNICODE", "'a STRING'", "", + Row("Spark"), Seq( + StructField("a", StringType, nullable = true) + )), + CsvToStructsTestCase("\"Spark\"", "UTF8_BINARY", "'a STRING COLLATE UNICODE'", "", Row("Spark"), Seq( StructField("a", StringType("UNICODE"), nullable = true) )), + CsvToStructsTestCase("\"Spark\"", "UNICODE_RTRIM", "'a STRING COLLATE UNICODE_RTRIM'", "", + Row("Spark"), Seq( + StructField("a", StringType("UNICODE_RTRIM"), nullable = true) + )), CsvToStructsTestCase("26/08/2015", "UTF8_BINARY", "'time Timestamp'", ", map('timestampFormat', 'dd/MM/yyyy')", Row( new 
SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S").parse("2015-08-26 00:00:00.0") @@ -413,10 +466,16 @@ class CollationSQLExpressionsSuite val testCases = Seq( SchemaOfCsvTestCase("1", "UTF8_BINARY", "STRUCT<_c0: INT>"), + SchemaOfCsvTestCase("1", "UTF8_BINARY_RTRIM", "STRUCT<_c0: INT>"), SchemaOfCsvTestCase("true,0.8", "UTF8_LCASE", "STRUCT<_c0: BOOLEAN, _c1: DOUBLE>"), + SchemaOfCsvTestCase("true,0.8", "UTF8_LCASE_RTRIM", + "STRUCT<_c0: BOOLEAN, _c1: DOUBLE>"), SchemaOfCsvTestCase("2015-08-26", "UNICODE", "STRUCT<_c0: DATE>"), + SchemaOfCsvTestCase("2015-08-26", "UNICODE_RTRIM", "STRUCT<_c0: DATE>"), SchemaOfCsvTestCase("abc", "UNICODE_CI", + "STRUCT<_c0: STRING>"), + SchemaOfCsvTestCase("abc", "UNICODE_CI_RTRIM", "STRUCT<_c0: STRING>") ) @@ -445,9 +504,14 @@ class CollationSQLExpressionsSuite val testCases = Seq( StructsToCsvTestCase("named_struct('a', 1, 'b', 2)", "UTF8_BINARY", "1,2"), + StructsToCsvTestCase("named_struct('a', 1, 'b', 2)", "UTF8_BINARY_RTRIM", "1,2"), StructsToCsvTestCase("named_struct('A', true, 'B', 2.0)", "UTF8_LCASE", "true,2.0"), + StructsToCsvTestCase("named_struct('A', true, 'B', 2.0)", "UTF8_LCASE_RTRIM", "true,2.0"), StructsToCsvTestCase("named_struct()", "UNICODE", null), + StructsToCsvTestCase("named_struct()", "UNICODE_RTRIM", null), StructsToCsvTestCase("named_struct('time', to_timestamp('2015-08-26'))", "UNICODE_CI", + "2015-08-26T00:00:00.000-07:00"), + StructsToCsvTestCase("named_struct('time', to_timestamp('2015-08-26'))", "UNICODE_CI_RTRIM", "2015-08-26T00:00:00.000-07:00") ) @@ -478,9 +542,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( ConvTestCase("100", "2", "10", "UTF8_BINARY", "4"), + ConvTestCase("100", "2", "10", "UTF8_BINARY_RTRIM", "4"), ConvTestCase("100", "2", "10", "UTF8_LCASE", "4"), + ConvTestCase("100", "2", "10", "UTF8_LCASE_RTRIM", "4"), ConvTestCase("100", "2", "10", "UNICODE", "4"), - ConvTestCase("100", "2", "10", "UNICODE_CI", "4") + ConvTestCase("100", "2", "10", "UNICODE_RTRIM", "4"), + 
ConvTestCase("100", "2", "10", "UNICODE_CI", "4"), + ConvTestCase("100", "2", "10", "UNICODE_CI_RTRIM", "4") ) testCases.foreach(t => { val query = @@ -502,9 +570,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( BinTestCase("13", "UTF8_BINARY", "1101"), + BinTestCase("13", "UTF8_BINARY_RTRIM", "1101"), BinTestCase("13", "UTF8_LCASE", "1101"), + BinTestCase("13", "UTF8_LCASE_RTRIM", "1101"), BinTestCase("13", "UNICODE", "1101"), - BinTestCase("13", "UNICODE_CI", "1101") + BinTestCase("13", "UNICODE_RTRIM", "1101"), + BinTestCase("13", "UNICODE_CI", "1101"), + BinTestCase("13", "UNICODE_CI_RTRIM", "1101") ) testCases.foreach(t => { val query = @@ -527,9 +599,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( HexTestCase("13", "UTF8_BINARY", "D"), + HexTestCase("13", "UTF8_BINARY_RTRIM", "D"), HexTestCase("13", "UTF8_LCASE", "D"), + HexTestCase("13", "UTF8_LCASE_RTRIM", "D"), HexTestCase("13", "UNICODE", "D"), - HexTestCase("13", "UNICODE_CI", "D") + HexTestCase("13", "UNICODE_RTRIM", "D"), + HexTestCase("13", "UNICODE_CI", "D"), + HexTestCase("13", "UNICODE_CI_RTRIM", "D") ) testCases.foreach(t => { val query = @@ -552,10 +628,15 @@ class CollationSQLExpressionsSuite val testCases = Seq( HexTestCase("Spark SQL", "UTF8_BINARY", "537061726B2053514C"), + HexTestCase("Spark SQL", "UTF8_BINARY_RTRIM", "537061726B2053514C"), HexTestCase("Spark SQL", "UTF8_LCASE", "537061726B2053514C"), + HexTestCase("Spark SQL", "UTF8_LCASE_RTRIM", "537061726B2053514C"), HexTestCase("Spark SQL", "UNICODE", "537061726B2053514C"), + HexTestCase("Spark SQL", "UNICODE_RTRIM", "537061726B2053514C"), HexTestCase("Spark SQL", "UNICODE_CI", "537061726B2053514C"), - HexTestCase("Spark SQL", "DE_CI_AI", "537061726B2053514C") + HexTestCase("Spark SQL", "UNICODE_CI_RTRIM", "537061726B2053514C"), + HexTestCase("Spark SQL", "DE_CI_AI", "537061726B2053514C"), + HexTestCase("Spark SQL", "DE_CI_AI_RTRIM", "537061726B2053514C") ) testCases.foreach(t => { val query = @@ -576,9 +657,13 
@@ class CollationSQLExpressionsSuite val testCases = Seq( UnHexTestCase("537061726B2053514C", "UTF8_BINARY", "Spark SQL"), + UnHexTestCase("537061726B2053514C", "UTF8_BINARY_RTRIM", "Spark SQL"), UnHexTestCase("537061726B2053514C", "UTF8_LCASE", "Spark SQL"), + UnHexTestCase("537061726B2053514C", "UTF8_LCASE_RTRIM", "Spark SQL"), UnHexTestCase("537061726B2053514C", "UNICODE", "Spark SQL"), + UnHexTestCase("537061726B2053514C", "UNICODE_RTRIM", "Spark SQL"), UnHexTestCase("537061726B2053514C", "UNICODE_CI", "Spark SQL"), + UnHexTestCase("537061726B2053514C", "UNICODE_CI_RTRIM", "Spark SQL"), UnHexTestCase("537061726B2053514C", "DE", "Spark SQL") ) testCases.foreach(t => { @@ -607,16 +692,30 @@ class CollationSQLExpressionsSuite "xpath_boolean", "UTF8_BINARY", true, BooleanType), XPathTestCase("12", "sum(A/B)", "xpath_short", "UTF8_BINARY", 3, ShortType), + XPathTestCase("1", "a/b", + "xpath_boolean", "UTF8_BINARY_RTRIM", true, BooleanType), + XPathTestCase("12", "sum(A/B)", + "xpath_short", "UTF8_BINARY_RTRIM", 3, ShortType), XPathTestCase("34", "sum(a/b)", "xpath_int", "UTF8_LCASE", 7, IntegerType), XPathTestCase("56", "sum(A/B)", "xpath_long", "UTF8_LCASE", 11, LongType), + XPathTestCase("34", "sum(a/b)", + "xpath_int", "UTF8_LCASE_RTRIM", 7, IntegerType), + XPathTestCase("56", "sum(A/B)", + "xpath_long", "UTF8_LCASE_RTRIM", 11, LongType), XPathTestCase("78", "sum(a/b)", "xpath_float", "UNICODE", 15.0, FloatType), XPathTestCase("90", "sum(A/B)", "xpath_double", "UNICODE", 9.0, DoubleType), + XPathTestCase("78", "sum(a/b)", + "xpath_float", "UNICODE_RTRIM", 15.0, FloatType), + XPathTestCase("90", "sum(A/B)", + "xpath_double", "UNICODE_RTRIM", 9.0, DoubleType), XPathTestCase("bcc", "a/c", "xpath_string", "UNICODE_CI", "cc", StringType("UNICODE_CI")), + XPathTestCase("bcc ", "a/c", + "xpath_string", "UNICODE_CI_RTRIM", "cc ", StringType("UNICODE_CI_RTRIM")), XPathTestCase("b1b2b3c1c2", "a/b/text()", "xpath", "UNICODE_CI", Array("b1", "b2", "b3"), 
ArrayType(StringType("UNICODE_CI"))) ) @@ -645,10 +744,15 @@ class CollationSQLExpressionsSuite val testCases = Seq( StringSpaceTestCase(1, "UTF8_BINARY", " "), + StringSpaceTestCase(1, "UTF8_BINARY_RTRIM", " "), StringSpaceTestCase(2, "UTF8_LCASE", " "), + StringSpaceTestCase(2, "UTF8_LCASE_RTRIM", " "), StringSpaceTestCase(3, "UNICODE", " "), + StringSpaceTestCase(3, "UNICODE_RTRIM", " "), StringSpaceTestCase(4, "UNICODE_CI", " "), - StringSpaceTestCase(5, "AF_CI_AI", " ") + StringSpaceTestCase(4, "UNICODE_CI_RTRIM", " "), + StringSpaceTestCase(5, "AF_CI_AI", " "), + StringSpaceTestCase(5, "AF_CI_AI_RTRIM", " ") ) // Supported collations @@ -678,9 +782,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( ToNumberTestCase("123", "UTF8_BINARY", "999", 123, DecimalType(3, 0)), + ToNumberTestCase("123", "UTF8_BINARY_RTRIM", "999", 123, DecimalType(3, 0)), ToNumberTestCase("1", "UTF8_LCASE", "0.00", 1.00, DecimalType(3, 2)), + ToNumberTestCase("1", "UTF8_LCASE_RTRIM", "0.00", 1.00, DecimalType(3, 2)), ToNumberTestCase("99,999", "UNICODE", "99,999", 99999, DecimalType(5, 0)), - ToNumberTestCase("$14.99", "UNICODE_CI", "$99.99", 14.99, DecimalType(4, 2)) + ToNumberTestCase("99,999", "UNICODE_RTRIM", "99,999", 99999, DecimalType(5, 0)), + ToNumberTestCase("$14.99", "UNICODE_CI", "$99.99", 14.99, DecimalType(4, 2)), + ToNumberTestCase("$14.99", "UNICODE_CI_RTRIM", "$99.99", 14.99, DecimalType(4, 2)) ) // Supported collations (ToNumber) @@ -748,9 +856,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( ToCharTestCase(12, "UTF8_BINARY", "999", " 12"), + ToCharTestCase(12, "UTF8_BINARY_RTRIM", "999", " 12"), ToCharTestCase(34, "UTF8_LCASE", "000D00", "034.00"), + ToCharTestCase(34, "UTF8_LCASE_RTRIM", "000D00", "034.00"), ToCharTestCase(56, "UNICODE", "$99.99", "$56.00"), - ToCharTestCase(78, "UNICODE_CI", "99D9S", "78.0+") + ToCharTestCase(56, "UNICODE_RTRIM", "$99.99", "$56.00"), + ToCharTestCase(78, "UNICODE_CI", "99D9S", "78.0+"), + ToCharTestCase(78, 
"UNICODE_CI_RTRIM", "99D9S", "78.0+") ) // Supported collations @@ -779,9 +891,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( GetJsonObjectTestCase("{\"a\":\"b\"}", "$.a", "UTF8_BINARY", "b"), + GetJsonObjectTestCase("{\"a\":\"b\"}", "$.a", "UTF8_BINARY_RTRIM", "b"), GetJsonObjectTestCase("{\"A\":\"1\"}", "$.A", "UTF8_LCASE", "1"), + GetJsonObjectTestCase("{\"A\":\"1\"}", "$.A", "UTF8_LCASE_RTRIM", "1"), GetJsonObjectTestCase("{\"x\":true}", "$.x", "UNICODE", "true"), - GetJsonObjectTestCase("{\"X\":1}", "$.X", "UNICODE_CI", "1") + GetJsonObjectTestCase("{\"x\":true}", "$.x", "UNICODE_RTRIM", "true"), + GetJsonObjectTestCase("{\"X\":1}", "$.X", "UNICODE_CI", "1"), + GetJsonObjectTestCase("{\"X\":1}", "$.X", "UNICODE_CI_RTRIM", "1") ) // Supported collations @@ -811,10 +927,16 @@ class CollationSQLExpressionsSuite val testCases = Seq( JsonTupleTestCase("{\"a\":1, \"b\":2}", "'a', 'b'", "UTF8_BINARY", Row("1", "2")), + JsonTupleTestCase("{\"a\":1, \"b\":2}", "'a', 'b'", "UTF8_BINARY_RTRIM", + Row("1", "2")), JsonTupleTestCase("{\"A\":\"3\", \"B\":\"4\"}", "'A', 'B'", "UTF8_LCASE", Row("3", "4")), + JsonTupleTestCase("{\"A\":\"3\", \"B\":\"4\"}", "'A', 'B'", "UTF8_LCASE_RTRIM", + Row("3", "4")), JsonTupleTestCase("{\"x\":true, \"y\":false}", "'x', 'y'", "UNICODE", Row("true", "false")), + JsonTupleTestCase("{\"x\":true, \"y\":false}", "'x', 'y'", "UNICODE_RTRIM", + Row("true", "false")), JsonTupleTestCase("{\"X\":null, \"Y\":null}", "'X', 'Y'", "UNICODE_CI", Row(null, null)) ) @@ -846,12 +968,20 @@ class CollationSQLExpressionsSuite val testCases = Seq( JsonToStructsTestCase("{\"a\":1, \"b\":2.0}", "a INT, b DOUBLE", "UTF8_BINARY", Row(Row(1, 2.0))), + JsonToStructsTestCase("{\"a\":1, \"b\":2.0}", "a INT, b DOUBLE", + "UTF8_BINARY_RTRIM", Row(Row(1, 2.0))), JsonToStructsTestCase("{\"A\":\"3\", \"B\":4}", "A STRING COLLATE UTF8_LCASE, B INT", "UTF8_LCASE", Row(Row("3", 4))), + JsonToStructsTestCase("{\"A\":\"3\", \"B\":4}", "A STRING COLLATE UTF8_LCASE, 
B INT", + "UTF8_LCASE_RTRIM", Row(Row("3", 4))), JsonToStructsTestCase("{\"x\":true, \"y\":null}", "x BOOLEAN, y VOID", "UNICODE", Row(Row(true, null))), + JsonToStructsTestCase("{\"x\":true, \"y\":null}", "x BOOLEAN, y VOID", + "UNICODE_RTRIM", Row(Row(true, null))), + JsonToStructsTestCase("{\"X\":null, \"Y\":false}", "X VOID, Y BOOLEAN", + "UNICODE_CI", Row(Row(null, false))), JsonToStructsTestCase("{\"X\":null, \"Y\":false}", "X VOID, Y BOOLEAN", - "UNICODE_CI", Row(Row(null, false))) + "UNICODE_CI_RTRIM", Row(Row(null, false))) ) // Supported collations @@ -880,12 +1010,20 @@ class CollationSQLExpressionsSuite val testCases = Seq( StructsToJsonTestCase("named_struct('a', 1, 'b', 2)", "UTF8_BINARY", Row("{\"a\":1,\"b\":2}")), + StructsToJsonTestCase("named_struct('a', 1, 'b', 2)", + "UTF8_BINARY_RTRIM", Row("{\"a\":1,\"b\":2}")), StructsToJsonTestCase("array(named_struct('a', 1, 'b', 2))", "UTF8_LCASE", Row("[{\"a\":1,\"b\":2}]")), + StructsToJsonTestCase("array(named_struct('a', 1, 'b', 2))", + "UTF8_LCASE_RTRIM", Row("[{\"a\":1,\"b\":2}]")), StructsToJsonTestCase("map('a', named_struct('b', 1))", "UNICODE", Row("{\"a\":{\"b\":1}}")), + StructsToJsonTestCase("map('a', named_struct('b', 1))", + "UNICODE_RTRIM", Row("{\"a\":{\"b\":1}}")), StructsToJsonTestCase("array(map('a', 1))", - "UNICODE_CI", Row("[{\"a\":1}]")) + "UNICODE_CI", Row("[{\"a\":1}]")), + StructsToJsonTestCase("array(map('a', 1))", + "UNICODE_CI_RTRIM", Row("[{\"a\":1}]")) ) // Supported collations @@ -913,9 +1051,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( LengthOfJsonArrayTestCase("'[1,2,3,4]'", "UTF8_BINARY", Row(4)), + LengthOfJsonArrayTestCase("'[1,2,3,4]'", "UTF8_BINARY_RTRIM", Row(4)), LengthOfJsonArrayTestCase("'[1,2,3,{\"f1\":1,\"f2\":[5,6]},4]'", "UTF8_LCASE", Row(5)), + LengthOfJsonArrayTestCase("'[1,2,3,{\"f1\":1,\"f2\":[5,6]},4]'", "UTF8_LCASE_RTRIM", Row(5)), LengthOfJsonArrayTestCase("'[1,2'", "UNICODE", Row(null)), - LengthOfJsonArrayTestCase("'['", 
"UNICODE_CI", Row(null)) + LengthOfJsonArrayTestCase("'[1,2'", "UNICODE_RTRIM", Row(null)), + LengthOfJsonArrayTestCase("'['", "UNICODE_CI", Row(null)), + LengthOfJsonArrayTestCase("'['", "UNICODE_CI_RTRIM", Row(null)) ) // Supported collations @@ -943,11 +1085,19 @@ class CollationSQLExpressionsSuite val testCases = Seq( JsonObjectKeysJsonArrayTestCase("{}", "UTF8_BINARY", Row(Seq())), + JsonObjectKeysJsonArrayTestCase("{}", "UTF8_BINARY_RTRIM", + Row(Seq())), JsonObjectKeysJsonArrayTestCase("{\"k\":", "UTF8_LCASE", Row(null)), + JsonObjectKeysJsonArrayTestCase("{\"k\":", "UTF8_LCASE_RTRIM", + Row(null)), JsonObjectKeysJsonArrayTestCase("{\"k1\": \"v1\"}", "UNICODE", Row(Seq("k1"))), + JsonObjectKeysJsonArrayTestCase("{\"k1\": \"v1\"}", "UNICODE_RTRIM", + Row(Seq("k1"))), JsonObjectKeysJsonArrayTestCase("{\"k1\":1,\"k2\":{\"k3\":3, \"k4\":4}}", "UNICODE_CI", + Row(Seq("k1", "k2"))), + JsonObjectKeysJsonArrayTestCase("{\"k1\":1,\"k2\":{\"k3\":3, \"k4\":4}}", "UNICODE_CI_RTRIM", Row(Seq("k1", "k2"))) ) @@ -977,12 +1127,20 @@ class CollationSQLExpressionsSuite val testCases = Seq( SchemaOfJsonTestCase("'[{\"col\":0}]'", "UTF8_BINARY", Row("ARRAY>")), + SchemaOfJsonTestCase("'[{\"col\":0}]'", + "UTF8_BINARY_RTRIM", Row("ARRAY>")), SchemaOfJsonTestCase("'[{\"col\":01}]', map('allowNumericLeadingZeros', 'true')", "UTF8_LCASE", Row("ARRAY>")), + SchemaOfJsonTestCase("'[{\"col\":01}]', map('allowNumericLeadingZeros', 'true')", + "UTF8_LCASE_RTRIM", Row("ARRAY>")), SchemaOfJsonTestCase("'[]'", "UNICODE", Row("ARRAY")), + SchemaOfJsonTestCase("'[]'", + "UNICODE_RTRIM", Row("ARRAY")), SchemaOfJsonTestCase("''", - "UNICODE_CI", Row("STRING")) + "UNICODE_CI", Row("STRING")), + SchemaOfJsonTestCase("''", + "UNICODE_CI_RTRIM", Row("STRING")) ) // Supported collations @@ -1023,10 +1181,7 @@ class CollationSQLExpressionsSuite Map("c" -> "1", "č" -> "2", "ć" -> "3")) ) val unsupportedTestCases = Seq( - StringToMapTestCase("a:1,b:2,c:3", "?", "?", "UNICODE_AI", null), - 
StringToMapTestCase("a:1,b:2,c:3", "?", "?", "UNICODE_RTRIM", null), - StringToMapTestCase("a:1,b:2,c:3", "?", "?", "UTF8_BINARY_RTRIM", null), - StringToMapTestCase("a:1,b:2,c:3", "?", "?", "UTF8_LCASE_RTRIM", null)) + StringToMapTestCase("a:1,b:2,c:3", "?", "?", "UNICODE_AI", null)) testCases.foreach(t => { // Unit test. val text = Literal.create(t.text, StringType(t.collation)) @@ -1073,9 +1228,13 @@ class CollationSQLExpressionsSuite case class RaiseErrorTestCase(errorMessage: String, collationName: String) val testCases = Seq( RaiseErrorTestCase("custom error message 1", "UTF8_BINARY"), + RaiseErrorTestCase("custom error message 1", "UTF8_BINARY_RTRIM"), RaiseErrorTestCase("custom error message 2", "UTF8_LCASE"), + RaiseErrorTestCase("custom error message 2", "UTF8_LCASE_RTRIM"), RaiseErrorTestCase("custom error message 3", "UNICODE"), - RaiseErrorTestCase("custom error message 4", "UNICODE_CI") + RaiseErrorTestCase("custom error message 3", "UNICODE_RTRIM"), + RaiseErrorTestCase("custom error message 4", "UNICODE_CI"), + RaiseErrorTestCase("custom error message 4", "UNICODE_CI_RTRIM") ) testCases.foreach(t => { withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { @@ -1094,7 +1253,13 @@ class CollationSQLExpressionsSuite test("Support CurrentDatabase/Catalog/User expressions with collation") { // Supported collations - Seq("UTF8_LCASE", "UNICODE", "UNICODE_CI", "SR_CI_AI").foreach(collationName => + Seq( + "UTF8_LCASE", + "UTF8_LCASE_RTRIM", + "UNICODE", + "UNICODE_RTRIM", + "UNICODE_CI", + "SR_CI_AI").foreach(collationName => withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { val queryDatabase = sql("SELECT current_schema()") val queryCatalog = sql("SELECT current_catalog()") @@ -1110,7 +1275,14 @@ class CollationSQLExpressionsSuite test("Support Uuid misc expression with collation") { // Supported collations - Seq("UTF8_LCASE", "UNICODE", "UNICODE_CI", "NO_CI_AI").foreach(collationName => + Seq( + "UTF8_LCASE", + "UTF8_LCASE_RTRIM", + 
"UNICODE", + "UNICODE_RTRIM", + "UNICODE_CI", + "UNICODE_CI_RTRIM", + "NO_CI_AI").foreach(collationName => withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { val query = s"SELECT uuid()" // Result & data type @@ -1285,15 +1457,33 @@ class CollationSQLExpressionsSuite Row(1), Seq( StructField("a", IntegerType, nullable = true) )), + XmlToStructsTestCase("

1

", "UTF8_BINARY_RTRIM", "'a INT'", "", + Row(1), Seq( + StructField("a", IntegerType, nullable = true) + )), XmlToStructsTestCase("

true0.8

", "UTF8_LCASE", "'A BOOLEAN, B DOUBLE'", "", Row(true, 0.8), Seq( StructField("A", BooleanType, nullable = true), StructField("B", DoubleType, nullable = true) )), + XmlToStructsTestCase("

true0.8

", "UTF8_LCASE_RTRIM", + "'A BOOLEAN, B DOUBLE'", "", Row(true, 0.8), Seq( + StructField("A", BooleanType, nullable = true), + StructField("B", DoubleType, nullable = true) + )), XmlToStructsTestCase("

Spark

", "UNICODE", "'s STRING'", "", + Row("Spark"), Seq( + StructField("s", StringType, nullable = true) + )), + XmlToStructsTestCase("

Spark

", "UTF8_BINARY", "'s STRING COLLATE UNICODE'", "", Row("Spark"), Seq( StructField("s", StringType("UNICODE"), nullable = true) )), + XmlToStructsTestCase("

Spark

", "UNICODE_RTRIM", + "'s STRING COLLATE UNICODE_RTRIM'", "", + Row("Spark"), Seq( + StructField("s", StringType("UNICODE_RTRIM"), nullable = true) + )), XmlToStructsTestCase("

", "UNICODE_CI", "'time Timestamp'", ", map('timestampFormat', 'dd/MM/yyyy')", Row( new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S").parse("2015-08-26 00:00:00.0") @@ -1327,10 +1517,16 @@ class CollationSQLExpressionsSuite val testCases = Seq( SchemaOfXmlTestCase("

1

", "UTF8_BINARY", "STRUCT"), + SchemaOfXmlTestCase("

1

", "UTF8_BINARY_RTRIM", "STRUCT"), SchemaOfXmlTestCase("

true0.8

", "UTF8_LCASE", "STRUCT"), + SchemaOfXmlTestCase("

true0.8

", "UTF8_LCASE_RTRIM", + "STRUCT"), SchemaOfXmlTestCase("

", "UNICODE", "STRUCT<>"), + SchemaOfXmlTestCase("

", "UNICODE_RTRIM", "STRUCT<>"), SchemaOfXmlTestCase("

123

", "UNICODE_CI", + "STRUCT>"), + SchemaOfXmlTestCase("

123

", "UNICODE_CI_RTRIM", "STRUCT>") ) @@ -1363,6 +1559,11 @@ class CollationSQLExpressionsSuite | 1 | 2 |""".stripMargin), + StructsToXmlTestCase("named_struct('a', 1, 'b', 2)", "UTF8_BINARY_RTRIM", + s""" + | 1 + | 2 + |""".stripMargin), StructsToXmlTestCase("named_struct('A', true, 'B', 2.0)", "UTF8_LCASE", s""" | true @@ -1373,6 +1574,11 @@ class CollationSQLExpressionsSuite | aa | bb |""".stripMargin), + StructsToXmlTestCase("named_struct('A', 'aa', 'B', 'bb')", "UTF8_LCASE_RTRIM", + s""" + | aa + | bb + |""".stripMargin), StructsToXmlTestCase("named_struct('A', 'aa', 'B', 'bb')", "UTF8_BINARY", s""" | aa @@ -1380,6 +1586,8 @@ class CollationSQLExpressionsSuite |""".stripMargin), StructsToXmlTestCase("named_struct()", "UNICODE", ""), + StructsToXmlTestCase("named_struct()", "UNICODE_RTRIM", + ""), StructsToXmlTestCase("named_struct('time', to_timestamp('2015-08-26'))", "UNICODE_CI", s""" | @@ -1411,9 +1619,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( ParseJsonTestCase("{\"a\":1,\"b\":2}", "UTF8_BINARY", "{\"a\":1,\"b\":2}"), + ParseJsonTestCase("{\"a\":1,\"b\":2}", "UTF8_BINARY_RTRIM", "{\"a\":1,\"b\":2}"), ParseJsonTestCase("{\"A\":3,\"B\":4}", "UTF8_LCASE", "{\"A\":3,\"B\":4}"), + ParseJsonTestCase("{\"A\":3,\"B\":4}", "UTF8_LCASE_RTRIM", "{\"A\":3,\"B\":4}"), ParseJsonTestCase("{\"c\":5,\"d\":6}", "UNICODE", "{\"c\":5,\"d\":6}"), - ParseJsonTestCase("{\"C\":7,\"D\":8}", "UNICODE_CI", "{\"C\":7,\"D\":8}") + ParseJsonTestCase("{\"c\":5,\"d\":6}", "UNICODE_RTRIM", "{\"c\":5,\"d\":6}"), + ParseJsonTestCase("{\"C\":7,\"D\":8}", "UNICODE_CI", "{\"C\":7,\"D\":8}"), + ParseJsonTestCase("{\"C\":7,\"D\":8}", "UNICODE_CI_RTRIM", "{\"C\":7,\"D\":8}") ) // Supported collations (ParseJson) @@ -1483,9 +1695,13 @@ class CollationSQLExpressionsSuite val testCases = Seq( IsVariantNullTestCase("'null'", "UTF8_BINARY", result = true), + IsVariantNullTestCase("'null'", "UTF8_BINARY_RTRIM", result = true), IsVariantNullTestCase("'\"null\"'", "UTF8_LCASE", result = 
false), + IsVariantNullTestCase("'\"null\"'", "UTF8_LCASE_RTRIM", result = false), IsVariantNullTestCase("'13'", "UNICODE", result = false), - IsVariantNullTestCase("null", "UNICODE_CI", result = false) + IsVariantNullTestCase("'13'", "UNICODE_RTRIM", result = false), + IsVariantNullTestCase("null", "UNICODE_CI", result = false), + IsVariantNullTestCase("null", "UNICODE_CI_RTRIM", result = false) ) // Supported collations @@ -1514,9 +1730,15 @@ class CollationSQLExpressionsSuite val testCases = Seq( VariantGetTestCase("{\"a\": 1}", "$.a", "int", "UTF8_BINARY", 1, IntegerType), + VariantGetTestCase("{\"a\": 1}", "$.a", "int", "UTF8_BINARY_RTRIM", 1, IntegerType), VariantGetTestCase("{\"a\": 1}", "$.b", "int", "UTF8_LCASE", null, IntegerType), - VariantGetTestCase("[1, \"2\"]", "$[1]", "string", "UNICODE", "2", StringType("UNICODE")), + VariantGetTestCase("[1, \"2\"]", "$[1]", "string", "UNICODE", "2", + StringType), + VariantGetTestCase("[1, \"2\"]", "$[1]", "string collate unicode", "UTF8_BINARY", "2", + StringType("UNICODE")), VariantGetTestCase("[1, \"2\"]", "$[2]", "string", "UNICODE_CI", null, + StringType), + VariantGetTestCase("[1, \"2\"]", "$[2]", "string collate unicode_CI", "UTF8_BINARY", null, StringType("UNICODE_CI")) ) @@ -1595,6 +1817,14 @@ class CollationSQLExpressionsSuite StructField("value", VariantType, nullable = false) ) ), + VariantExplodeTestCase("[\"hello\", \"world\"]", "UTF8_BINARY_RTRIM", + Row(0, "null", "\"hello\"").toString() + Row(1, "null", "\"world\"").toString(), + Seq[StructField]( + StructField("pos", IntegerType, nullable = false), + StructField("key", StringType("UTF8_BINARY_RTRIM")), + StructField("value", VariantType, nullable = false) + ) + ), VariantExplodeTestCase("[\"Spark\", \"SQL\"]", "UTF8_LCASE", Row(0, "null", "\"Spark\"").toString() + Row(1, "null", "\"SQL\"").toString(), Seq[StructField]( @@ -1603,6 +1833,14 @@ class CollationSQLExpressionsSuite StructField("value", VariantType, nullable = false) ) ), + 
VariantExplodeTestCase("[\"Spark\", \"SQL\"]", "UTF8_LCASE_RTRIM", + Row(0, "null", "\"Spark\"").toString() + Row(1, "null", "\"SQL\"").toString(), + Seq[StructField]( + StructField("pos", IntegerType, nullable = false), + StructField("key", StringType("UTF8_LCASE_RTRIM")), + StructField("value", VariantType, nullable = false) + ) + ), VariantExplodeTestCase("{\"a\": true, \"b\": 3.14}", "UNICODE", Row(0, "a", "true").toString() + Row(1, "b", "3.14").toString(), Seq[StructField]( @@ -1611,6 +1849,14 @@ class CollationSQLExpressionsSuite StructField("value", VariantType, nullable = false) ) ), + VariantExplodeTestCase("{\"a\": true, \"b\": 3.14}", "UNICODE_RTRIM", + Row(0, "a", "true").toString() + Row(1, "b", "3.14").toString(), + Seq[StructField]( + StructField("pos", IntegerType, nullable = false), + StructField("key", StringType("UNICODE_RTRIM")), + StructField("value", VariantType, nullable = false) + ) + ), VariantExplodeTestCase("{\"A\": 9.99, \"B\": false}", "UNICODE_CI", Row(0, "A", "9.99").toString() + Row(1, "B", "false").toString(), Seq[StructField]( @@ -1646,11 +1892,17 @@ class CollationSQLExpressionsSuite val testCases = Seq( SchemaOfVariantTestCase("null", "UTF8_BINARY", "VOID"), + SchemaOfVariantTestCase("null", "UTF8_BINARY_RTRIM", "VOID"), SchemaOfVariantTestCase("[]", "UTF8_LCASE", "ARRAY"), + SchemaOfVariantTestCase("[]", "UTF8_LCASE_RTRIM", "ARRAY"), SchemaOfVariantTestCase("[{\"a\":true,\"b\":0}]", "UNICODE", "ARRAY>"), + SchemaOfVariantTestCase("[{\"a\":true,\"b\":0}]", "UNICODE_RTRIM", + "ARRAY>"), SchemaOfVariantTestCase("[{\"A\":\"x\",\"B\":-1.00}]", "UNICODE_CI", - "ARRAY>") + "ARRAY>"), + SchemaOfVariantTestCase("[{\"A\":\"x\",\"B\":-1.00}]", "UNICODE_CI_RTRIM", + "ARRAY>") ) // Supported collations @@ -1677,11 +1929,18 @@ class CollationSQLExpressionsSuite val testCases = Seq( SchemaOfVariantAggTestCase("('1'), ('2'), ('3')", "UTF8_BINARY", "BIGINT"), + SchemaOfVariantAggTestCase("('1'), ('2'), ('3')", "UTF8_BINARY_RTRIM", "BIGINT"), 
SchemaOfVariantAggTestCase("('true'), ('false'), ('true')", "UTF8_LCASE", "BOOLEAN"), + SchemaOfVariantAggTestCase("('true'), ('false'), ('true')", "UTF8_LCASE_RTRIM", "BOOLEAN"), SchemaOfVariantAggTestCase("('{\"a\": 1}'), ('{\"b\": true}'), ('{\"c\": 1.23}')", "UNICODE", "OBJECT"), + SchemaOfVariantAggTestCase("('{\"a\": 1}'), ('{\"b\": true}'), ('{\"c\": 1.23}')", + "UNICODE_RTRIM", "OBJECT"), SchemaOfVariantAggTestCase("('{\"A\": \"x\"}'), ('{\"B\": 9.99}'), ('{\"C\": 0}')", - "UNICODE_CI", "OBJECT") + "UNICODE_CI", "OBJECT"), + SchemaOfVariantAggTestCase("('{\"A\": \"x\"}'), ('{\"B\": 9.99}'), ('{\"C\": 0}')", + "UNICODE_CI_RTRIM", "OBJECT" + ) ) // Supported collations @@ -1701,7 +1960,16 @@ class CollationSQLExpressionsSuite test("Support InputFileName expression with collation") { // Supported collations - Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI", "MT_CI_AI").foreach(collationName => { + Seq( + "UTF8_BINARY", + "UTF8_BINARY_RTRIM", + "UTF8_LCASE", + "UTF8_LCASE_RTRIM", + "UNICODE", + "UNICODE_RTRIM", + "UNICODE_CI", + "UNICODE_CI_RTRIM", + "MT_CI_AI").foreach(collationName => { val query = s""" |select input_file_name() @@ -1720,9 +1988,13 @@ class CollationSQLExpressionsSuite case class DateFormatTestCase[R](date: String, format: String, collation: String, result: R) val testCases = Seq( DateFormatTestCase("2021-01-01", "yyyy-MM-dd", "UTF8_BINARY", "2021-01-01"), + DateFormatTestCase("2021-01-01", "yyyy-MM-dd", "UTF8_BINARY_RTRIM", "2021-01-01"), DateFormatTestCase("2021-01-01", "yyyy-dd", "UTF8_LCASE", "2021-01"), + DateFormatTestCase("2021-01-01", "yyyy-dd", "UTF8_LCASE_RTRIM", "2021-01"), DateFormatTestCase("2021-01-01", "yyyy-MM-dd", "UNICODE", "2021-01-01"), - DateFormatTestCase("2021-01-01", "yyyy", "UNICODE_CI", "2021") + DateFormatTestCase("2021-01-01", "yyyy-MM-dd", "UNICODE_RTRIM", "2021-01-01"), + DateFormatTestCase("2021-01-01", "yyyy", "UNICODE_CI", "2021"), + DateFormatTestCase("2021-01-01", "yyyy", "UNICODE_CI_RTRIM", "2021") 
) for { @@ -1749,7 +2021,16 @@ class CollationSQLExpressionsSuite } test("Support mode for string expression with collation - Basic Test") { - Seq("utf8_binary", "UTF8_LCASE", "unicode_ci", "unicode", "NL_AI").foreach { collationId => + Seq( + "utf8_binary", + "utf8_binary_rtrim", + "UTF8_LCASE", + "UTF8_LCASE_RTRIM", + "unicode_ci", + "unicode_ci_rtrim", + "unicode", + "unicode_rtrim", + "NL_AI").foreach { collationId => val query = s"SELECT mode(collate('abc', '${collationId}'))" checkAnswer(sql(query), Row("abc")) assert(sql(query).schema.fields.head.dataType.sameType(StringType(collationId))) @@ -1760,9 +2041,13 @@ class CollationSQLExpressionsSuite case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) val testCases = Seq( ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("utf8_binary_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("UTF8_LCASE_RTRIM", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode_ci_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), ModeTestCase("SR", Map("c" -> 3L, "č" -> 2L, "Č" -> 2L), "c") ) testCases.foreach(t => { @@ -1797,9 +2082,14 @@ class CollationSQLExpressionsSuite val testCasesUTF8String = Seq( UTF8StringModeTestCase("utf8_binary", bufferValuesUTF8String, "a"), + UTF8StringModeTestCase("utf8_binary_rtrim", bufferValuesUTF8String, "a"), UTF8StringModeTestCase("UTF8_LCASE", bufferValuesUTF8String, "b"), + UTF8StringModeTestCase("UTF8_LCASE_RTRIM", bufferValuesUTF8String, "b"), UTF8StringModeTestCase("unicode_ci", bufferValuesUTF8String, "b"), - UTF8StringModeTestCase("unicode", bufferValuesUTF8String, "a")) + 
UTF8StringModeTestCase("unicode_ci_rtrim", bufferValuesUTF8String, "b"), + UTF8StringModeTestCase("unicode", bufferValuesUTF8String, "a"), + UTF8StringModeTestCase("unicode_rtrim", bufferValuesUTF8String, "a") + ) testCasesUTF8String.foreach ( t => { val buffer = new OpenHashMap[AnyRef, Long](5) @@ -1827,9 +2117,13 @@ class CollationSQLExpressionsSuite } val testCasesUTF8String = Seq( UTF8StringModeTestCase("utf8_binary", bufferValuesComplex, "[a,a,a]"), + UTF8StringModeTestCase("utf8_binary_rtrim", bufferValuesComplex, "[a,a,a]"), UTF8StringModeTestCase("UTF8_LCASE", bufferValuesComplex, "[b,b,b]"), + UTF8StringModeTestCase("UTF8_LCASE_rtrim", bufferValuesComplex, "[b,b,b]"), UTF8StringModeTestCase("unicode_ci", bufferValuesComplex, "[b,b,b]"), - UTF8StringModeTestCase("unicode", bufferValuesComplex, "[a,a,a]")) + UTF8StringModeTestCase("unicode_ci_rtrim", bufferValuesComplex, "[b,b,b]"), + UTF8StringModeTestCase("unicode", bufferValuesComplex, "[a,a,a]"), + UTF8StringModeTestCase("unicode_rtrim", bufferValuesComplex, "[a,a,a]")) testCasesUTF8String.foreach { t => val buffer = new OpenHashMap[AnyRef, Long](5) @@ -1847,9 +2141,13 @@ class CollationSQLExpressionsSuite case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) val testCases = Seq( ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("utf8_binary_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("UTF8_LCASE_RTRIM", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), - ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ModeTestCase("unicode_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode_ci_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") ) testCases.foreach(t => { val 
valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => @@ -1872,9 +2170,13 @@ class CollationSQLExpressionsSuite case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) val testCases = Seq( ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("utf8_binary_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("UTF8_LCASE_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), - ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ModeTestCase("unicode_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode_ci_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") ) testCases.foreach { t => val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => @@ -1897,9 +2199,13 @@ class CollationSQLExpressionsSuite case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) val testCases = Seq( ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("utf8_binary_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("UTF8_LCASE_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), - ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ModeTestCase("unicode_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode_ci_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") ) testCases.foreach { t => val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => @@ -1923,9 +2229,13 @@ class CollationSQLExpressionsSuite case class 
ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) val testCases = Seq( ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("utf8_binary_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("UTF8_LCASE_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), - ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ModeTestCase("unicode_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode_ci_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") ) testCases.foreach { t => val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => @@ -1949,9 +2259,13 @@ class CollationSQLExpressionsSuite case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) val testCases = Seq( ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("utf8_binary_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("UTF8_LCASE_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), - ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ModeTestCase("unicode_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode_ci_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") ) testCases.foreach { t => val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => @@ -1976,8 +2290,11 @@ class CollationSQLExpressionsSuite case class ModeTestCase(collationId: String, bufferValues: Map[String, Long], result: String) Seq( ModeTestCase("utf8_binary", Map("a" -> 3L, "b" 
-> 2L, "B" -> 2L), "{a -> 1}"), + ModeTestCase("utf8_binary_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a -> 1}"), ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a -> 1}"), + ModeTestCase("unicode_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{a -> 1}"), ModeTestCase("utf8_lcase", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{b -> 1}"), + ModeTestCase("utf8_lcase_rtrim", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{b -> 1}"), ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "{b -> 1}") ).foreach { t1 => def getValuesToAdd(t: ModeTestCase): String = { @@ -1991,9 +2308,11 @@ class CollationSQLExpressionsSuite } val tableName = s"t_${t1.collationId}_mode_nested_map_struct1" withTable(tableName) { - sql(s"CREATE TABLE ${tableName}(" + - s"i STRUCT>) USING parquet") - sql(s"INSERT INTO ${tableName} VALUES ${getValuesToAdd(t1)}") + withSQLConf(SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true") { + sql(s"CREATE TABLE ${tableName}(" + + s"i STRUCT>) USING parquet") + sql(s"INSERT INTO ${tableName} VALUES ${getValuesToAdd(t1)}") + } val query = "SELECT lower(cast(mode(i).m1 as string))" + s" FROM ${tableName}" val queryResult = sql(query) @@ -2006,7 +2325,12 @@ class CollationSQLExpressionsSuite for { collateKey <- Seq(true, false) collateVal <- Seq(true, false) - defaultCollation <- Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE") + defaultCollation <- Seq( + "UTF8_BINARY", + "UTF8_BINARY_RTRIM", + "UTF8_LCASE", + "UTF8_LCASE_RTRIM", + "UNICODE") } { val mapKey = if (collateKey) "'a' collate utf8_lcase" else "'a'" val mapVal = if (collateVal) "'b' collate utf8_lcase" else "'b'" @@ -2014,11 +2338,11 @@ class CollationSQLExpressionsSuite val queryExtractor = s"select collation(map($mapKey, $mapVal)[$mapKey])" val queryElementAt = s"select collation(element_at(map($mapKey, $mapVal), $mapKey))" - checkAnswer(sql(queryExtractor), Row(collation)) - checkAnswer(sql(queryElementAt), Row(collation)) + checkAnswer(sql(queryExtractor), Row(fullyQualifiedPrefix 
+ collation)) + checkAnswer(sql(queryElementAt), Row(fullyQualifiedPrefix + collation)) withSQLConf(SqlApiConf.DEFAULT_COLLATION -> defaultCollation) { - val res = if (collateVal) "UTF8_LCASE" else defaultCollation + val res = fullyQualifiedPrefix + (if (collateVal) "UTF8_LCASE" else defaultCollation) checkAnswer(sql(queryExtractor), Row(res)) checkAnswer(sql(queryElementAt), Row(res)) } @@ -2403,7 +2727,8 @@ class CollationSQLExpressionsSuite "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", true), ReflectExpressions("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", "A5Cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", false), - + ReflectExpressions("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", + "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary_rtrim", true), ReflectExpressions("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "utf8_binary", "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_lcase", true), ReflectExpressions("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "utf8_binary", @@ -3149,14 +3474,22 @@ class CollationSQLExpressionsSuite ) val testCases = Seq( - HyperLogLogPlusPlusTestCase("utf8_binary", Seq("a", "a", "A", "z", "zz", "ZZ", "w", "AA", - "aA", "Aa", "aa"), Seq(Row(10))), - HyperLogLogPlusPlusTestCase("utf8_lcase", Seq("a", "a", "A", "z", "zz", "ZZ", "w", "AA", - "aA", "Aa", "aa"), Seq(Row(5))), + HyperLogLogPlusPlusTestCase("utf8_binary", Seq("a", "a", "A", "z", "zz", "ZZ", "w", + "AA", "aA", "Aa", "aa"), Seq(Row(10))), + HyperLogLogPlusPlusTestCase("utf8_binary_rtrim", Seq("a ", "a", "a", "A", "z", "zz", "ZZ", + "w", "AA", "aA", "Aa", "aa"), Seq(Row(10))), + HyperLogLogPlusPlusTestCase("utf8_lcase", Seq("a", "a", "A", "z", "zz", "ZZ", "w", + "AA", "aA", "Aa", "aa"), Seq(Row(5))), + HyperLogLogPlusPlusTestCase("utf8_lcase_rtrim", Seq("a ", "a", "a", "A", "z", "zz", "ZZ", "w", + "AA", "aA", "Aa", "aa"), Seq(Row(5))), HyperLogLogPlusPlusTestCase("UNICODE", Seq("a", "a", "A", "z", "zz", "ZZ", "w", "AA", "aA", "Aa", "aa"), Seq(Row(9))), + 
HyperLogLogPlusPlusTestCase("UNICODE_RTRIM", Seq("a ", "a", "a", "A", "z", "zz", "ZZ", "w", + "AA", "aA", "Aa", "aa"), Seq(Row(9))), HyperLogLogPlusPlusTestCase("UNICODE_CI", Seq("a", "a", "A", "z", "zz", "ZZ", "w", "AA", - "aA", "Aa", "aa"), Seq(Row(5))) + "aA", "Aa", "aa"), Seq(Row(5))), + HyperLogLogPlusPlusTestCase("UNICODE_CI_RTRIM", Seq("a ", "a", "a", "A", "z", "zz", "ZZ", "w", + "AA", "aA", "Aa", "aa"), Seq(Row(5))) ) testCases.foreach( t => { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLRegexpSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLRegexpSuite.scala index 5bb8511d0d935..8d831e4ca1668 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLRegexpSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLRegexpSuite.scala @@ -111,15 +111,17 @@ class CollationSQLRegexpSuite } val tableNameLcase = "T_LCASE" withTable(tableNameLcase) { - withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UTF8_LCASE") { - sql(s"CREATE TABLE IF NOT EXISTS $tableNameLcase(c STRING) using PARQUET") - sql(s"INSERT INTO $tableNameLcase(c) VALUES('ABC')") - checkAnswer(sql(s"select c like 'ab%' FROM $tableNameLcase"), Row(true)) - checkAnswer(sql(s"select c like '%bc' FROM $tableNameLcase"), Row(true)) - checkAnswer(sql(s"select c like 'a%c' FROM $tableNameLcase"), Row(true)) - checkAnswer(sql(s"select c like '%b%' FROM $tableNameLcase"), Row(true)) - checkAnswer(sql(s"select c like 'abc' FROM $tableNameLcase"), Row(true)) - } + sql(s""" + |CREATE TABLE IF NOT EXISTS $tableNameLcase( + | c STRING COLLATE UTF8_LCASE + |) using PARQUET + |""".stripMargin) + sql(s"INSERT INTO $tableNameLcase(c) VALUES('ABC')") + checkAnswer(sql(s"select c like 'ab%' FROM $tableNameLcase"), Row(true)) + checkAnswer(sql(s"select c like '%bc' FROM $tableNameLcase"), Row(true)) + checkAnswer(sql(s"select c like 'a%c' FROM $tableNameLcase"), Row(true)) + checkAnswer(sql(s"select c like '%b%' FROM $tableNameLcase"), Row(true)) + 
checkAnswer(sql(s"select c like 'abc' FROM $tableNameLcase"), Row(true)) } } @@ -448,7 +450,8 @@ class CollationSQLRegexpSuite }, condition = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", parameters = Map( - "sqlExpr" -> "\"regexp_replace(collate(ABCDE, UNICODE_CI), .c., FFF, 1)\"", + "sqlExpr" -> + """"regexp_replace(collate(ABCDE, UNICODE_CI), .c., 'FFF' collate UNICODE_CI, 1)"""", "paramIndex" -> "first", "inputSql" -> "\"collate(ABCDE, UNICODE_CI)\"", "inputType" -> "\"STRING COLLATE UNICODE_CI\"", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala index 2a0b84c075079..ee9734ebaa5bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala @@ -198,8 +198,10 @@ class CollationStringExpressionsSuite checkError( exception = intercept[AnalysisException] { val expr = StringSplitSQL( - Collate(Literal.create("1a2"), "UTF8_BINARY"), - Collate(Literal.create("a"), "UTF8_LCASE")) + Collate(Literal.create("1a2", StringType("UTF8_BINARY")), + ResolvedCollation("UTF8_BINARY")), + Collate(Literal.create("a", StringType("UTF8_BINARY")), + ResolvedCollation("UTF8_LCASE"))) CollationTypeCasts.transform(expr) }, condition = "COLLATION_MISMATCH.EXPLICIT", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 1707820053837..1571433a37e16 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -22,7 +22,6 @@ import scala.jdk.CollectionConverters.MapHasAsJava import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.ExtendedAnalysisException import org.apache.spark.sql.catalyst.expressions._ -import 
org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.connector.{DatasourceV2SQLBase, FakeV2ProviderWithCustomSchema} import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTable} @@ -34,6 +33,7 @@ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec} import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.joins._ +import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.{SqlApiConf, SQLConf} import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, Metadata, MetadataBuilder, StringType, StructField, StructType} @@ -43,6 +43,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { private val collationPreservingSources = Seq("parquet") private val collationNonPreservingSources = Seq("orc", "csv", "json", "text") private val allFileBasedDataSources = collationPreservingSources ++ collationNonPreservingSources + private val fullyQualifiedPrefix = s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}." 
@inline private def isSortMergeForced: Boolean = { @@ -117,7 +118,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { ).foreach { collationName => checkAnswer( sql(s"select collation('aaa' collate $collationName)"), - Row(collationName.toUpperCase()) + Row(fullyQualifiedPrefix + collationName.toUpperCase()) ) } } @@ -209,7 +210,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } test("collation expression returns default collation") { - checkAnswer(sql(s"select collation('aaa')"), Row("UTF8_BINARY")) + checkAnswer(sql(s"select collation('aaa')"), Row(fullyQualifiedPrefix + "UTF8_BINARY")) } test("invalid collation name throws exception") { @@ -220,23 +221,54 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { parameters = Map("collationName" -> "UTF8_BS", "proposals" -> "UTF8_LCASE")) } + test("fail on table creation with collated strings as map key") { + withTable("table_1", "table_2") { + checkError( + exception = intercept[AnalysisException] { + sql("CREATE TABLE table_1 (col MAP) USING parquet") + }, + condition = "UNSUPPORTED_FEATURE.COLLATIONS_IN_MAP_KEYS" + ) + withSQLConf(SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true") { + sql("CREATE TABLE table_2 (col MAP) USING parquet") + } + } + } + + test("fail on adding column with collated map key") { + withTable("table_1") { + sql("CREATE TABLE table_1 (id INTEGER) USING parquet") + checkError( + exception = intercept[AnalysisException] { + sql("ALTER TABLE table_1 ADD COLUMN col1 MAP, INTEGER>") + }, + condition = "UNSUPPORTED_FEATURE.COLLATIONS_IN_MAP_KEYS" + ) + withSQLConf(SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true") { + sql("ALTER TABLE table_1 ADD COLUMN col1 MAP, INTEGER>") + } + } + } + test("disable bucketing on collated string column") { def createTable(bucketColumns: String*): Unit = { val tableName = "test_partition_tbl" withTable(tableName) { - sql( - s""" - |CREATE TABLE $tableName ( - | id 
INT, - | c1 STRING COLLATE UNICODE, - | c2 STRING, - | struct_col STRUCT, - | array_col ARRAY, - | map_col MAP - |) USING parquet - |CLUSTERED BY (${bucketColumns.mkString(",")}) - |INTO 4 BUCKETS""".stripMargin - ) + withSQLConf(SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true") { + sql( + s""" + |CREATE TABLE $tableName ( + | id INT, + | c1 STRING COLLATE UNICODE, + | c2 STRING, + | struct_col STRUCT, + | array_col ARRAY, + | map_col MAP + |) USING parquet + |CLUSTERED BY (${bucketColumns.mkString(",")}) + |INTO 4 BUCKETS""".stripMargin + ) + } } } // should work fine on default collated columns @@ -477,7 +509,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql(s"INSERT INTO $tableName VALUES ('aaa')") sql(s"INSERT INTO $tableName VALUES ('AAA')") - checkAnswer(sql(s"SELECT DISTINCT COLLATION(c1) FROM $tableName"), Seq(Row(collationName))) + checkAnswer(sql(s"SELECT DISTINCT COLLATION(c1) FROM $tableName"), + Seq(Row(fullyQualifiedPrefix + collationName))) assert(sql(s"select c1 FROM $tableName").schema.head.dataType == StringType(collationId)) } } @@ -501,7 +534,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { checkAnswer(readback, Row("aaa")) checkAnswer( readback.selectExpr(s"collation(${readback.columns.head})"), - Row(readbackCollation)) + Row(fullyQualifiedPrefix + readbackCollation)) } } } @@ -523,7 +556,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql(s"INSERT INTO $tableName VALUES ('AAA')") checkAnswer(sql(s"SELECT DISTINCT COLLATION(c1) FROM $tableName"), - Seq(Row(defaultCollation))) + Seq(Row(fullyQualifiedPrefix + defaultCollation))) sql( s""" @@ -535,7 +568,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql(s"INSERT INTO $tableName VALUES ('AAA', 'AAA')") checkAnswer(sql(s"SELECT DISTINCT COLLATION(c2) FROM $tableName"), - Seq(Row(collationName))) + Seq(Row(fullyQualifiedPrefix + collationName))) 
assert(sql(s"select c2 FROM $tableName").schema.head.dataType == StringType(collationId)) } } @@ -558,7 +591,8 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql(s"ALTER TABLE $tableName ALTER COLUMN c4.t TYPE STRING COLLATE UNICODE") checkAnswer(sql(s"SELECT collation(c1), collation(c2[0]), " + s"collation(c3[1]), collation(c4.t) FROM $tableName"), - Seq(Row("UTF8_LCASE", "UNICODE_CI", "UTF8_BINARY", "UNICODE"))) + Seq(Row(fullyQualifiedPrefix + "UTF8_LCASE", fullyQualifiedPrefix + "UNICODE_CI", + fullyQualifiedPrefix + "UTF8_BINARY", fullyQualifiedPrefix + "UNICODE"))) } } @@ -662,6 +696,11 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { s"IN (COLLATE('aa', 'UTF8_LCASE'))"), Seq(Row("a"), Row("A"))) checkAnswer(sql(s"SELECT c1 FROM $tableName where (c1 || 'a') " + s"IN (COLLATE('aa', 'UTF8_BINARY'))"), Seq(Row("a"))) + checkAnswer(sql(s"SELECT c1 FROM $tableName where c1 || 'a' " + + s"IN (COLLATE('aa', 'UTF8_LCASE_RTRIM'))"), Seq(Row("a"), Row("A"))) + checkAnswer(sql(s"SELECT c1 FROM $tableName where (c1 || 'a') " + + s"IN (COLLATE('aa', 'UTF8_BINARY_RTRIM'))"), Seq(Row("a"))) + // columns have different collation checkError( @@ -772,6 +811,16 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { ) ) + checkError( + exception = intercept[AnalysisException] { + sql(s"SELECT array('A', 'a' COLLATE UNICODE) == array('b' COLLATE UNICODE_CI_RTRIM)") + }, + condition = "COLLATION_MISMATCH.EXPLICIT", + parameters = Map( + "explicitTypes" -> """"STRING COLLATE UNICODE", "STRING COLLATE UNICODE_CI_RTRIM"""" + ) + ) + checkError( exception = intercept[AnalysisException] { sql("SELECT array_join(array('a', 'b' collate UNICODE), 'c' collate UNICODE_CI)") @@ -829,7 +878,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { """EXECUTE IMMEDIATE stmtStr1 USING | 'a' AS var1, | 'b' AS var2;""".stripMargin), - Seq(Row("UTF8_BINARY")) + 
Seq(Row(fullyQualifiedPrefix + "UTF8_BINARY")) ) withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { @@ -838,7 +887,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { """EXECUTE IMMEDIATE stmtStr1 USING | 'a' AS var1, | 'b' AS var2;""".stripMargin), - Seq(Row("UNICODE")) + Seq(Row(fullyQualifiedPrefix + "UNICODE")) ) } @@ -846,7 +895,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql( """EXECUTE IMMEDIATE stmtStr2 USING | 'a' AS var1;""".stripMargin), - Seq(Row("UNICODE")) + Seq(Row(fullyQualifiedPrefix + "UNICODE")) ) withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { @@ -854,7 +903,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql( """EXECUTE IMMEDIATE stmtStr2 USING | 'a' AS var1;""".stripMargin), - Seq(Row("UNICODE")) + Seq(Row(fullyQualifiedPrefix + "UNICODE")) ) } } @@ -941,7 +990,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql(s"INSERT INTO $tableName VALUES ('a'), ('A')") checkAnswer(sql(s"SELECT DISTINCT COLLATION(c1) FROM $tableName"), - Seq(Row(collationName))) + Seq(Row(fullyQualifiedPrefix + collationName))) assert(sql(s"select c1 FROM $tableName").schema.head.dataType == StringType(collationId)) } } @@ -1053,69 +1102,46 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { test("SPARK-47431: Default collation set to UNICODE, literal test") { withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { - checkAnswer(sql(s"SELECT collation('aa')"), Seq(Row("UNICODE"))) + checkAnswer(sql(s"SELECT collation('aa')"), Seq(Row(fullyQualifiedPrefix + "UNICODE"))) } } - test("SPARK-47972: Cast expression limitation for collations") { - checkError( - exception = intercept[ParseException] - (sql("SELECT cast(1 as string collate unicode)")), - condition = "UNSUPPORTED_DATATYPE", - parameters = Map( - "typeName" -> toSQLType(StringType("UNICODE"))), - context = - 
ExpectedContext(fragment = s"cast(1 as string collate unicode)", start = 7, stop = 39) - ) + test("Cast expression for collations") { + checkAnswer( + sql(s"SELECT collation(cast('a' as string collate utf8_lcase))"), + Seq(Row(fullyQualifiedPrefix + "UTF8_LCASE"))) - checkError( - exception = intercept[ParseException] - (sql("SELECT 'A' :: string collate unicode")), - condition = "UNSUPPORTED_DATATYPE", - parameters = Map( - "typeName" -> toSQLType(StringType("UNICODE"))), - context = ExpectedContext(fragment = s"'A' :: string collate unicode", start = 7, stop = 35) - ) + checkAnswer( + sql(s"SELECT collation('a' :: string collate utf8_lcase)"), + Seq(Row(fullyQualifiedPrefix + "UTF8_LCASE"))) checkAnswer(sql(s"SELECT cast(1 as string)"), Seq(Row("1"))) checkAnswer(sql(s"SELECT cast('A' as string)"), Seq(Row("A"))) withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { - checkError( - exception = intercept[ParseException] - (sql("SELECT cast(1 as string collate unicode)")), - condition = "UNSUPPORTED_DATATYPE", - parameters = Map( - "typeName" -> toSQLType(StringType("UNICODE"))), - context = - ExpectedContext(fragment = s"cast(1 as string collate unicode)", start = 7, stop = 39) - ) - + checkAnswer( + sql(s"SELECT collation(cast(1 as string collate unicode))"), + Seq(Row(fullyQualifiedPrefix + "UNICODE"))) checkAnswer(sql(s"SELECT cast(1 as string)"), Seq(Row("1"))) - checkAnswer(sql(s"SELECT collation(cast(1 as string))"), Seq(Row("UNICODE"))) + checkAnswer(sql(s"SELECT collation(cast(1 as string))"), + Seq(Row(fullyQualifiedPrefix + "UNICODE"))) } } - test("SPARK-47431: Default collation set to UNICODE, column type test") { - withTable("t") { - withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { - sql(s"CREATE TABLE t(c1 STRING) USING PARQUET") - sql(s"INSERT INTO t VALUES ('a')") - checkAnswer(sql(s"SELECT collation(c1) FROM t"), Seq(Row("UNICODE"))) - } - } - } + test("cast using the dataframe api") { + val tableName = "cast_table" + 
withTable(tableName) { + sql(s"CREATE TABLE $tableName (name STRING COLLATE UTF8_LCASE) USING PARQUET") - test("SPARK-47431: Create table with UTF8_BINARY, make sure collation persists on read") { - withTable("t") { - withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UTF8_BINARY") { - sql("CREATE TABLE t(c1 STRING) USING PARQUET") - sql("INSERT INTO t VALUES ('a')") - checkAnswer(sql("SELECT collation(c1) FROM t"), Seq(Row("UTF8_BINARY"))) - } - withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { - checkAnswer(sql("SELECT collation(c1) FROM t"), Seq(Row("UTF8_BINARY"))) - } + var df = spark.read.table(tableName) + .withColumn("name", col("name").cast("STRING COLLATE UNICODE")) + + assert(df.schema.fields.head.dataType === StringType("UNICODE")) + + df = spark.read.table(tableName) + .withColumn("name", col("name").cast("STRING COLLATE UTF8_BINARY")) + + assert(df.schema.fields.head.dataType === StringType) } } @@ -1143,7 +1169,9 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } // map doesn't support aggregation withTable(table) { - sql(s"create table $table (m map) using parquet") + withSQLConf(SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true") { + sql(s"create table $table (m map) using parquet") + } val query = s"select distinct m from $table" checkError( exception = intercept[ExtendedAnalysisException](sql(query)), @@ -1185,8 +1213,10 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } // map doesn't support joins withTable(tableLeft, tableRight) { - Seq(tableLeft, tableRight).map(tab => - sql(s"create table $tab (m map) using parquet")) + withSQLConf(SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true") { + Seq(tableLeft, tableRight).map(tab => + sql(s"create table $tab (m map) using parquet")) + } val query = s"select $tableLeft.m from $tableLeft join $tableRight on $tableLeft.m = $tableRight.m" val ctx = s"$tableLeft.m = $tableRight.m" @@ -1437,7 +1467,10 @@ class CollationSuite extends 
DatasourceV2SQLBase with AdaptiveSparkPlanHelper { val tableName = "t" withTable(tableName) { - withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> codeGen) { + withSQLConf( + SQLConf.CODEGEN_FACTORY_MODE.key -> codeGen, + SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true" + ) { sql(s"create table $tableName" + s" (m map)") sql(s"insert into $tableName values (map('aaa', 'AAA'))") @@ -1462,7 +1495,10 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { val tableName = "t" withTable(tableName) { - withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> codeGen) { + withSQLConf( + SQLConf.CODEGEN_FACTORY_MODE.key -> codeGen, + SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true" + ) { sql(s"create table $tableName" + s" (m map, " + s"struct>)") @@ -1489,7 +1525,10 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { val tableName = "t" withTable(tableName) { - withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> codeGen) { + withSQLConf( + SQLConf.CODEGEN_FACTORY_MODE.key -> codeGen, + SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true" + ) { sql(s"create table $tableName " + s"(m map, array>)") sql(s"insert into $tableName values (map(array('aaa', 'bbb'), array('ccc', 'ddd')))") @@ -1512,7 +1551,10 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { test(s"Check that order by on map with$collationSetup strings fails ($codeGen)") { val tableName = "t" withTable(tableName) { - withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> codeGen) { + withSQLConf( + SQLConf.CODEGEN_FACTORY_MODE.key -> codeGen, + SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true" + ) { sql(s"create table $tableName" + s" (m map, " + s" c integer)") @@ -2087,4 +2129,49 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { checkAnswer(sql("SELECT NAME FROM collations() WHERE ICU_VERSION is null"), Seq(Row("UTF8_BINARY"), Row("UTF8_LCASE"))) } + + test("fully qualified name") { + Seq("UTF8_BINARY", 
"UTF8_LCASE", "UNICODE", "UNICODE_CI_AI").foreach { collation => + // Make sure that the collation expression returns the correct fully qualified name. + val df = sql(s"SELECT collation('a' collate $collation)") + checkAnswer(df, + Seq(Row(s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}.$collation"))) + + // Make sure the user can specify the fully qualified name as a collation name. + Seq("contains", "startswith", "endswith").foreach{ binaryFunction => + val dfRegularName = sql( + s"SELECT $binaryFunction('a' collate $collation, 'A' collate $collation)") + val dfFullyQualifiedName = sql( + s"SELECT $binaryFunction('a' collate system.builtin.$collation, 'A' collate $collation)") + checkAnswer(dfRegularName, dfFullyQualifiedName) + } + } + + // Wrong collation names raise a Spark exception. + Seq( + ("system.builtin2.UTF8_BINARY", "UTF8_BINARY"), + ("system.UTF8_BINARY", "UTF8_BINARY"), + ("builtin.UTF8_LCASE", "UTF8_LCASE") + ).foreach { case(collationName, proposal) => + checkError( + exception = intercept[SparkException] { + sql(s"SELECT 'a' COLLATE ${collationName}") + }, + condition = "COLLATION_INVALID_NAME", + sqlState = "42704", + parameters = Map("collationName" -> collationName.split("\\.").last, + "proposals" -> proposal)) + } + + // Case insensitive fully qualified names are supported. + checkAnswer( + sql("SELECT 'a' collate sYstEm.bUiltIn.utf8_lCAse = 'A'"), + Seq(Row(true)) + ) + + // Make sure DDLs can use fully qualified names. 
+ withTable("t") { + sql(s"CREATE TABLE t (c STRING COLLATE system.builtin.UTF8_LCASE)") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 6348e5f315395..141d6b219f2a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -39,10 +39,12 @@ import org.apache.spark.sql.test.SQLTestData.DecimalData import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType.{DAY, HOUR, MINUTE, SECOND} import org.apache.spark.sql.types.YearMonthIntervalType.{MONTH, YEAR} +import org.apache.spark.tags.SlowSQLTest import org.apache.spark.unsafe.types.CalendarInterval case class Fact(date: Int, hour: Int, minute: Int, room_name: String, temp: Double) +@SlowSQLTest class DataFrameAggregateSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { @@ -621,6 +623,41 @@ class DataFrameAggregateSuite extends QueryTest ) } + test("listagg function") { + // normal case + val df = Seq(("a", "b"), ("b", "c"), ("c", "d")).toDF("a", "b") + checkAnswer( + df.selectExpr("listagg(a)", "listagg(b)"), + Seq(Row("abc", "bcd")) + ) + checkAnswer( + df.select(listagg($"a"), listagg($"b")), + Seq(Row("abc", "bcd")) + ) + + // distinct case + val df2 = Seq(("a", "b"), ("a", "b"), ("b", "d")).toDF("a", "b") + checkAnswer( + df2.select(listagg_distinct($"a"), listagg_distinct($"b")), + Seq(Row("ab", "bd")) + ) + + // null case + val df3 = Seq(("a", "b", null), ("a", "b", null), (null, null, null)).toDF("a", "b", "c") + checkAnswer( + df3.select(listagg_distinct($"a"), listagg($"a"), listagg_distinct($"b"), listagg($"b"), + listagg($"c")), + Seq(Row("a", "aa", "b", "bb", null)) + ) + + // custom delimiter + val df4 = Seq(("a", "b"), ("b", "c"), ("c", "d")).toDF("a", "b") + checkAnswer( + df4.selectExpr("listagg(a, 
'|')", "listagg(b, '|')"), + Seq(Row("a|b|c", "b|c|d")) + ) + } + test("SPARK-31500: collect_set() of BinaryType returns duplicate elements") { val bytesTest1 = "test1".getBytes val bytesTest2 = "test2".getBytes diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala index 48ea0e01a4372..8024b579e5d0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameComplexTypeSuite.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.expressions.objects.MapObjects import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{ArrayType, BooleanType, Decimal, DoubleType, IntegerType, MapType, StringType, StructField, StructType} @@ -92,8 +91,8 @@ class DataFrameComplexTypeSuite extends QueryTest with SharedSparkSession { // items: Seq[Int] => items.map { item => Seq(Struct(item)) } val result = df.select( - column(MapObjects( - (item: Expression) => array(struct(column(item))).expr, + Column(MapObjects( + (item: Expression) => array(struct(Column(item))).expr, $"items".expr, df.schema("items").dataType.asInstanceOf[ArrayType].elementType )) as "items" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 975a82e26f4eb..fc6d3023ed072 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -24,7 +24,7 @@ import scala.reflect.runtime.universe.runtimeMirror 
import scala.util.Random import org.apache.spark.{QueryContextType, SPARK_DOC_ROOT, SparkException, SparkRuntimeException} -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.{ExtendedAnalysisException, InternalRow} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.Cast._ @@ -32,7 +32,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.plans.logical.OneRowRelation import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, UTC} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -73,7 +72,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { "sum_distinct", // equivalent to sum(distinct foo) "typedLit", "typedlit", // Scala only "udaf", "udf", // create function statement in sql - "call_function" // moot in SQL as you just call the function directly + "call_function", // moot in SQL as you just call the function directly + "listagg_distinct", // equivalent to listagg(distinct foo) + "string_agg_distinct" // equivalent to string_agg(distinct foo) ) val excludedSqlFunctions = Set.empty[String] @@ -404,7 +405,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { callSitePattern = "", startIndex = 0, stopIndex = 0)) - expr = nullifzero(Literal.create(20201231, DateType)) + expr = nullifzero(Column(Literal.create(20201231, DateType))) checkError( intercept[AnalysisException](df.select(expr)), condition = "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES", @@ -457,14 +458,14 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { val df = Seq((0)).toDF("a") var 
expr = randstr(lit(10), lit("a")) checkError( - intercept[AnalysisException](df.select(expr)), + intercept[ExtendedAnalysisException](df.select(expr).collect()), condition = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", parameters = Map( "sqlExpr" -> "\"randstr(10, a)\"", "paramIndex" -> "second", "inputSql" -> "\"a\"", "inputType" -> "\"STRING\"", - "requiredType" -> "INT or SMALLINT"), + "requiredType" -> "(\"INT\" or \"BIGINT\")"), context = ExpectedContext( contextType = QueryContextType.DataFrame, fragment = "randstr", @@ -479,7 +480,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { condition = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", parameters = Map( "inputName" -> "`length`", - "inputType" -> "INT or SMALLINT", + "inputType" -> "integer", "inputExpr" -> "\"a\"", "sqlExpr" -> "\"randstr(a, 10)\""), context = ExpectedContext( @@ -516,7 +517,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { "paramIndex" -> "second", "inputSql" -> "\"a\"", "inputType" -> "\"STRING\"", - "requiredType" -> "integer or floating-point"), + "requiredType" -> "\"NUMERIC\""), context = ExpectedContext( contextType = QueryContextType.DataFrame, fragment = "uniform", @@ -586,7 +587,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { callSitePattern = "", startIndex = 0, stopIndex = 0)) - expr = zeroifnull(Literal.create(20201231, DateType)) + expr = zeroifnull(Column(Literal.create(20201231, DateType))) checkError( intercept[AnalysisException](df.select(expr)), condition = "DATATYPE_MISMATCH.DATA_DIFF_TYPES", @@ -5735,7 +5736,7 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { import DataFrameFunctionsSuite.CodegenFallbackExpr for ((codegenFallback, wholeStage) <- Seq((true, false), (false, false), (false, true))) { val c = if (codegenFallback) { - column(CodegenFallbackExpr(v.expr)) + Column(CodegenFallbackExpr(v.expr)) } else { v } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index f0ed2241fd286..0e9b1c9d2104e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Ascending, AttributeRef import org.apache.spark.sql.catalyst.plans.logical.{Expand, Generate, ScriptInputOutputSchema, ScriptTransformation, Window => WindowPlan} import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.{col, count, explode, sum, year} -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.test.SQLTestData.TestData @@ -375,7 +374,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { Seq.empty, PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, true) - val df7 = df1.mapInPandas(mapInPandasUDF) + val df7 = df1.mapInPandas(Column(mapInPandasUDF)) val df8 = df7.filter($"x" > 0) assertAmbiguousSelfJoin(df7.join(df8, df7("x") === df8("y"))) assertAmbiguousSelfJoin(df8.join(df7, df7("x") === df8("y"))) @@ -386,7 +385,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { Seq.empty, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF, true) - val df9 = df1.groupBy($"key1").flatMapGroupsInPandas(flatMapGroupsInPandasUDF) + val df9 = df1.groupBy($"key1").flatMapGroupsInPandas(Column(flatMapGroupsInPandasUDF)) val df10 = df9.filter($"x" > 0) assertAmbiguousSelfJoin(df9.join(df10, df9("x") === df10("y"))) assertAmbiguousSelfJoin(df10.join(df9, df9("x") === df10("y"))) @@ -398,7 +397,7 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF, true) val df11 = 
df1.groupBy($"key1").flatMapCoGroupsInPandas( - df1.groupBy($"key2"), flatMapCoGroupsInPandasUDF) + df1.groupBy($"key2"), Column(flatMapCoGroupsInPandasUDF)) val df12 = df11.filter($"x" > 0) assertAmbiguousSelfJoin(df11.join(df12, df11("x") === df12("y"))) assertAmbiguousSelfJoin(df12.join(df11, df11("x") === df12("y"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index 5ff737d2b57cb..9c182be0f7dd6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -350,6 +350,84 @@ class DataFrameSetOperationsSuite extends QueryTest dates.intersect(widenTypedRows).collect() } + test("SPARK-50373 - cannot run set operations with variant type") { + val df = sql("select parse_json(case when id = 0 then 'null' else '1' end)" + + " as v, id % 5 as id from range(0, 100, 1, 5)") + checkError( + exception = intercept[AnalysisException](df.intersect(df)), + condition = "UNSUPPORTED_FEATURE.SET_OPERATION_ON_VARIANT_TYPE", + parameters = Map( + "colName" -> "`v`", + "dataType" -> "\"VARIANT\"") + ) + checkError( + exception = intercept[AnalysisException](df.except(df)), + condition = "UNSUPPORTED_FEATURE.SET_OPERATION_ON_VARIANT_TYPE", + parameters = Map( + "colName" -> "`v`", + "dataType" -> "\"VARIANT\"") + ) + checkError( + exception = intercept[AnalysisException](df.distinct()), + condition = "UNSUPPORTED_FEATURE.SET_OPERATION_ON_VARIANT_TYPE", + parameters = Map( + "colName" -> "`v`", + "dataType" -> "\"VARIANT\"")) + checkError( + exception = intercept[AnalysisException](df.dropDuplicates()), + condition = "UNSUPPORTED_FEATURE.SET_OPERATION_ON_VARIANT_TYPE", + parameters = Map( + "colName" -> "`v`", + "dataType" -> "\"VARIANT\"")) + withTempView("tv") { + df.createOrReplaceTempView("tv") + checkError( + exception = 
intercept[AnalysisException](sql("SELECT DISTINCT v FROM tv")), + condition = "UNSUPPORTED_FEATURE.SET_OPERATION_ON_VARIANT_TYPE", + parameters = Map( + "colName" -> "`v`", + "dataType" -> "\"VARIANT\""), + context = ExpectedContext( + fragment = "SELECT DISTINCT v FROM tv", + start = 0, + stop = 24) + ) + checkError( + exception = intercept[AnalysisException](sql("SELECT DISTINCT STRUCT(v) FROM tv")), + condition = "UNSUPPORTED_FEATURE.SET_OPERATION_ON_VARIANT_TYPE", + parameters = Map( + "colName" -> "`struct(v)`", + "dataType" -> "\"STRUCT\""), + context = ExpectedContext( + fragment = "SELECT DISTINCT STRUCT(v) FROM tv", + start = 0, + stop = 32) + ) + checkError( + exception = intercept[AnalysisException](sql("SELECT DISTINCT ARRAY(v) FROM tv")), + condition = "UNSUPPORTED_FEATURE.SET_OPERATION_ON_VARIANT_TYPE", + parameters = Map( + "colName" -> "`array(v)`", + "dataType" -> "\"ARRAY\""), + context = ExpectedContext( + fragment = "SELECT DISTINCT ARRAY(v) FROM tv", + start = 0, + stop = 31) + ) + checkError( + exception = intercept[AnalysisException](sql("SELECT DISTINCT MAP('m', v) FROM tv")), + condition = "UNSUPPORTED_FEATURE.SET_OPERATION_ON_MAP_TYPE", + parameters = Map( + "colName" -> "`map(m, v)`", + "dataType" -> "\"MAP\""), + context = ExpectedContext( + fragment = "SELECT DISTINCT MAP('m', v) FROM tv", + start = 0, + stop = 34) + ) + } + } + test("SPARK-19893: cannot run set operations with map type") { val df = spark.range(1).select(map(lit("key"), $"id").as("m")) checkError( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala index d656c36ce842a..621d468454d40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSubquerySuite.scala @@ -53,23 +53,15 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { 
r.createOrReplaceTempView("r") } - test("unanalyzable expression") { - val sub = spark.range(1).select($"id" === $"id".outer()) - - checkError( - intercept[AnalysisException](sub.schema), - condition = "UNANALYZABLE_EXPRESSION", - parameters = Map("expr" -> "\"outer(id)\""), - queryContext = - Array(ExpectedContext(fragment = "outer", callSitePattern = getCurrentClassCallSitePattern)) - ) - + test("noop outer()") { + checkAnswer(spark.range(1).select($"id".outer()), Row(0)) checkError( - intercept[AnalysisException](sub.encoder), - condition = "UNANALYZABLE_EXPRESSION", - parameters = Map("expr" -> "\"outer(id)\""), - queryContext = - Array(ExpectedContext(fragment = "outer", callSitePattern = getCurrentClassCallSitePattern)) + intercept[AnalysisException](spark.range(1).select($"outer_col".outer()).collect()), + "UNRESOLVED_COLUMN.WITH_SUGGESTION", + parameters = Map("objectName" -> "`outer_col`", "proposal" -> "`id`"), + context = ExpectedContext( + fragment = "$", + callSitePattern = getCurrentClassCallSitePattern) ) } @@ -148,6 +140,64 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { } } + test("correlated scalar subquery in SELECT with outer() function") { + val df1 = spark.table("l").as("t1") + val df2 = spark.table("l").as("t2") + // We can use the `.outer()` function to wrap either the outer column, or the entire condition, + // or the SQL string of the condition. + Seq( + $"t1.a" === $"t2.a".outer(), + ($"t1.a" === $"t2.a").outer(), + expr("t1.a = t2.a").outer()).foreach { cond => + checkAnswer( + df1.select( + $"a", + df2.where(cond).select(sum($"b")).scalar().as("sum_b") + ), + sql("select a, (select sum(b) from l t1 where t1.a = t2.a) sum_b from l t2") + ) + } + } + + test("correlated scalar subquery in WHERE with outer() function") { + // We can use the `.outer()` function to wrap either the outer column, or the entire condition, + // or the SQL string of the condition. 
+ Seq( + $"a".outer() === $"c", + ($"a" === $"c").outer(), + expr("a = c").outer()).foreach { cond => + checkAnswer( + spark.table("l").where( + $"b" < spark.table("r").where(cond).select(max($"d")).scalar() + ), + sql("select * from l where b < (select max(d) from r where a = c)") + ) + } + } + + test("EXISTS predicate subquery with outer() function") { + // We can use the `.outer()` function to wrap either the outer column, or the entire condition, + // or the SQL string of the condition. + Seq( + $"a".outer() === $"c", + ($"a" === $"c").outer(), + expr("a = c").outer()).foreach { cond => + checkAnswer( + spark.table("l").where( + spark.table("r").where(cond).exists() + ), + sql("select * from l where exists (select * from r where l.a = r.c)") + ) + + checkAnswer( + spark.table("l").where( + spark.table("r").where(cond).exists() && $"a" <= lit(2) + ), + sql("select * from l where exists (select * from r where l.a = r.c) and l.a <= 2") + ) + } + } + test("SPARK-15677: Queries against local relations with scalar subquery in Select list") { withTempView("t1", "t2") { Seq((1, 1), (2, 2)).toDF("c1", "c2").createOrReplaceTempView("t1") @@ -192,22 +242,6 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { } } - test("EXISTS predicate subquery") { - checkAnswer( - spark.table("l").where( - spark.table("r").where($"a".outer() === $"c").exists() - ), - sql("select * from l where exists (select * from r where l.a = r.c)") - ) - - checkAnswer( - spark.table("l").where( - spark.table("r").where($"a".outer() === $"c").exists() && $"a" <= lit(2) - ), - sql("select * from l where exists (select * from r where l.a = r.c) and l.a <= 2") - ) - } - test("NOT EXISTS predicate subquery") { checkAnswer( spark.table("l").where( @@ -244,32 +278,15 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { ) } - test("correlated scalar subquery in where") { + test("correlated scalar subquery in select (null safe equal)") { + val df1 = 
spark.table("l").as("t1") + val df2 = spark.table("l").as("t2") checkAnswer( - spark.table("l").where( - $"b" < spark.table("r").where($"a".outer() === $"c").select(max($"d")).scalar() - ), - sql("select * from l where b < (select max(d) from r where a = c)") - ) - } - - test("correlated scalar subquery in select") { - checkAnswer( - spark.table("l").select( + df1.select( $"a", - spark.table("l").where($"a" === $"a".outer()).select(sum($"b")).scalar().as("sum_b") + df2.where($"t2.a" <=> $"t1.a".outer()).select(sum($"b")).scalar().as("sum_b") ), - sql("select a, (select sum(b) from l l2 where l2.a = l1.a) sum_b from l l1") - ) - } - - test("correlated scalar subquery in select (null safe)") { - checkAnswer( - spark.table("l").select( - $"a", - spark.table("l").where($"a" <=> $"a".outer()).select(sum($"b")).scalar().as("sum_b") - ), - sql("select a, (select sum(b) from l l2 where l2.a <=> l1.a) sum_b from l l1") + sql("select a, (select sum(b) from l t2 where t2.a <=> t1.a) sum_b from l t1") ) } @@ -300,10 +317,12 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { } test("non-aggregated correlated scalar subquery") { + val df1 = spark.table("l").as("t1") + val df2 = spark.table("l").as("t2") val exception1 = intercept[SparkRuntimeException] { - spark.table("l").select( + df1.select( $"a", - spark.table("l").where($"a" === $"a".outer()).select($"b").scalar().as("sum_b") + df2.where($"t1.a" === $"t2.a".outer()).select($"b").scalar().as("sum_b") ).collect() } checkError( @@ -313,12 +332,14 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { } test("non-equal correlated scalar subquery") { + val df1 = spark.table("l").as("t1") + val df2 = spark.table("l").as("t2") checkAnswer( - spark.table("l").select( + df1.select( $"a", - spark.table("l").where($"a" < $"a".outer()).select(sum($"b")).scalar().as("sum_b") + df2.where($"t2.a" < $"t1.a".outer()).select(sum($"b")).scalar().as("sum_b") ), - sql("select a, (select sum(b) from 
l l2 where l2.a < l1.a) sum_b from l l1") + sql("select a, (select sum(b) from l t2 where t2.a < t1.a) sum_b from l t1") ) } @@ -346,7 +367,7 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { spark.table("l").select( $"a", spark.table("r").where($"c" === $"a").select(sum($"d")).scalar() - ).collect() + ) } checkError( exception1, @@ -355,35 +376,468 @@ class DataFrameSubquerySuite extends QueryTest with SharedSparkSession { queryContext = Array(ExpectedContext(fragment = "$", callSitePattern = getCurrentClassCallSitePattern)) ) + } - // Extra `outer()` - val exception2 = intercept[AnalysisException] { - spark.table("l").select( - $"a", - spark.table("r").where($"c".outer() === $"a".outer()).select(sum($"d")).scalar() - ).collect() + private def table1() = { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + spark.table("t1") + } + + private def table2() = { + sql("CREATE VIEW t2(c1, c2) AS VALUES (0, 2), (0, 3)") + spark.table("t2") + } + + private def table3() = { + sql("CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + spark.table("t3") + } + + test("lateral join with single column select") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(spark.range(1).select($"c1".outer())), + sql("SELECT * FROM t1, LATERAL (SELECT c1)") + ) + checkAnswer( + t1.lateralJoin(t2.select($"c1")), + sql("SELECT * FROM t1, LATERAL (SELECT c1 FROM t2)") + ) + checkAnswer( + t1.lateralJoin(t2.select($"t1.c1".outer())), + sql("SELECT * FROM t1, LATERAL (SELECT t1.c1 FROM t2)") + ) + checkAnswer( + t1.lateralJoin(t2.select($"t1.c1".outer() + $"t2.c1")), + sql("SELECT * FROM t1, LATERAL (SELECT t1.c1 + t2.c1 FROM t2)") + ) } - checkError( - exception2, - condition = "UNRESOLVED_COLUMN.WITH_SUGGESTION", - parameters = Map("objectName" -> "`c`", "proposal" -> "`a`, `b`"), - queryContext = - Array(ExpectedContext(fragment = "outer", callSitePattern = 
getCurrentClassCallSitePattern)) - ) + } - // Missing `outer()` for another outer - val exception3 = intercept[AnalysisException] { - spark.table("l").select( - $"a", - spark.table("r").where($"b" === $"a".outer()).select(sum($"d")).scalar() - ).collect() + test("lateral join with star expansion") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(spark.range(1).select().select($"*")), + sql("SELECT * FROM t1, LATERAL (SELECT *)") + ) + checkAnswer( + t1.lateralJoin(t2.select($"*")), + sql("SELECT * FROM t1, LATERAL (SELECT * FROM t2)") + ) + checkAnswer( + t1.lateralJoin(t2.select($"t1.*".outer(), $"t2.*")), + sql("SELECT * FROM t1, LATERAL (SELECT t1.*, t2.* FROM t2)") + ) + checkAnswer( + t1.lateralJoin(t2.alias("t1").select($"t1.*")), + sql("SELECT * FROM t1, LATERAL (SELECT t1.* FROM t2 AS t1)") + ) + } + } + + test("lateral join with different join types") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.lateralJoin( + spark.range(1).select(($"c1".outer() + $"c2".outer()).as("c3")), + $"c2" === $"c3"), + sql("SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3") + ) + checkAnswer( + t1.lateralJoin( + spark.range(1).select(($"c1".outer() + $"c2".outer()).as("c3")), + $"c2" === $"c3", + "left"), + sql("SELECT * FROM t1 LEFT JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3") + ) + checkAnswer( + t1.lateralJoin( + spark.range(1).select(($"c1".outer() + $"c2".outer()).as("c3")), + "cross"), + sql("SELECT * FROM t1 CROSS JOIN LATERAL (SELECT c1 + c2 AS c3)") + ) + } + } + + test("lateral join with subquery alias") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.lateralJoin(spark.range(1).select($"c1".outer(), $"c2".outer()).toDF("a", "b").as("s")) + .select("a", "b"), + sql("SELECT a, b FROM t1, LATERAL (SELECT c1, c2) s(a, b)") + ) + } + } + + test("lateral join with correlated equality / non-equality predicates") { + withView("t1", "t2") { + val t1 = table1() + val t2 = 
table2() + + checkAnswer( + t1.lateralJoin(t2.where($"t1.c1".outer() === $"t2.c1").select($"c2")), + sql("SELECT * FROM t1, LATERAL (SELECT c2 FROM t2 WHERE t1.c1 = t2.c1)") + ) + checkAnswer( + t1.lateralJoin(t2.where($"t1.c1".outer() < $"t2.c1").select($"c2")), + sql("SELECT * FROM t1, LATERAL (SELECT c2 FROM t2 WHERE t1.c1 < t2.c1)") + ) + } + } + + test("lateral join with aggregation and correlated non-equality predicates") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin(t2.where($"t1.c2".outer() < $"t2.c2").select(max($"c2").as("m"))), + sql("SELECT * FROM t1, LATERAL (SELECT max(c2) AS m FROM t2 WHERE t1.c2 < t2.c2)") + ) + } + } + + test("lateral join can reference preceding FROM clause items") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.join(t2).lateralJoin( + spark.range(1).select($"t1.c2".outer() + $"t2.c2".outer()) + ), + sql("SELECT * FROM t1 JOIN t2 JOIN LATERAL (SELECT t1.c2 + t2.c2)") + ) + } + } + + test("multiple lateral joins") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.lateralJoin( + spark.range(1).select(($"c1".outer() + $"c2".outer()).as("a")) + ).lateralJoin( + spark.range(1).select(($"c1".outer() - $"c2".outer()).as("b")) + ).lateralJoin( + spark.range(1).select(($"a".outer() * $"b".outer()).as("c")) + ), + sql( + """ + |SELECT * FROM t1, + |LATERAL (SELECT c1 + c2 AS a), + |LATERAL (SELECT c1 - c2 AS b), + |LATERAL (SELECT a * b AS c) + |""".stripMargin) + ) + } + } + + test("lateral join in between regular joins") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin( + t2.where($"t1.c1".outer() === $"t2.c1").select($"c2").as("s"), "left" + ).join(t1.as("t3"), $"s.c2" === $"t3.c2", "left"), + sql( + """ + |SELECT * FROM t1 + |LEFT OUTER JOIN LATERAL (SELECT c2 FROM t2 WHERE t1.c1 = t2.c1) s + |LEFT OUTER JOIN t1 t3 ON s.c2 = t3.c2 + |""".stripMargin) + ) + } + } + + test("nested 
lateral joins") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.lateralJoin( + t2.lateralJoin(spark.range(1).select($"c1".outer())) + ), + sql("SELECT * FROM t1, LATERAL (SELECT * FROM t2, LATERAL (SELECT c1))") + ) + checkAnswer( + t1.lateralJoin( + spark.range(1).select(($"c1".outer() + lit(1)).as("c1")) + .lateralJoin(spark.range(1).select($"c1".outer())) + ), + sql("SELECT * FROM t1, LATERAL (SELECT * FROM (SELECT c1 + 1 AS c1), LATERAL (SELECT c1))") + ) + } + } + + test("scalar subquery inside lateral join") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + // uncorrelated + checkAnswer( + t1.lateralJoin( + spark.range(1).select( + $"c2".outer(), + t2.select(min($"c2")).scalar() + ) + ), + sql("SELECT * FROM t1, LATERAL (SELECT c2, (SELECT MIN(c2) FROM t2))") + ) + + // correlated + checkAnswer( + t1.lateralJoin( + spark.range(1).select($"c1".outer().as("a")) + .select(t2.where($"c1" === $"a".outer()).select(sum($"c2")).scalar()) + ), + sql( + """ + |SELECT * FROM t1, LATERAL ( + | SELECT (SELECT SUM(c2) FROM t2 WHERE c1 = a) FROM (SELECT c1 AS a) + |) + |""".stripMargin) + ) + } + } + + test("lateral join inside subquery") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + // uncorrelated + checkAnswer( + t1.where( + $"c1" === t2.lateralJoin( + spark.range(1).select($"c1".outer().as("a"))).select(min($"a") + ).scalar() + ), + sql("SELECT * FROM t1 WHERE c1 = (SELECT MIN(a) FROM t2, LATERAL (SELECT c1 AS a))") + ) + // correlated + checkAnswer( + t1.where( + $"c1" === t2.lateralJoin( + spark.range(1).select($"c1".outer().as("a"))) + .where($"c1" === $"t1.c1".outer()) + .select(min($"a")) + .scalar() + ), + sql("SELECT * FROM t1 " + + "WHERE c1 = (SELECT MIN(a) FROM t2, LATERAL (SELECT c1 AS a) WHERE c1 = t1.c1)") + ) + } + } + + test("lateral join with table-valued functions") { + withView("t1", "t3") { + val t1 = table1() + val t3 = table3() + + checkAnswer( + 
t1.lateralJoin(spark.tvf.range(3)), + sql("SELECT * FROM t1, LATERAL RANGE(3)") + ) + checkAnswer( + t1.lateralJoin(spark.tvf.explode(array($"c1".outer(), $"c2".outer()))), + sql("SELECT * FROM t1, LATERAL EXPLODE(ARRAY(c1, c2)) t2(c3)") + ) + checkAnswer( + t3.lateralJoin(spark.tvf.explode_outer($"c2".outer())), + sql("SELECT * FROM t3, LATERAL EXPLODE_OUTER(c2) t2(v)") + ) + checkAnswer( + spark.tvf.explode(array(lit(1), lit(2))).toDF("v") + .lateralJoin(spark.range(1).select($"v".outer() + 1)), + sql("SELECT * FROM EXPLODE(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1)") + ) + } + } + + test("lateral join with table-valued functions and join conditions") { + withView("t1", "t3") { + val t1 = table1() + val t3 = table3() + + checkAnswer( + t1.lateralJoin( + spark.tvf.explode(array($"c1".outer(), $"c2".outer())), + $"c1" === $"col" + ), + sql("SELECT * FROM t1 JOIN LATERAL EXPLODE(ARRAY(c1, c2)) t(c3) ON t1.c1 = c3") + ) + checkAnswer( + t3.lateralJoin( + spark.tvf.explode($"c2".outer()), + $"c1" === $"col" + ), + sql("SELECT * FROM t3 JOIN LATERAL EXPLODE(c2) t(c3) ON t3.c1 = c3") + ) + checkAnswer( + t3.lateralJoin( + spark.tvf.explode($"c2".outer()), + $"c1" === $"col", + "left" + ), + sql("SELECT * FROM t3 LEFT JOIN LATERAL EXPLODE(c2) t(c3) ON t3.c1 = c3") + ) + } + } + + test("subquery with generator / table-valued functions") { + withView("t1") { + val t1 = table1() + + checkAnswer( + spark.range(1).select(explode(t1.select(collect_list("c2")).scalar())), + sql("SELECT EXPLODE((SELECT COLLECT_LIST(c2) FROM t1))") + ) + checkAnswer( + spark.tvf.explode(t1.select(collect_list("c2")).scalar()), + sql("SELECT * FROM EXPLODE((SELECT COLLECT_LIST(c2) FROM t1))") + ) + } + } + + test("subquery in join condition") { + withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkAnswer( + t1.join(t2, $"t1.c1" === t1.select(max("c1")).scalar()), + sql("SELECT * FROM t1 JOIN t2 ON t1.c1 = (SELECT MAX(c1) FROM t1)") + ) + } + } + + test("subquery in unpivot") { + 
withView("t1", "t2") { + val t1 = table1() + val t2 = table2() + + checkError( + intercept[AnalysisException] { + t1.unpivot(Array(t2.exists()), "c1", "c2").collect() + }, + "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY", + parameters = Map("treeNode" -> "(?s)'Unpivot.*"), + matchPVals = true, + queryContext = Array(ExpectedContext( + fragment = "exists", + callSitePattern = getCurrentClassCallSitePattern)) + ) + checkError( + intercept[AnalysisException] { + t1.unpivot(Array($"c1"), Array(t2.exists()), "c1", "c2").collect() + }, + "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_IN_EXISTS_SUBQUERY", + parameters = Map("treeNode" -> "(?s)Expand.*"), + matchPVals = true, + queryContext = Array(ExpectedContext( + fragment = "exists", + callSitePattern = getCurrentClassCallSitePattern)) + ) + } + } + + test("subquery in transpose") { + withView("t1") { + val t1 = table1() + + checkError( + intercept[AnalysisException] { + t1.transpose(t1.select(max("c1")).scalar()).collect() + }, + "TRANSPOSE_INVALID_INDEX_COLUMN", + parameters = Map("reason" -> "Index column must be an atomic attribute") + ) + } + } + + test("subquery in withColumns") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer() + $"c2".outer()) + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .withColumn("c1", $"c1".outer()) + .select($"c1" + $"c2".outer()) + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + .select($"c1".outer().as("c1")) + .withColumn("c2", $"c2".outer()) + .select($"c1" + $"c2") + .scalar()), + t1.select($"*", ($"c1" + $"c2").as("scalar"))) + } + } + + test("subquery in withColumnsRenamed") { + withView("t1") { + val t1 = table1() + + checkAnswer( + t1.withColumn( + "scalar", + spark + .range(1) + 
.select($"c1".outer().as("c1"), $"c2".outer().as("c2")) + .withColumnsRenamed(Map("c1" -> "x", "c2" -> "y")) + .select($"x" + $"y") + .scalar()), + t1.select($"*", ($"c1".as("x") + $"c2".as("y")).as("scalar"))) + } + } + + test("subquery in drop") { + withView("t1") { + val t1 = table1() + + checkAnswer(t1.drop(spark.range(1).select(lit("c1")).scalar()), t1) + } + } + + test("subquery in repartition") { + withView("t1") { + val t1 = table1() + + checkAnswer(t1.repartition(spark.range(1).select(lit(1)).scalar()), t1) } - checkError( - exception3, - condition = "UNRESOLVED_COLUMN.WITH_SUGGESTION", - parameters = Map("objectName" -> "`b`", "proposal" -> "`c`, `d`"), - queryContext = - Array(ExpectedContext(fragment = "$", callSitePattern = getCurrentClassCallSitePattern)) - ) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index ff251ddbbfb52..71d55b007aa17 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -43,7 +43,6 @@ import org.apache.spark.sql.execution.aggregate.HashAggregateExec import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec, ShuffleExchangeExec, ShuffleExchangeLike} import org.apache.spark.sql.expressions.{Aggregator, Window} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} import org.apache.spark.sql.test.SQLTestData.{ArrayStringWrapper, ContainerStringWrapper, StringWrapper, TestData2} @@ -309,6 +308,69 @@ class DataFrameSuite extends QueryTest testData.select("key").collect().toSeq) } + test("SPARK-50503 - cannot partition by variant columns") { + val df = sql("select parse_json(case when id = 0 then 'null' else '1' end)" + + " as v, 
id % 5 as id, named_struct('v', parse_json(id::string)) s from range(0, 100, 1, 5)") + // variant column + checkError( + exception = intercept[AnalysisException](df.repartition(5, col("v"))), + condition = "UNSUPPORTED_FEATURE.PARTITION_BY_VARIANT", + parameters = Map( + "expr" -> "\"v\"", + "dataType" -> "\"VARIANT\"") + ) + // nested variant column + checkError( + exception = intercept[AnalysisException](df.repartition(5, col("s"))), + condition = "UNSUPPORTED_FEATURE.PARTITION_BY_VARIANT", + parameters = Map( + "expr" -> "\"s\"", + "dataType" -> "\"STRUCT\"") + ) + // variant producing expression + checkError( + exception = + intercept[AnalysisException](df.repartition(5, parse_json(col("id").cast("string")))), + condition = "UNSUPPORTED_FEATURE.PARTITION_BY_VARIANT", + parameters = Map( + "expr" -> "\"parse_json(CAST(id AS STRING))\"", + "dataType" -> "\"VARIANT\"") + ) + // Partitioning by non-variant column works + try { + df.repartition(5, col("id")).collect() + } catch { + case e: Exception => + fail(s"Expected no exception to be thrown but an exception was thrown: ${e.getMessage}") + } + // SQL + withTempView("tv") { + df.createOrReplaceTempView("tv") + checkError( + exception = intercept[AnalysisException](sql("SELECT * FROM tv DISTRIBUTE BY v")), + condition = "UNSUPPORTED_FEATURE.PARTITION_BY_VARIANT", + parameters = Map( + "expr" -> "\"v\"", + "dataType" -> "\"VARIANT\""), + context = ExpectedContext( + fragment = "DISTRIBUTE BY v", + start = 17, + stop = 31) + ) + checkError( + exception = intercept[AnalysisException](sql("SELECT * FROM tv DISTRIBUTE BY s")), + condition = "UNSUPPORTED_FEATURE.PARTITION_BY_VARIANT", + parameters = Map( + "expr" -> "\"s\"", + "dataType" -> "\"STRUCT\""), + context = ExpectedContext( + fragment = "DISTRIBUTE BY s", + start = 17, + stop = 31) + ) + } + } + test("repartition with SortOrder") { // passing SortOrder expressions to .repartition() should result in an informative error @@ -366,6 +428,35 @@ class DataFrameSuite 
extends QueryTest } } + test("repartition by MapType") { + Seq("int", "long", "float", "double", "decimal(10, 2)", "string", "varchar(6)").foreach { dt => + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val df = spark.range(20) + .withColumn("c1", + when(col("id") % 3 === 1, typedLit(Map(1 -> 1))) + .when(col("id") % 3 === 2, typedLit(Map(1 -> 1, 2 -> 2))) + .otherwise(typedLit(Map(2 -> 2, 1 -> 1))).cast(s"map<$dt, $dt>")) + .withColumn("c2", typedLit(Map(1 -> null)).cast(s"map<$dt, $dt>")) + .withColumn("c3", lit(null).cast(s"map<$dt, $dt>")) + + assertPartitionNumber(df.repartition(4, col("c1")), 2) + assertPartitionNumber(df.repartition(4, col("c2")), 1) + assertPartitionNumber(df.repartition(4, col("c3")), 1) + assertPartitionNumber(df.repartition(4, col("c1"), col("c2")), 2) + assertPartitionNumber(df.repartition(4, col("c1"), col("c3")), 2) + assertPartitionNumber(df.repartition(4, col("c1"), col("c2"), col("c3")), 2) + assertPartitionNumber(df.repartition(4, col("c2"), col("c3")), 2) + } + } + } + + private def assertPartitionNumber(df: => DataFrame, max: Int): Unit = { + val dfGrouped = df.groupBy(spark_partition_id()).count() + // Result number of partition can be lower or equal to max, + // but no more than that. 
+ assert(dfGrouped.count() <= max, dfGrouped.queryExecution.simpleString) + } + test("coalesce") { intercept[IllegalArgumentException] { testData.select("key").coalesce(0) @@ -1567,7 +1658,7 @@ class DataFrameSuite extends QueryTest test("SPARK-46794: exclude subqueries from LogicalRDD constraints") { withTempDir { checkpointDir => val subquery = - column(ScalarSubquery(spark.range(10).selectExpr("max(id)").logicalPlan)) + Column(ScalarSubquery(spark.range(10).selectExpr("max(id)").logicalPlan)) val df = spark.range(1000).filter($"id" === subquery) assert(df.logicalPlan.constraints.exists(_.exists(_.isInstanceOf[ScalarSubquery]))) @@ -2054,18 +2145,18 @@ class DataFrameSuite extends QueryTest // the number of keys must match val exception1 = intercept[IllegalArgumentException] { df1.groupBy($"key1", $"key2").flatMapCoGroupsInPandas( - df2.groupBy($"key2"), flatMapCoGroupsInPandasUDF) + df2.groupBy($"key2"), Column(flatMapCoGroupsInPandasUDF)) } assert(exception1.getMessage.contains("Cogroup keys must have same size: 2 != 1")) val exception2 = intercept[IllegalArgumentException] { df1.groupBy($"key1").flatMapCoGroupsInPandas( - df2.groupBy($"key1", $"key2"), flatMapCoGroupsInPandasUDF) + df2.groupBy($"key1", $"key2"), Column(flatMapCoGroupsInPandasUDF)) } assert(exception2.getMessage.contains("Cogroup keys must have same size: 1 != 2")) // but different keys are allowed val actual = df1.groupBy($"key1").flatMapCoGroupsInPandas( - df2.groupBy($"key2"), flatMapCoGroupsInPandasUDF) + df2.groupBy($"key2"), Column(flatMapCoGroupsInPandasUDF)) // can't evaluate the DataFrame as there is no PythonFunction given assert(actual != null) } @@ -2419,7 +2510,7 @@ class DataFrameSuite extends QueryTest | SELECT a, b FROM (SELECT a, b FROM VALUES (1, 2) AS t(a, b)) |) |""".stripMargin) - val stringCols = df.logicalPlan.output.map(column(_).cast(StringType)) + val stringCols = df.logicalPlan.output.map(Column(_).cast(StringType)) val castedDf = df.select(stringCols: _*) 
checkAnswer(castedDf, Row("1", "1") :: Row("1", "2") :: Nil) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala index c2f53ff56d1aa..637e0cf964fe5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTableValuedFunctionsSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSession { + import testImplicits._ test("explode") { val actual1 = spark.tvf.explode(array(lit(1), lit(2))) @@ -50,6 +51,30 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi checkAnswer(actual6, expected6) } + test("explode - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql("CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin(spark.tvf.explode(array($"c1".outer(), $"c2".outer())).toDF("c3").as("t2")), + sql("SELECT * FROM t1, LATERAL EXPLODE(ARRAY(c1, c2)) t2(c3)") + ) + checkAnswer( + t3.lateralJoin(spark.tvf.explode($"c2".outer()).toDF("v").as("t2")), + sql("SELECT * FROM t3, LATERAL EXPLODE(c2) t2(v)") + ) + checkAnswer( + spark.tvf.explode(array(lit(1), lit(2))).toDF("v") + .lateralJoin(spark.range(1).select($"v".outer() + lit(1))), + sql("SELECT * FROM EXPLODE(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1)") + ) + } + } + test("explode_outer") { val actual1 = spark.tvf.explode_outer(array(lit(1), lit(2))) val expected1 = spark.sql("SELECT * FROM explode_outer(array(1, 2))") @@ -78,6 +103,31 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi 
checkAnswer(actual6, expected6) } + test("explode_outer - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql("CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin( + spark.tvf.explode_outer(array($"c1".outer(), $"c2".outer())).toDF("c3").as("t2")), + sql("SELECT * FROM t1, LATERAL EXPLODE_OUTER(ARRAY(c1, c2)) t2(c3)") + ) + checkAnswer( + t3.lateralJoin(spark.tvf.explode_outer($"c2".outer()).toDF("v").as("t2")), + sql("SELECT * FROM t3, LATERAL EXPLODE_OUTER(c2) t2(v)") + ) + checkAnswer( + spark.tvf.explode_outer(array(lit(1), lit(2))).toDF("v") + .lateralJoin(spark.range(1).select($"v".outer() + lit(1))), + sql("SELECT * FROM EXPLODE_OUTER(ARRAY(1, 2)) t(v), LATERAL (SELECT v + 1)") + ) + } + } + test("inline") { val actual1 = spark.tvf.inline(array(struct(lit(1), lit("a")), struct(lit(2), lit("b")))) val expected1 = spark.sql("SELECT * FROM inline(array(struct(1, 'a'), struct(2, 'b')))") @@ -98,6 +148,32 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi checkAnswer(actual3, expected3) } + test("inline - lateral join") { + withView("array_struct") { + sql( + """ + |CREATE VIEW array_struct(id, arr) AS VALUES + | (1, ARRAY(STRUCT(1, 'a'), STRUCT(2, 'b'))), + | (2, ARRAY()), + | (3, ARRAY(STRUCT(3, 'c'))) + |""".stripMargin) + val arrayStruct = spark.table("array_struct") + + checkAnswer( + arrayStruct.lateralJoin(spark.tvf.inline($"arr".outer())), + sql("SELECT * FROM array_struct JOIN LATERAL INLINE(arr)") + ) + checkAnswer( + arrayStruct.lateralJoin( + spark.tvf.inline($"arr".outer()).toDF("k", "v").as("t"), + $"id" === $"k", + "left" + ), + sql("SELECT * FROM array_struct LEFT JOIN LATERAL INLINE(arr) t(k, v) ON id = k") + ) + } + } + test("inline_outer") { val actual1 = spark.tvf.inline_outer(array(struct(lit(1), lit("a")), 
struct(lit(2), lit("b")))) val expected1 = spark.sql("SELECT * FROM inline_outer(array(struct(1, 'a'), struct(2, 'b')))") @@ -118,6 +194,32 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi checkAnswer(actual3, expected3) } + test("inline_outer - lateral join") { + withView("array_struct") { + sql( + """ + |CREATE VIEW array_struct(id, arr) AS VALUES + | (1, ARRAY(STRUCT(1, 'a'), STRUCT(2, 'b'))), + | (2, ARRAY()), + | (3, ARRAY(STRUCT(3, 'c'))) + |""".stripMargin) + val arrayStruct = spark.table("array_struct") + + checkAnswer( + arrayStruct.lateralJoin(spark.tvf.inline_outer($"arr".outer())), + sql("SELECT * FROM array_struct JOIN LATERAL INLINE_OUTER(arr)") + ) + checkAnswer( + arrayStruct.lateralJoin( + spark.tvf.inline_outer($"arr".outer()).toDF("k", "v").as("t"), + $"id" === $"k", + "left" + ), + sql("SELECT * FROM array_struct LEFT JOIN LATERAL INLINE_OUTER(arr) t(k, v) ON id = k") + ) + } + } + test("json_tuple") { val actual = spark.tvf.json_tuple(lit("""{"a":1,"b":2}"""), lit("a"), lit("b")) val expected = spark.sql("""SELECT * FROM json_tuple('{"a":1,"b":2}', 'a', 'b')""") @@ -130,6 +232,43 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi assert(ex.messageParameters("functionName") == "`json_tuple`") } + test("json_tuple - lateral join") { + withView("json_table") { + sql( + """ + |CREATE OR REPLACE TEMP VIEW json_table(key, jstring) AS VALUES + | ('1', '{"f1": "1", "f2": "2", "f3": 3, "f5": 5.23}'), + | ('2', '{"f1": "1", "f3": "3", "f2": 2, "f4": 4.01}'), + | ('3', '{"f1": 3, "f4": "4", "f3": "3", "f2": 2, "f5": 5.01}'), + | ('4', cast(null as string)), + | ('5', '{"f1": null, "f5": ""}'), + | ('6', '[invalid JSON string]') + |""".stripMargin) + val jsonTable = spark.table("json_table") + + checkAnswer( + jsonTable.as("t1").lateralJoin( + spark.tvf.json_tuple( + $"t1.jstring".outer(), + lit("f1"), lit("f2"), lit("f3"), lit("f4"), lit("f5")).as("t2") + ).select($"t1.key", $"t2.*"), + 
sql("SELECT t1.key, t2.* FROM json_table t1, " + + "LATERAL json_tuple(t1.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') t2") + ) + checkAnswer( + jsonTable.as("t1").lateralJoin( + spark.tvf.json_tuple( + $"jstring".outer(), + lit("f1"), lit("f2"), lit("f3"), lit("f4"), lit("f5")).as("t2") + ).where($"t2.c0".isNotNull) + .select($"t1.key", $"t2.*"), + sql("SELECT t1.key, t2.* FROM json_table t1, " + + "LATERAL json_tuple(t1.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') t2 " + + "WHERE t2.c0 IS NOT NULL") + ) + } + } + test("posexplode") { val actual1 = spark.tvf.posexplode(array(lit(1), lit(2))) val expected1 = spark.sql("SELECT * FROM posexplode(array(1, 2))") @@ -158,6 +297,30 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi checkAnswer(actual6, expected6) } + test("posexplode - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql("CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin(spark.tvf.posexplode(array($"c1".outer(), $"c2".outer()))), + sql("SELECT * FROM t1, LATERAL POSEXPLODE(ARRAY(c1, c2))") + ) + checkAnswer( + t3.lateralJoin(spark.tvf.posexplode($"c2".outer())), + sql("SELECT * FROM t3, LATERAL POSEXPLODE(c2)") + ) + checkAnswer( + spark.tvf.posexplode(array(lit(1), lit(2))).toDF("p", "v") + .lateralJoin(spark.range(1).select($"v".outer() + lit(1))), + sql("SELECT * FROM POSEXPLODE(ARRAY(1, 2)) t(p, v), LATERAL (SELECT v + 1)") + ) + } + } + test("posexplode_outer") { val actual1 = spark.tvf.posexplode_outer(array(lit(1), lit(2))) val expected1 = spark.sql("SELECT * FROM posexplode_outer(array(1, 2))") @@ -186,12 +349,66 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi checkAnswer(actual6, expected6) } + test("posexplode_outer - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS 
VALUES (0, 1), (1, 2)") + sql("CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin(spark.tvf.posexplode_outer(array($"c1".outer(), $"c2".outer()))), + sql("SELECT * FROM t1, LATERAL POSEXPLODE_OUTER(ARRAY(c1, c2))") + ) + checkAnswer( + t3.lateralJoin(spark.tvf.posexplode_outer($"c2".outer())), + sql("SELECT * FROM t3, LATERAL POSEXPLODE_OUTER(c2)") + ) + checkAnswer( + spark.tvf.posexplode_outer(array(lit(1), lit(2))).toDF("p", "v") + .lateralJoin(spark.range(1).select($"v".outer() + lit(1))), + sql("SELECT * FROM POSEXPLODE_OUTER(ARRAY(1, 2)) t(p, v), LATERAL (SELECT v + 1)") + ) + } + } + test("stack") { val actual = spark.tvf.stack(lit(2), lit(1), lit(2), lit(3)) val expected = spark.sql("SELECT * FROM stack(2, 1, 2, 3)") checkAnswer(actual, expected) } + test("stack - lateral join") { + withView("t1", "t3") { + sql("CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2)") + sql("CREATE VIEW t3(c1, c2) AS " + + "VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4))") + val t1 = spark.table("t1") + val t3 = spark.table("t3") + + checkAnswer( + t1.lateralJoin( + spark.tvf.stack(lit(2), lit("Key"), $"c1".outer(), lit("Value"), $"c2".outer()).as("t") + ).select($"t.*"), + sql("SELECT t.* FROM t1, LATERAL stack(2, 'Key', c1, 'Value', c2) t") + ) + checkAnswer( + t1.lateralJoin( + spark.tvf.stack(lit(1), $"c1".outer(), $"c2".outer()).toDF("x", "y").as("t") + ).select($"t.*"), + sql("SELECT t.* FROM t1 JOIN LATERAL stack(1, c1, c2) t(x, y)") + ) + checkAnswer( + t1.join(t3, $"t1.c1" === $"t3.c1") + .lateralJoin( + spark.tvf.stack(lit(1), $"t1.c2".outer(), $"t3.c2".outer()).as("t") + ).select($"t.*"), + sql("SELECT t.* FROM t1 JOIN t3 ON t1.c1 = t3.c1 JOIN LATERAL stack(1, t1.c2, t3.c2) t") + ) + } + } + test("collations") { val actual = spark.tvf.collations() val expected = spark.sql("SELECT * FROM collations()") 
@@ -235,6 +452,28 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi checkAnswer(actual6, expected6) } + test("variant_explode - lateral join") { + withView("variant_table") { + sql( + """ + |CREATE VIEW variant_table(id, v) AS + |SELECT id, parse_json(v) AS v FROM VALUES + |(0, '["hello", "world"]'), (1, '{"a": true, "b": 3.14}'), + |(2, '[]'), (3, '{}'), + |(4, NULL), (5, '1') + |AS t(id, v) + |""".stripMargin) + val variantTable = spark.table("variant_table") + + checkAnswer( + variantTable.as("t1").lateralJoin( + spark.tvf.variant_explode($"v".outer()).as("t") + ).select($"t1.id", $"t.*"), + sql("SELECT t1.id, t.* FROM variant_table AS t1, LATERAL variant_explode(v) AS t") + ) + } + } + test("variant_explode_outer") { val actual1 = spark.tvf.variant_explode_outer(parse_json(lit("""["hello", "world"]"""))) val expected1 = spark.sql( @@ -265,4 +504,26 @@ class DataFrameTableValuedFunctionsSuite extends QueryTest with SharedSparkSessi val expected6 = spark.sql("SELECT * FROM variant_explode_outer(parse_json('1'))") checkAnswer(actual6, expected6) } + + test("variant_explode_outer - lateral join") { + withView("variant_table") { + sql( + """ + |CREATE VIEW variant_table(id, v) AS + |SELECT id, parse_json(v) AS v FROM VALUES + |(0, '["hello", "world"]'), (1, '{"a": true, "b": 3.14}'), + |(2, '[]'), (3, '{}'), + |(4, NULL), (5, '1') + |AS t(id, v) + |""".stripMargin) + val variantTable = spark.table("variant_table") + + checkAnswer( + variantTable.as("t1").lateralJoin( + spark.tvf.variant_explode_outer($"v".outer()).as("t") + ).select($"t1.id", $"t.*"), + sql("SELECT t1.id, t.* FROM variant_table AS t1, LATERAL variant_explode_outer(v) AS t") + ) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTransposeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTransposeSuite.scala index 51de8553216c6..ce1c8d7ceb64a 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTransposeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTransposeSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -173,4 +174,20 @@ class DataFrameTransposeSuite extends QueryTest with SharedSparkSession { ) assertResult(Array("key", "A", "B"))(transposedDf.columns) } + + test("SPARK-50602: invalid index columns") { + val df = Seq( + ("A", 1, 2), + ("B", 3, 4), + (null, 5, 6) + ).toDF("id", "val1", "val2") + + checkError( + exception = intercept[AnalysisException] { + df.transpose($"id" + lit(1)) + }, + condition = "TRANSPOSE_INVALID_INDEX_COLUMN", + parameters = Map("reason" -> "Index column must be an atomic attribute") + ) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 8a86aa10887c0..01e72daead440 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, Exchange, S import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.expressions.{Aggregator, MutableAggregationBuffer, UserDefinedAggregateFunction, Window} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -862,7 +861,7 @@ class DataFrameWindowFunctionsSuite extends QueryTest lead($"value", 2, null, true).over(window), lead($"value", 3, null, true).over(window), lead(concat($"value", $"key"), 
1, null, true).over(window), - column(Lag($"value".expr, NonFoldableLiteral(1), Literal(null), true)).over(window), + Column(Lag($"value".expr, NonFoldableLiteral(1), Literal(null), true)).over(window), lag($"value", 2).over(window), lag($"value", 0, null, true).over(window), lag($"value", 1, null, true).over(window), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala index bda8c7f26082f..9d8aaf8d90e32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableS import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.Metadata import org.apache.spark.storage.StorageLevel import org.apache.spark.tags.SlowSQLTest @@ -312,4 +313,19 @@ class DatasetCacheSuite extends QueryTest } } } + + test("SPARK-50682: inner Alias should be canonicalized") { + // Put a metadata in the Alias so that it won't be removed by the analyzer. + val metadata = Metadata.fromJson("""{"k": "v"}""") + val df1 = spark.range(5).select(struct($"id".as("name", metadata))) + df1.cache() + // This is exactly the same as df1. 
+ val df2 = spark.range(5).select(struct($"id".as("name", metadata))) + assert(df2.queryExecution.executedPlan.exists(_.isInstanceOf[InMemoryTableScanExec])) + + val metadata2 = Metadata.fromJson("""{"k2": "v2"}""") + // Same with df1 except for the Alias metadata + val df3 = spark.range(5).select(struct($"id".as("name", metadata2))) + assert(!df3.queryExecution.executedPlan.exists(_.isInstanceOf[InMemoryTableScanExec])) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala index 81d7de856f881..5db3990b67c8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala @@ -184,16 +184,18 @@ class DatasetOptimizationSuite extends QueryTest with SharedSparkSession { assert(count3 == count2) } - withClue("array type") { - checkCodegenCache(() => Seq(Seq("abc")).toDS()) - } + withSQLConf(SQLConf.ARTIFACTS_SESSION_ISOLATION_ALWAYS_APPLY_CLASSLOADER.key -> "true") { + withClue("array type") { + checkCodegenCache(() => Seq(Seq("abc")).toDS()) + } - withClue("map type") { - checkCodegenCache(() => Seq(Map("abc" -> 1)).toDS()) - } + withClue("map type") { + checkCodegenCache(() => Seq(Map("abc" -> 1)).toDS()) + } - withClue("array of map") { - checkCodegenCache(() => Seq(Seq(Map("abc" -> 1))).toDS()) + withClue("array of map") { + checkCodegenCache(() => Seq(Seq(Map("abc" -> 1))).toDS()) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 8c0231fddf39f..0468ceb9f967c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -118,7 +118,8 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { // 
SET spark.sql.parser.escapedStringLiterals=true example.split(" > ").tail.filterNot(_.trim.startsWith("SET")).take(1).foreach { case _ if funcName == "from_avro" || funcName == "to_avro" || - funcName == "from_protobuf" || funcName == "to_protobuf" => + funcName == "schema_of_avro" || funcName == "from_protobuf" || + funcName == "to_protobuf" => // Skip running the example queries for the from_avro, to_avro, from_protobuf and // to_protobuf functions because these functions dynamically load the // AvroDataToCatalyst or CatalystDataToAvro classes which are not available in this diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala index cdea4446d9461..22f55819d1d4c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala @@ -31,10 +31,11 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ExprId, PythonUDF} import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.execution.datasources.v2.python.UserDefinedPythonDataSource import org.apache.spark.sql.execution.python.{UserDefinedPythonFunction, UserDefinedPythonTableFunction} import org.apache.spark.sql.expressions.SparkUserDefinedFunction -import org.apache.spark.sql.internal.ExpressionUtils.{column, expression} +import org.apache.spark.sql.internal.ExpressionUtils.expression import org.apache.spark.sql.internal.UserDefinedFunctionUtils.toScalaUDF import org.apache.spark.sql.types.{DataType, IntegerType, NullType, StringType, StructType, VariantType} import org.apache.spark.util.ArrayImplicits._ @@ -1592,7 +1593,7 @@ object IntegratedUDFTestUtils extends SQLHelper { Cast(toScalaUDF(udf, Cast(expr, 
StringType) :: Nil), rt) } - def apply(exprs: Column*): Column = builder(exprs.map(expression)) + def apply(exprs: Column*): Column = Column(builder(exprs.map(expression))) val prettyName: String = "Scala UDF" } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 84408d8e2495d..ea185b6b4901a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.expressions.Cast._ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.execution.WholeStageCodegenExec import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -1394,7 +1393,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { val df = Seq(1).toDF("a") val schema = StructType(StructField("b", ObjectType(classOf[java.lang.Integer])) :: Nil) val row = InternalRow.fromSeq(Seq(Integer.valueOf(1))) - val structData = column(Literal.create(row, schema)) + val structData = Column(Literal.create(row, schema)) checkError( exception = intercept[AnalysisException] { df.select($"a").withColumn("c", to_json(structData)).collect() @@ -1456,4 +1455,28 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { assert(plan.isInstanceOf[WholeStageCodegenExec]) checkAnswer(df, Row(null)) } + + test("function json_tuple - field names foldable") { + withTempView("t") { + val json = """{"a":1, "b":2, "c":3}""" + val df = Seq((json, "a", "b", "c")).toDF("json", "c1", "c2", "c3") + df.createOrReplaceTempView("t") + + // Json and all field names are foldable. 
+ val df1 = sql(s"SELECT json_tuple('$json', 'a', 'b', 'c') from t") + checkAnswer(df1, Row("1", "2", "3")) + + // All field names are foldable. + val df2 = sql("SELECT json_tuple(json, 'a', 'b', 'c') from t") + checkAnswer(df2, Row("1", "2", "3")) + + // The field names some foldable, some non-foldable. + val df3 = sql("SELECT json_tuple(json, 'a', c2, 'c') from t") + checkAnswer(df3, Row("1", "2", "3")) + + // All field names are non-foldable. + val df4 = sql("SELECT json_tuple(json, c1, c2, c3) from t") + checkAnswer(df4, Row("1", "2", "3")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/LateralColumnAliasSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/LateralColumnAliasSuite.scala index d7177e19a6177..3def42cd7ee55 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/LateralColumnAliasSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/LateralColumnAliasSuite.scala @@ -1365,4 +1365,41 @@ class LateralColumnAliasSuite extends LateralColumnAliasSuiteBase { // the states are cleared - a subsequent correct query should succeed sql("select 1 as a, a").queryExecution.assertAnalyzed() } + + test("SPARK-49349: Improve error message for LCA with Generate") { + checkError( + exception = intercept[AnalysisException] { + sql( + s""" + |SELECT + | explode(split(name , ',')) AS new_name, + | new_name like 'a%' + |FROM $testTable + |""".stripMargin) + }, + condition = "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GENERATOR", + sqlState = "0A000", + parameters = Map( + "lca" -> "`new_name`", + "generatorExpr" -> "\"unresolvedalias(lateralAliasReference(new_name) LIKE a%)\"")) + + checkError( + exception = intercept[AnalysisException] { + sql( + s""" + |SELECT + | explode_outer(from_json(name,'array>')) as newName, + | size(from_json(newName.values,'array')) + + | size(array(from_json(newName.values,'map'))) as size + |FROM $testTable + |""".stripMargin) + }, + condition = "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GENERATOR", + 
sqlState = "0A000", + parameters = Map( + "lca" -> "`newName.values`", + "generatorExpr" -> ("\"(size(from_json(lateralAliasReference(newName.values), " + + "array)) + size(array(from_json(lateralAliasReference(newName.values), " + + "map)))) AS size\""))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala index 873337e7a4242..861b0bf0f3945 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala @@ -33,12 +33,18 @@ class LogQuerySuite extends QueryTest with SharedSparkSession with Logging { new File(pwd + "/target/LogQuerySuite.log") } + override def beforeAll(): Unit = { + super.beforeAll() + Logging.enableStructuredLogging() + } + override def afterAll(): Unit = { super.afterAll() // Clear the log file if (logFile.exists()) { logFile.delete() } + Logging.disableStructuredLogging() } private def createTempView(viewName: String): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala index 791bcc91d5094..bb1363f1c58c0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ParametersSuite.scala @@ -758,4 +758,47 @@ class ParametersSuite extends QueryTest with SharedSparkSession with PlanTest { checkAnswer(spark.sql(query("?"), args = Array("tt1")), Row(1)) } } + + test("SPARK-50441: parameterized identifier referencing a CTE") { + def query(p: String): String = { + s""" + |WITH t1 AS (SELECT 1) + |SELECT * FROM IDENTIFIER($p)""".stripMargin + } + + checkAnswer(spark.sql(query(":cte"), args = Map("cte" -> "t1")), Row(1)) + checkAnswer(spark.sql(query("?"), args = Array("t1")), Row(1)) + } + + test("SPARK-50403: parameterized execute immediate") { + checkAnswer(spark.sql("execute immediate 'select ?' 
using ?", Array(1)), Row(1)) + checkAnswer(spark.sql("execute immediate 'select ?, ?' using ?, 2", Array(1)), Row(1, 2)) + checkError( + exception = intercept[AnalysisException] { + spark.sql("execute immediate 'select ?, ?' using 1", Array(2)) + }, + condition = "UNBOUND_SQL_PARAMETER", + parameters = Map("name" -> "_10"), + context = ExpectedContext("?", 10, 10)) + + checkAnswer(spark.sql("execute immediate 'select ?' using 1", Map("param1" -> "1")), Row(1)) + checkAnswer(spark.sql("execute immediate 'select :param1' using :param2 as param1", + Map("param2" -> 42)), Row(42)) + checkAnswer(spark.sql( + "execute immediate 'select :param1, :param2' using :param2 as param1, 43 as param2", + Map("param2" -> 42)), Row(42, 43)) + checkAnswer(spark.sql("execute immediate 'select :param' using 0 as param", + Map("param" -> 42)), Row(0)) + checkError( + exception = intercept[AnalysisException] { + spark.sql("execute immediate 'select :param1, :param2' using 1 as param1", + Map("param2" -> 2)) + }, + condition = "UNBOUND_SQL_PARAMETER", + parameters = Map("name" -> "param2"), + context = ExpectedContext(":param2", 16, 22)) + + checkAnswer(spark.sql("execute immediate 'select ?' using :param", Map("param" -> 2)), Row(2)) + checkAnswer(spark.sql("execute immediate 'select :param' using ? 
as param", Array(3)), Row(3)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index 30180d48da71a..b59c83c23d3c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -27,7 +27,7 @@ import org.scalatest.Assertions import org.apache.spark.sql.catalyst.ExtendedAnalysisException import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.execution.{QueryExecution, SparkPlan, SQLExecution} +import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.util.QueryExecutionListener import org.apache.spark.storage.StorageLevel @@ -449,12 +449,12 @@ object QueryTest extends Assertions { } } - def withPhysicalPlansCaptured(spark: SparkSession, thunk: => Unit): Seq[SparkPlan] = { - var capturedPlans = Seq.empty[SparkPlan] + def withQueryExecutionsCaptured(spark: SparkSession)(thunk: => Unit): Seq[QueryExecution] = { + var capturedQueryExecutions = Seq.empty[QueryExecution] val listener = new QueryExecutionListener { override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { - capturedPlans = capturedPlans :+ qe.executedPlan + capturedQueryExecutions = capturedQueryExecutions :+ qe } override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {} } @@ -468,7 +468,7 @@ object QueryTest extends Assertions { spark.listenerManager.unregister(listener) } - capturedPlans + capturedQueryExecutions } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala index c80787c40c487..ce3ac9b8834bf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala @@ -108,4 +108,26 @@ class RuntimeConfigSuite extends SparkFunSuite { // this set should not fail conf.set(DEFAULT_PARALLELISM.key, "1") } + + test("config entry") { + val conf = newConf() + + val entry = SQLConf.FILES_MAX_PARTITION_NUM + assert(conf.get(entry.key) === null) + assert(conf.get(entry).isEmpty) + assert(conf.get(entry, Option(55)) === Option(55)) + conf.set(entry, Option(33)) + assert(conf.get(entry.key) === "33") + assert(conf.get(entry) === Option(33)) + assert(conf.get(entry, Option(55)) === Option(33)) + + val entryWithDefault = SQLConf.RUNTIME_FILTER_NUMBER_THRESHOLD + assert(conf.get(entryWithDefault.key) === "10") + assert(conf.get(entryWithDefault) === 10) + assert(conf.get(entryWithDefault, 11) === 11) + conf.set(entryWithDefault, 12) + assert(conf.get(entryWithDefault.key) === "12") + assert(conf.get(entryWithDefault) === 12) + assert(conf.get(entryWithDefault, 11) === 12) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala index d81768c0077eb..ea0d405d2a8f7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala @@ -99,6 +99,29 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext { assert(sqlContext.tables().filter("tableName = 'listtablessuitetable'").count() === 0) } + test("get tables from a database") { + val sqlContext = SQLContext.getOrCreate(sc) + + try { + sqlContext.sql("CREATE DATABASE IF NOT EXISTS temp_db_1") + sqlContext.sql("CREATE TABLE temp_db_1.temp_table_1 (key int)") + sqlContext.sql("INSERT INTO temp_db_1.temp_table_1 VALUES (1)") + + assert(sqlContext.tableNames("temp_db_1").sameElements(Array("temp_table_1"))) + + assert(sqlContext.tables("temp_db_1").collect().toSeq == + Row("temp_db_1", "temp_table_1", false) :: Nil) + + 
assert(sqlContext.tables().collect().toSeq == Nil) + sqlContext.sql("USE temp_db_1") + assert(sqlContext.tableNames().sameElements(Array("temp_table_1"))) + assert(sqlContext.tables().collect().toSeq == Row("temp_db_1", "temp_table_1", false) :: Nil) + } finally { + sqlContext.sql("USE default") + sqlContext.sql("DROP DATABASE IF EXISTS temp_db_1 CASCADE") + } + } + test("getting all tables with a database name has no impact on returned table names") { val sqlContext = SQLContext.getOrCreate(sc) val df = sqlContext.range(10) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index 7daf2c6b1b58b..04f274e4af592 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -60,7 +60,18 @@ trait SQLQueryTestHelper extends Logging { .replaceAll("CTERelationDef \\d+,", s"CTERelationDef xxxx,") .replaceAll("CTERelationRef \\d+,", s"CTERelationRef xxxx,") .replaceAll("@\\w*,", s"@xxxxxxxx,") - .replaceAll("\\*\\(\\d+\\) ", "*") // remove the WholeStageCodegen codegenStageIds + .replaceAll("\\*\\(\\d+\\) ", "*") + .replaceAll( + s""""location":.*?$clsName/""", + s""""location": "$notIncludedMsg/{warehouse_dir}/""") + .replaceAll(s""""created_by":".*?"""", s""""created_by $notIncludedMsg":"None"""") + .replaceAll(s""""created_time":".*?"""", s""""created_time $notIncludedMsg":"None"""") + .replaceAll(s""""last_access":".*?"""", s""""last_access $notIncludedMsg":"None"""") + .replaceAll(s""""owner":".*?"""", s""""owner $notIncludedMsg":"None"""") + .replaceAll(s""""partition_statistics":"\\d+"""", + s""""partition_statistics $notIncludedMsg":"None"""") + .replaceAll("cterelationdef \\d+,", "cterelationdef xxxx,") + .replaceAll("cterelationref \\d+,", "cterelationref xxxx,") } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 5c56377f21c20..575a4ae69d1a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -132,7 +132,7 @@ import org.apache.spark.util.Utils // scalastyle:on line.size.limit @ExtendedSQLTest class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper - with SQLQueryTestHelper { + with SQLQueryTestHelper with TPCDSSchema { import IntegratedUDFTestUtils._ @@ -165,13 +165,17 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper protected def ignoreList: Set[String] = Set( "ignored.sql" // Do NOT remove this one. It is here to test the ignore functionality. ) ++ otherIgnoreList + /** List of test cases that require TPCDS table schemas to be loaded. */ + private def requireTPCDSCases: Seq[String] = Seq("pipe-operators.sql") + /** List of TPCDS table names and schemas to load from the [[TPCDSSchema]] base class. */ + private val tpcDSTableNamesToSchemas: Map[String, String] = tableColumns // Create all the test cases. listTestCases.foreach(createScalaTestCase) protected def createScalaTestCase(testCase: TestCase): Unit = { if (ignoreList.exists(t => - testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { + testCase.name.toLowerCase(Locale.ROOT).contains(t.toLowerCase(Locale.ROOT)))) { // Create a test case to ignore this case. ignore(testCase.name) { /* Do nothing */ } } else testCase match { @@ -322,6 +326,15 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper setOperations.foreach(localSparkSession.sql) } + // Load TPCDS table schemas for the test case if required. 
+ val lowercaseTestCase = testCase.name.toLowerCase(Locale.ROOT) + if (requireTPCDSCases.contains(lowercaseTestCase)) { + tpcDSTableNamesToSchemas.foreach { case (name: String, schema: String) => + localSparkSession.sql(s"DROP TABLE IF EXISTS $name") + localSparkSession.sql(s"CREATE TABLE `$name` ($schema) USING parquet") + } + } + // Run the SQL queries preparing them for comparison. val outputs: Seq[QueryTestOutput] = queries.map { sql => testCase match { @@ -348,6 +361,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } + // Drop TPCDS tables after the test case if required. + if (requireTPCDSCases.contains(lowercaseTestCase)) { + tpcDSTableNamesToSchemas.foreach { case (name: String, schema: String) => + localSparkSession.sql(s"DROP TABLE IF EXISTS $name") + } + } + if (regenerateGoldenFiles) { // Again, we are explicitly not using multi-line string due to stripMargin removing "|". val goldenOutput = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSCollationQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSCollationQueryTestSuite.scala index 46a24acb475c4..43e6111fc99ca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSCollationQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSCollationQueryTestSuite.scala @@ -67,6 +67,7 @@ class TPCDSCollationQueryTestSuite extends QueryTest with TPCDSBase with SQLQuer // To make output results deterministic override protected def sparkConf: SparkConf = super.sparkConf .set(SQLConf.SHUFFLE_PARTITIONS.key, "1") + .remove("spark.hadoop.fs.file.impl") protected override def createSparkSession: TestSparkSession = { new TestSparkSession(new SparkContext("local[1]", this.getClass.getSimpleName, sparkConf)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index ffd15eb46a48e..e8b36d8b130cf 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -33,8 +33,7 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { // Disable read-side char padding so that the generated code is less than 8000. super.sparkConf.set(SQLConf.READ_SIDE_CHAR_PADDING, false) - // q72 is skipped due to GitHub Actions' memory limit. - tpcdsQueries.filterNot(sys.env.contains("GITHUB_ACTIONS") && _ == "q72").foreach { name => + tpcdsQueries.foreach { name => val queryString = resourceToString(s"tpcds/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) test(name) { @@ -44,8 +43,7 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { } } - // q72 is skipped due to GitHub Actions' memory limit. - tpcdsQueriesV2_7_0.filterNot(sys.env.contains("GITHUB_ACTIONS") && _ == "q72").foreach { name => + tpcdsQueriesV2_7_0.foreach { name => val queryString = resourceToString(s"tpcds-v2.7.0/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) test(s"$name-v2.7") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala index bde6155529872..c1246a167b8cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala @@ -62,6 +62,7 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp // To make output results deterministic override protected def sparkConf: SparkConf = super.sparkConf .set(SQLConf.SHUFFLE_PARTITIONS.key, "1") + .remove("spark.hadoop.fs.file.impl") protected override def createSparkSession: TestSparkSession = { new TestSparkSession(new SparkContext("local[1]", this.getClass.getSimpleName, sparkConf)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala index 624bae70ce09c..662eead137c40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TypedImperativeAggregateSuite.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.trees.UnaryLike import org.apache.spark.sql.execution.aggregate.HashAggregateExec import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.ExpressionUtils.{column => toColumn, expression} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -89,7 +88,7 @@ class TypedImperativeAggregateSuite extends QueryTest with SharedSparkSession { test("dataframe aggregate with object aggregate buffer, should not use HashAggregate") { val df = data.toDF("a", "b") - val max = TypedMax($"a") + val max = Column(TypedMax($"a".expr)) // Always uses SortAggregateExec val sparkPlan = df.select(max).queryExecution.sparkPlan @@ -212,9 +211,10 @@ class TypedImperativeAggregateSuite extends QueryTest with SharedSparkSession { checkAnswer(query, expected) } - private def typedMax(column: Column): Column = TypedMax(column) + private def typedMax(column: Column): Column = Column(TypedMax(column.expr)) - private def nullableTypedMax(column: Column): Column = TypedMax(column, nullable = true) + private def nullableTypedMax(column: Column): Column = + Column(TypedMax(column.expr, nullable = true)) } object TypedImperativeAggregateSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala new file mode 100644 index 0000000000000..3443028ba45b0 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala @@ -0,0 +1,383 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.io.File +import java.sql.{Date, Timestamp} +import java.time.LocalDateTime + +import org.apache.spark.SparkThrowable +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils +import org.apache.spark.sql.catalyst.util.DateTimeConstants._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.execution.datasources.parquet.{ParquetTest, SparkShreddingUtils} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.types.variant._ +import org.apache.spark.unsafe.types.{UTF8String, VariantVal} + +class VariantShreddingSuite extends QueryTest with SharedSparkSession with ParquetTest { + def parseJson(s: String): VariantVal = { + val v = VariantBuilder.parseJson(s, false) + new VariantVal(v.getValue, v.getMetadata) + } + + // Make a variant value binary by parsing a JSON string. + def value(s: String): Array[Byte] = VariantBuilder.parseJson(s, false).getValue + + // Make a variant metadata binary that includes a set of keys. 
+ def metadata(keys: Seq[String]): Array[Byte] = { + val builder = new VariantBuilder(false) + keys.foreach(builder.addKey) + builder.result().getMetadata + } + + // Build a shredded variant value binary. Its IDs refer to the metadata built from `metadataKeys`, + // which can include more keys than the JSON string contains. + def shreddedValue(s: String, metadataKeys: Seq[String]): Array[Byte] = { + val builder = new VariantBuilder(false) + metadataKeys.foreach(builder.addKey) + builder.appendVariant(VariantBuilder.parseJson(s, false)) + builder.result().getValue + } + + // Given an expected schema of a Variant value, return a write schema with a single column `v` + // with the corresponding shredding schema. + def writeSchema(schema: DataType): StructType = + StructType(Array(StructField("v", SparkShreddingUtils.variantShreddingSchema(schema)))) + + def withPushConfigs(pushConfigs: Seq[Boolean] = Seq(true, false))(fn: => Unit): Unit = { + for (push <- pushConfigs) { + withSQLConf(SQLConf.PUSH_VARIANT_INTO_SCAN.key -> push.toString) { + fn + } + } + } + + def isPushEnabled: Boolean = SQLConf.get.getConf(SQLConf.PUSH_VARIANT_INTO_SCAN) + + def testWithTempPath(name: String)(block: File => Unit): Unit = test(name) { + withPushConfigs() { + withTempPath { path => + block(path) + } + } + } + + def writeRows(path: File, schema: StructType, rows: Row*): Unit = + spark.createDataFrame(spark.sparkContext.parallelize(rows.map(Row(_)), numSlices = 1), schema) + .write.mode("overwrite").parquet(path.getAbsolutePath) + + def writeRows(path: File, schema: String, rows: Row*): Unit = + writeRows(path, StructType.fromDDL(schema), rows: _*) + + def read(path: File): DataFrame = + spark.read.schema("v variant").parquet(path.getAbsolutePath) + + def checkExpr(path: File, expr: String, expected: Any*): Unit = withAllParquetReaders { + checkAnswer(read(path).selectExpr(expr), expected.map(Row(_))) + } + + def checkException(path: File, expr: String, msg: String): Unit = 
withAllParquetReaders { + val ex = intercept[Exception with SparkThrowable] { + read(path).selectExpr(expr).collect() + } + // When reading with the parquet-mr reader, the expected message can be nested in + // `ex.getCause.getCause`. + assert(ex.getMessage.contains(msg) || ex.getCause.getMessage.contains(msg) + || ex.getCause.getCause.getMessage.contains(msg)) + } + + testWithTempPath("scalar types rebuild") { path => + val scalarTypes = Array( + BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, + TimestampType, TimestampNTZType, DateType, + StringType, BinaryType, + DecimalType(9, 3), DecimalType(18, 6), DecimalType(22, 9)) + val schema = StructType(scalarTypes.zipWithIndex.map { case (t, i) => + StructField(i.toString, t) + }) + + val values = Seq[Any]( + true, 1.toByte, 2.toShort, 3, 4L, 5.5F, 6.6, + new Timestamp(7), LocalDateTime.of(1, 1, 1, 0, 0, 8, 0), new Date(9), + "str10", Array[Byte](11), + Decimal("12.12"), Decimal("13.13"), Decimal("14.14")).map(Row(null, _)) + val row = Row(metadata(scalarTypes.indices.map(_.toString)), null, Row.fromSeq(values)) + + writeRows(path, writeSchema(schema), row) + for (tz <- Seq("Etc/UTC", "America/Los_Angeles")) { + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) { + val timestamp = if (tz == "Etc/UTC") { + "1970-01-01 00:00:00.007+00:00" + } else { + "1969-12-31 16:00:00.007-08:00" + } + checkExpr(path, "to_json(v)", + """{"0":true,"1":1,"10":"str10","11":"Cw==","12":12.12,"13":13.13,"14":14.14,""" + + s""""2":2,"3":3,"4":4,"5":5.5,"6":6.6,"7":"$timestamp",""" + + """"8":"0001-01-01 00:00:08","9":"1969-12-31"}""") + checkExpr(path, "variant_get(v, '$.0', 'int')", 1) + checkExpr(path, "variant_get(v, '$.2', 'boolean')", true) + checkExpr(path, "variant_get(v, '$.6', 'float')", 6.6F) + checkExpr(path, "variant_get(v, '$.11', 'string')", new String(Array[Byte](11))) + checkExpr(path, "variant_get(v, '$.14', 'decimal(9, 1)')", BigDecimal("14.1")) + } + } + } + + testWithTempPath("object 
rebuild") { path => + writeRows(path, writeSchema(StructType.fromDDL("b int, d int")), + Row(metadata(Seq("b", "d")), null, Row(Row(null, 1), Row(null, null))), + Row(metadata(Seq("b", "d")), null, Row(Row(null, 1), Row(value("null"), null))), + Row(metadata(Seq("a", "b", "c", "d")), + shreddedValue("""{"a": 1, "c": 3}""", Seq("a", "b", "c", "d")), + Row(Row(null, 2), Row(value("4"), null))), + Row(metadata(Nil), value("null"), null), + null) + checkExpr(path, "to_json(v)", """{"b":1}""", """{"b":1,"d":null}""", + """{"a":1,"b":2,"c":3,"d":4}""", "null", null) + checkExpr(path, "variant_get(v, '$.b', 'string')", "1", "1", "2", null, null) + checkExpr(path, "variant_get(v, '$.d', 'string')", null, null, "4", null, null) + } + + testWithTempPath("array rebuild") { path => + writeRows(path, writeSchema(ArrayType(IntegerType)), + Row(metadata(Nil), null, Array(Row(null, 1), Row(null, 2), Row(value("3"), null))), + Row(metadata(Seq("a", "b")), null, Array( + Row(shreddedValue("""{"a": 1}""", Seq("a", "b")), null), + Row(shreddedValue("""{"b": 2}""", Seq("a", "b")), null))), + Row(metadata(Seq("a", "b")), value("""{"a": 1, "b": 2}"""), null)) + checkExpr(path, "to_json(v)", """[1,2,3]""", """[{"a":1},{"b":2}]""", """{"a":1,"b":2}""") + checkExpr(path, "variant_get(v, '$[2]', 'int')", 3, null, null) + checkExpr(path, "variant_get(v, '$[1].b', 'int')", null, 2, null) + checkExpr(path, "variant_get(v, '$.a', 'long')", null, null, 1L) + } + + testWithTempPath("malformed input") { path => + // Top-level variant must not be missing. + writeRows(path, writeSchema(IntegerType), Row(metadata(Nil), null, null)) + checkException(path, "v", "MALFORMED_VARIANT") + + // Array-element variant must not be missing. + writeRows(path, writeSchema(ArrayType(IntegerType)), + Row(metadata(Nil), null, Array(Row(null, null)))) + checkException(path, "v", "MALFORMED_VARIANT") + checkException(path, "variant_get(v, '$[0]')", "MALFORMED_VARIANT") + + // Shredded field must not be null. 
+ // Construct the schema manually, because SparkShreddingUtils.variantShreddingSchema will make + // `a` non-nullable, which would prevent us from writing the file. + val schema = StructType(Seq(StructField("v", StructType(Seq( + StructField("metadata", BinaryType), + StructField("value", BinaryType), + StructField("typed_value", StructType(Seq( + StructField("a", StructType(Seq( + StructField("value", BinaryType), + StructField("typed_value", BinaryType)))))))))))) + writeRows(path, schema, Row(metadata(Seq("a")), null, Row(null))) + checkException(path, "v", "MALFORMED_VARIANT") + checkException(path, "variant_get(v, '$.a')", "MALFORMED_VARIANT") + + // `value` must not contain any shredded field. + writeRows(path, writeSchema(StructType.fromDDL("a int")), + Row(metadata(Seq("a")), value("""{"a": 1}"""), Row(Row(null, null)))) + checkException(path, "v", "MALFORMED_VARIANT") + checkException(path, "cast(v as map)", "MALFORMED_VARIANT") + if (isPushEnabled) { + checkExpr(path, "cast(v as struct)", Row(null)) + checkExpr(path, "variant_get(v, '$.a', 'int')", null) + } else { + checkException(path, "cast(v as struct)", "MALFORMED_VARIANT") + checkException(path, "variant_get(v, '$.a', 'int')", "MALFORMED_VARIANT") + } + + // Scalar reader reads from `typed_value` if both `value` and `typed_value` are not null. + // Cast from `value` succeeds, cast from `typed_value` fails. + writeRows(path, "v struct", + Row(metadata(Nil), value("1"), "invalid")) + checkException(path, "cast(v as int)", "INVALID_VARIANT_CAST") + checkExpr(path, "try_cast(v as int)", null) + + // Cast from `value` fails, cast from `typed_value` succeeds. 
+ writeRows(path, "v struct", + Row(metadata(Nil), value("\"invalid\""), "1")) + checkExpr(path, "cast(v as int)", 1) + checkExpr(path, "try_cast(v as int)", 1) + } + + testWithTempPath("extract from shredded object") { path => + val keys1 = Seq("a", "b", "c", "d") + val keys2 = Seq("a", "b", "c", "e", "f") + writeRows(path, "v struct, b struct," + + "c struct>>", + // {"a":1,"b":"2","c":3.3,"d":4.4}, d is in the left over value. + Row(metadata(keys1), shreddedValue("""{"d": 4.4}""", keys1), + Row(Row(null, 1), Row(value("\"2\"")), Row(Decimal("3.3")))), + // {"a":5.4,"b":-6,"e":{"f":[true]}}, e is in the left over value. + Row(metadata(keys2), shreddedValue("""{"e": {"f": [true]}}""", keys2), + Row(Row(value("5.4"), null), Row(value("-6")), Row(null))), + // [{"a":1}], the unshredded array at the top-level is put into `value` as a whole. + Row(metadata(Seq("a")), value("""[{"a": 1}]"""), null)) + + checkAnswer(read(path).selectExpr("variant_get(v, '$.a', 'int')", + "variant_get(v, '$.b', 'long')", "variant_get(v, '$.c', 'double')", + "variant_get(v, '$.d', 'decimal(9, 4)')"), + Seq(Row(1, 2L, 3.3, BigDecimal("4.4")), Row(5, -6L, null, null), Row(null, null, null, null))) + checkExpr(path, "variant_get(v, '$.e.f[0]', 'boolean')", null, true, null) + checkExpr(path, "variant_get(v, '$[0].a', 'boolean')", null, null, true) + checkExpr(path, "try_cast(v as struct)", + Row(1.0F, null), Row(5.4F, parseJson("""{"f": [true]}""")), null) + + // String "2" cannot be cast into boolean. + checkException(path, "variant_get(v, '$.b', 'boolean')", "INVALID_VARIANT_CAST") + // Decimal cannot be cast into date. + checkException(path, "variant_get(v, '$.c', 'date')", "INVALID_VARIANT_CAST") + // The value of `c` doesn't fit into `decimal(1, 1)`. + checkException(path, "variant_get(v, '$.c', 'decimal(1, 1)')", "INVALID_VARIANT_CAST") + checkExpr(path, "try_variant_get(v, '$.b', 'boolean')", null, true, null) + // Scalar cannot be cast into struct. 
+ checkException(path, "variant_get(v, '$.a', 'struct')", "INVALID_VARIANT_CAST") + checkExpr(path, "try_variant_get(v, '$.a', 'struct')", null, null, null) + + checkExpr(path, "try_cast(v as map)", + Map("a" -> 1.0, "b" -> 2.0, "c" -> 3.3, "d" -> 4.4), + Map("a" -> 5.4, "b" -> -6.0, "e" -> null), null) + checkExpr(path, "try_cast(v as array)", null, null, Seq("""{"a":1}""")) + + val strings = Seq("""{"a":1,"b":"2","c":3.3,"d":4.4}""", + """{"a":5.4,"b":-6,"e":{"f":[true]}}""", """[{"a":1}]""") + checkExpr(path, "cast(v as string)", strings: _*) + checkExpr(path, "v", + VariantExpressionEvalUtils.castToVariant( + InternalRow(1, UTF8String.fromString("2"), Decimal("3.3000000000"), Decimal("4.4")), + StructType.fromDDL("a int, b string, c decimal(20, 10), d decimal(2, 1)") + ), + parseJson(strings(1)), + parseJson(strings(2)) + ) + } + + testWithTempPath("extract from shredded array") { path => + val keys = Seq("a", "b") + writeRows(path, "v struct>>>>", + // [{"a":"2000-01-01"},{"a":"1000-01-01","b":[7]}], b is in the left over value. + Row(metadata(keys), null, Array( + Row(null, Row(Row(null, "2000-01-01"))), + Row(shreddedValue("""{"b": [7]}""", keys), Row(Row(null, "1000-01-01"))))), + // [null,{"a":null},{"a":"null"},{}] + Row(metadata(keys), null, Array( + Row(value("null"), null), + Row(null, Row(Row(value("null"), null))), + Row(null, Row(Row(null, "null"))), + Row(null, Row(Row(null, null)))))) + + val date1 = Date.valueOf("2000-01-01") + val date2 = Date.valueOf("1000-01-01") + checkExpr(path, "variant_get(v, '$[0].a', 'date')", date1, null) + // try_cast succeeds. + checkExpr(path, "try_variant_get(v, '$[1].a', 'date')", date2, null) + // The first array returns null because of out-of-bound index. + // The second array returns "null". + checkExpr(path, "variant_get(v, '$[2].a', 'string')", null, "null") + // Return null because of invalid cast. 
+ checkExpr(path, "try_variant_get(v, '$[1].a', 'int')", null, null) + + checkExpr(path, "variant_get(v, '$[0].b[0]', 'int')", null, null) + checkExpr(path, "variant_get(v, '$[1].b[0]', 'int')", 7, null) + // Validate timestamp-related casts uses the session time zone correctly. + Seq("Etc/UTC", "America/Los_Angeles").foreach { tz => + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> tz) { + val expected = sql("select timestamp'1000-01-01', timestamp_ntz'1000-01-01'").head() + checkAnswer(read(path).selectExpr("variant_get(v, '$[1].a', 'timestamp')", + "variant_get(v, '$[1].a', 'timestamp_ntz')"), Seq(expected, Row(null, null))) + } + } + checkException(path, "variant_get(v, '$[0]', 'int')", "INVALID_VARIANT_CAST") + // An out-of-bound array access produces null. It never causes an invalid cast. + checkExpr(path, "variant_get(v, '$[4]', 'int')", null, null) + + checkExpr(path, "cast(v as array>>)", + Seq(Row("2000-01-01", null), Row("1000-01-01", Seq(7))), + Seq(null, Row(null, null), Row("null", null), Row(null, null))) + checkExpr(path, "cast(v as array>)", + Seq(Map("a" -> "2000-01-01"), Map("a" -> "1000-01-01", "b" -> "[7]")), + Seq(null, Map("a" -> null), Map("a" -> "null"), Map())) + checkExpr(path, "try_cast(v as array>)", + Seq(Map("a" -> date1), Map("a" -> date2, "b" -> null)), + Seq(null, Map("a" -> null), Map("a" -> null), Map())) + + val strings = Seq("""[{"a":"2000-01-01"},{"a":"1000-01-01","b":[7]}]""", + """[null,{"a":null},{"a":"null"},{}]""") + checkExpr(path, "cast(v as string)", strings: _*) + checkExpr(path, "v", strings.map(parseJson): _*) + } + + testWithTempPath("missing fields") { path => + writeRows(path, "v struct, b struct>>", + Row(metadata(Nil), Row(Row(null, null), Row(null))), + Row(metadata(Nil), Row(Row(value("null"), null), Row(null))), + Row(metadata(Nil), Row(Row(null, 1), Row(null))), + Row(metadata(Nil), Row(Row(null, null), Row(2))), + Row(metadata(Nil), Row(Row(value("null"), null), Row(2))), + Row(metadata(Nil), 
Row(Row(null, 3), Row(4)))) + + val strings = Seq("{}", """{"a":null}""", """{"a":1}""", """{"b":2}""", """{"a":null,"b":2}""", + """{"a":3,"b":4}""") + checkExpr(path, "cast(v as string)", strings: _*) + checkExpr(path, "v", strings.map(parseJson): _*) + + checkExpr(path, "variant_get(v, '$.a', 'string')", null, null, "1", null, null, "3") + checkExpr(path, "variant_get(v, '$.a')", null, parseJson("null"), parseJson("1"), null, + parseJson("null"), parseJson("3")) + } + + testWithTempPath("custom casts") { path => + writeRows(path, writeSchema(LongType), + Row(metadata(Nil), null, Long.MaxValue / MICROS_PER_SECOND + 1), + Row(metadata(Nil), null, Long.MaxValue / MICROS_PER_SECOND)) + + // long -> timestamp + checkException(path, "cast(v as timestamp)", "INVALID_VARIANT_CAST") + checkExpr(path, "try_cast(v as timestamp)", + null, toJavaTimestamp(Long.MaxValue / MICROS_PER_SECOND * MICROS_PER_SECOND)) + + writeRows(path, writeSchema(DecimalType(38, 19)), + Row(metadata(Nil), null, Decimal("1E18")), + Row(metadata(Nil), null, Decimal("100")), + Row(metadata(Nil), null, Decimal("10")), + Row(metadata(Nil), null, Decimal("1")), + Row(metadata(Nil), null, Decimal("0")), + Row(metadata(Nil), null, Decimal("0.1")), + Row(metadata(Nil), null, Decimal("0.01")), + Row(metadata(Nil), null, Decimal("1E-18"))) + + checkException(path, "cast(v as timestamp)", "INVALID_VARIANT_CAST") + // decimal -> timestamp + checkExpr(path, "try_cast(v as timestamp)", + (null +: Seq(100000000, 10000000, 1000000, 0, 100000, 10000, 0).map(toJavaTimestamp(_))): _*) + // decimal -> string + checkExpr(path, "cast(v as string)", + "1000000000000000000", "100", "10", "1", "0", "0.1", "0.01", "0.000000000000000001") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala index 5d59a3e0f8256..09b29b668b134 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ +import org.apache.spark.types.variant.VariantUtil._ import org.apache.spark.unsafe.types.{UTF8String, VariantVal} import org.apache.spark.util.ArrayImplicits._ @@ -117,7 +118,8 @@ class VariantSuite extends QueryTest with SharedSparkSession with ExpressionEval rand.nextBytes(value) val metadata = new Array[Byte](rand.nextInt(50)) rand.nextBytes(metadata) - new VariantVal(value, metadata) + // Generate a valid metadata, otherwise the shredded reader will fail. + new VariantVal(value, Array[Byte](VERSION, 0, 0) ++ metadata) } } @@ -151,7 +153,8 @@ class VariantSuite extends QueryTest with SharedSparkSession with ExpressionEval val metadata = new Array[Byte](rand.nextInt(50)) rand.nextBytes(metadata) val numElements = 3 // rand.nextInt(10) - Seq.fill(numElements)(new VariantVal(value, metadata)) + // Generate a valid metadata, otherwise the shredded reader will fail. 
+ Seq.fill(numElements)(new VariantVal(value, Array[Byte](VERSION, 0, 0) ++ metadata)) } } @@ -299,7 +302,9 @@ class VariantSuite extends QueryTest with SharedSparkSession with ExpressionEval df.write.parquet(file) val schema = StructType(Seq(StructField("v", VariantType))) val result = spark.read.schema(schema).parquet(file).selectExpr("to_json(v)") - val e = intercept[org.apache.spark.SparkException](result.collect()) + val e = withSQLConf(SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> "false") { + intercept[org.apache.spark.SparkException](result.collect()) + } checkError( exception = e.getCause.asInstanceOf[AnalysisException], condition = condition, @@ -429,26 +434,26 @@ class VariantSuite extends QueryTest with SharedSparkSession with ExpressionEval checkError( exception = intercept[AnalysisException] { - spark.sql("select parse_json('') order by 1") + spark.sql("select parse_json('') v order by 1") }, condition = "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE", parameters = Map( "functionName" -> "`sortorder`", "dataType" -> "\"VARIANT\"", - "sqlExpr" -> "\"parse_json() ASC NULLS FIRST\""), - context = ExpectedContext(fragment = "order by 1", start = 22, stop = 31) + "sqlExpr" -> "\"v ASC NULLS FIRST\""), + context = ExpectedContext(fragment = "order by 1", start = 24, stop = 33) ) checkError( exception = intercept[AnalysisException] { - spark.sql("select parse_json('') sort by 1") + spark.sql("select parse_json('') v sort by 1") }, condition = "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE", parameters = Map( "functionName" -> "`sortorder`", "dataType" -> "\"VARIANT\"", - "sqlExpr" -> "\"parse_json() ASC NULLS FIRST\""), - context = ExpectedContext(fragment = "sort by 1", start = 22, stop = 30) + "sqlExpr" -> "\"v ASC NULLS FIRST\""), + context = ExpectedContext(fragment = "sort by 1", start = 24, stop = 32) ) checkError( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantWriteShreddingSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/VariantWriteShreddingSuite.scala index ed66ddb1f0f44..d31bf109af6c7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/VariantWriteShreddingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantWriteShreddingSuite.scala @@ -67,6 +67,36 @@ class VariantWriteShreddingSuite extends SparkFunSuite with ExpressionEvalHelper private val emptyMetadata: Array[Byte] = parseJson("null").getMetadata + test("variantShreddingSchema") { + // Validate the schema produced by SparkShreddingUtils.variantShreddingSchema for a few simple + // cases. + // metadata is always non-nullable. + assert(SparkShreddingUtils.variantShreddingSchema(IntegerType) == + StructType(Seq( + StructField("metadata", BinaryType, nullable = false), + StructField("value", BinaryType, nullable = true), + StructField("typed_value", IntegerType, nullable = true)))) + + val fieldA = StructType(Seq( + StructField("value", BinaryType, nullable = true), + StructField("typed_value", TimestampNTZType, nullable = true))) + val arrayType = ArrayType(StructType(Seq( + StructField("value", BinaryType, nullable = true), + StructField("typed_value", StringType, nullable = true))), containsNull = false) + val fieldB = StructType(Seq( + StructField("value", BinaryType, nullable = true), + StructField("typed_value", arrayType, nullable = true))) + val objectType = StructType(Seq( + StructField("a", fieldA, nullable = false), + StructField("b", fieldB, nullable = false))) + val structSchema = DataType.fromDDL("a timestamp_ntz, b array") + assert(SparkShreddingUtils.variantShreddingSchema(structSchema) == + StructType(Seq( + StructField("metadata", BinaryType, nullable = false), + StructField("value", BinaryType, nullable = true), + StructField("typed_value", objectType, nullable = true)))) + } + test("shredding as fixed numeric types") { /* Cast integer to any wider numeric type. 
*/ testWithSchema("1", IntegerType, Row(emptyMetadata, null, 1)) @@ -179,6 +209,17 @@ class VariantWriteShreddingSuite extends SparkFunSuite with ExpressionEvalHelper // Not an object testWithSchema(obj, ArrayType(StructType.fromDDL("a int, b string")), Row(obj.getMetadata, untypedValue(obj), null)) + + // Similar to the case above where "b" was not in the shredding schema, but with the unshredded + // value being an object. Check that the copied value has correct dictionary IDs. + val obj2 = parseJson("""{"a": 1, "b": {"c": "hello"}}""") + val residual2 = untypedValue("""{"b": {"c": "hello"}}""") + // First byte is the type, second is number of fields, and the third is the ID for "b" + residual2(2) = 1 + // Followed by 2 bytes for offsets, inner object type and number of fields, then ID for "c". + residual2(7) = 2 + testWithSchema(obj2, StructType.fromDDL("a int, c string"), + Row(obj2.getMetadata, residual2, Row(Row(null, 1), Row(null, null)))) } test("shredding as array") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/XPathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/XPathFunctionsSuite.scala index f08466e8f8d9d..f2a86cbf54152 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/XPathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/XPathFunctionsSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql +import org.apache.spark.sql.catalyst.expressions.IsNotNull +import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession @@ -76,4 +78,38 @@ class XPathFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.select(xpath(col("xml"), lit("a/*/text()"))), Row(Seq("b1", "b2", "b3", "c1", "c2"))) } + + test("The replacement of `xpath*` functions should be NullIntolerant") { + def check(df: DataFrame, expected: Seq[Row]): Unit = { + val filter = df.queryExecution + .sparkPlan + 
.find(_.isInstanceOf[FilterExec]) + .get.asInstanceOf[FilterExec] + assert(filter.condition.find(_.isInstanceOf[IsNotNull]).nonEmpty) + checkAnswer(df, expected) + } + withTable("t") { + sql("CREATE TABLE t AS SELECT * FROM VALUES ('1'), (NULL) T(xml)") + check(sql("SELECT * FROM t WHERE xpath_boolean(xml, 'a/b') = true"), + Seq(Row("1"))) + check(sql("SELECT * FROM t WHERE xpath_short(xml, 'a/b') = 1"), + Seq(Row("1"))) + check(sql("SELECT * FROM t WHERE xpath_int(xml, 'a/b') = 1"), + Seq(Row("1"))) + check(sql("SELECT * FROM t WHERE xpath_long(xml, 'a/b') = 1"), + Seq(Row("1"))) + check(sql("SELECT * FROM t WHERE xpath_float(xml, 'a/b') = 1"), + Seq(Row("1"))) + check(sql("SELECT * FROM t WHERE xpath_double(xml, 'a/b') = 1"), + Seq(Row("1"))) + check(sql("SELECT * FROM t WHERE xpath_string(xml, 'a/b') = '1'"), + Seq(Row("1"))) + } + withTable("t") { + sql("CREATE TABLE t AS SELECT * FROM VALUES " + + "('b1b2b3c1c2'), (NULL) T(xml)") + check(sql("SELECT * FROM t WHERE xpath(xml, 'a/b/text()') = array('b1', 'b2', 'b3')"), + Seq(Row("b1b2b3c1c2"))) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ExplicitlyUnsupportedResolverFeatureSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ExplicitlyUnsupportedResolverFeatureSuite.scala new file mode 100644 index 0000000000000..7fd7d570ecfc1 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ExplicitlyUnsupportedResolverFeatureSuite.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.analysis.resolver + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.analysis.resolver.Resolver +import org.apache.spark.sql.test.SharedSparkSession + +class ExplicitlyUnsupportedResolverFeatureSuite extends QueryTest with SharedSparkSession { + test("Unsupported table types") { + withTable("csv_table") { + spark.sql("CREATE TABLE csv_table (col1 INT) USING CSV;").collect() + checkResolution("SELECT * FROM csv_table;", shouldPass = true) + } + withTable("json_table") { + spark.sql("CREATE TABLE json_table (col1 INT) USING JSON;").collect() + checkResolution("SELECT * FROM json_table;", shouldPass = true) + } + withTable("parquet_table") { + spark.sql("CREATE TABLE parquet_table (col1 INT) USING PARQUET;").collect() + checkResolution("SELECT * FROM parquet_table;", shouldPass = true) + } + withTable("orc_table") { + spark.sql("CREATE TABLE orc_table (col1 INT) USING ORC;").collect() + checkResolution("SELECT * FROM orc_table;", shouldPass = true) + } + } + + test("Unsupported view types") { + withTable("src_table") { + spark.sql("CREATE TABLE src_table (col1 INT) USING PARQUET;").collect() + + withView("temporary_view") { + spark.sql("CREATE TEMPORARY VIEW temporary_view AS SELECT * FROM src_table;").collect() + checkResolution("SELECT * FROM temporary_view;") + } + + withView("persistent_view") { + spark.sql("CREATE VIEW persistent_view AS SELECT * FROM src_table;").collect() + checkResolution("SELECT * FROM persistent_view;") + } + } + } + + test("Unsupported char type padding") { + 
withTable("char_type_padding") { + spark.sql(s"CREATE TABLE t1 (c1 CHAR(3), c2 STRING) USING PARQUET") + checkResolution("SELECT c1 = '12', c1 = '12 ', c1 = '12 ' FROM t1 WHERE c2 = '12'") + } + } + + test("Unsupported lateral column alias") { + checkResolution("SELECT 1 AS a, a AS b") + checkResolution("SELECT sum(1), `sum(1)` + 1 AS a") + } + + private def checkResolution(sqlText: String, shouldPass: Boolean = false): Unit = { + def noopWrapper(body: => Unit) = body + + val wrapper = if (shouldPass) { + noopWrapper _ + } else { + intercept[Throwable] _ + } + + val unresolvedPlan = spark.sql(sqlText).queryExecution.logical + + val resolver = new Resolver( + spark.sessionState.catalogManager, + extensions = spark.sessionState.analyzer.singlePassResolverExtensions + ) + wrapper { + resolver.lookupMetadataAndResolve(unresolvedPlan) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/HybridAnalyzerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/HybridAnalyzerSuite.scala new file mode 100644 index 0000000000000..587725093f0e5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/HybridAnalyzerSuite.scala @@ -0,0 +1,404 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.analysis.resolver + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.catalyst.{ + AliasIdentifier, + ExtendedAnalysisException, + QueryPlanningTracker +} +import org.apache.spark.sql.catalyst.analysis.{ + AnalysisContext, + Analyzer, + UnresolvedAttribute, + UnresolvedStar +} +import org.apache.spark.sql.catalyst.analysis.resolver.{ + AnalyzerBridgeState, + ExplicitlyUnsupportedResolverFeature, + HybridAnalyzer, + Resolver, + ResolverGuard +} +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.plans.logical.{ + LocalRelation, + LogicalPlan, + Project, + SubqueryAlias +} +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, MetadataBuilder} + +class HybridAnalyzerSuite extends QueryTest with SharedSparkSession { + private val col1Integer = AttributeReference("col1", IntegerType)() + private val col2Integer = AttributeReference("col2", IntegerType)() + private val col2IntegerWithMetadata = AttributeReference( + "col2", + IntegerType, + metadata = (new MetadataBuilder).putString("comment", "this is an integer").build() + )() + + private def validateSinglePassResolverBridgeState(bridgeRelations: Boolean): Unit = { + assert(bridgeRelations == AnalysisContext.get.getSinglePassResolverBridgeState.isDefined) + } + + private class BrokenResolver(ex: Throwable, bridgeRelations: Boolean) + extends Resolver(spark.sessionState.catalogManager) { + override def lookupMetadataAndResolve( + plan: LogicalPlan, + analyzerBridgeState: Option[AnalyzerBridgeState] = None): LogicalPlan = { + 
validateSinglePassResolverBridgeState(bridgeRelations) + throw ex + } + } + + private class ValidatingResolver(bridgeRelations: Boolean) + extends Resolver(spark.sessionState.catalogManager) { + override def lookupMetadataAndResolve( + plan: LogicalPlan, + analyzerBridgeState: Option[AnalyzerBridgeState] = None): LogicalPlan = { + validateSinglePassResolverBridgeState(bridgeRelations) + super.lookupMetadataAndResolve(plan, analyzerBridgeState) + } + } + + private class HardCodedResolver(resolvedPlan: LogicalPlan, bridgeRelations: Boolean) + extends Resolver(spark.sessionState.catalogManager) { + override def lookupMetadataAndResolve( + plan: LogicalPlan, + analyzerBridgeState: Option[AnalyzerBridgeState] = None): LogicalPlan = { + validateSinglePassResolverBridgeState(bridgeRelations) + resolvedPlan + } + } + + private class ValidatingAnalyzer(bridgeRelations: Boolean) + extends Analyzer(spark.sessionState.catalogManager) { + override def executeAndTrack(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { + validateSinglePassResolverBridgeState(bridgeRelations) + super.executeAndTrack(plan, tracker) + } + } + + private class BrokenAnalyzer(ex: Throwable, bridgeRelations: Boolean) + extends Analyzer(spark.sessionState.catalogManager) { + override def executeAndTrack(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { + validateSinglePassResolverBridgeState(bridgeRelations) + throw ex + } + } + + private class CustomAnalyzer(customCode: () => Unit, bridgeRelations: Boolean) + extends Analyzer(spark.sessionState.catalogManager) { + override def executeAndTrack(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { + validateSinglePassResolverBridgeState(bridgeRelations) + customCode() + super.executeAndTrack(plan, tracker) + } + } + + override protected def test(testName: String, testTags: Tag*)(testFun: => Any)( + implicit pos: Position): Unit = { + super.test(testName) { + withSQLConf( + 
SQLConf.ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER.key -> "true" + ) { + testFun + } + } + } + + test("Both fixed-point and single-pass analyzers pass") { + val plan: LogicalPlan = { + Project( + Seq(UnresolvedStar(None)), + LocalRelation(col1Integer) + ) + } + val resolvedPlan = + Project( + Seq(col1Integer), + LocalRelation(Seq(col1Integer)) + ) + assert( + new HybridAnalyzer( + new ValidatingAnalyzer(bridgeRelations = true), + new ResolverGuard(spark.sessionState.catalogManager), + new ValidatingResolver(bridgeRelations = true) + ).apply(plan, null) + == + resolvedPlan + ) + } + + test("Fixed-point analyzer passes, single-pass analyzer fails") { + val plan: LogicalPlan = + Project(Seq(UnresolvedStar(None)), LocalRelation(col1Integer)) + checkError( + exception = intercept[AnalysisException]( + new HybridAnalyzer( + new ValidatingAnalyzer(bridgeRelations = true), + new ResolverGuard(spark.sessionState.catalogManager), + new BrokenResolver( + QueryCompilationErrors.unsupportedSinglePassAnalyzerFeature("test"), + bridgeRelations = true + ) + ).apply(plan, null) + ), + condition = "UNSUPPORTED_SINGLE_PASS_ANALYZER_FEATURE", + parameters = Map("feature" -> "test") + ) + } + + test("Fixed-point analyzer fails, single-pass analyzer passes") { + val plan: LogicalPlan = + Project( + Seq(UnresolvedAttribute("nonexistent_col")), + LocalRelation(col1Integer) + ) + val resolvedPlan = + Project( + Seq(col1Integer), + LocalRelation(Seq(col1Integer)) + ) + checkError( + exception = intercept[AnalysisException]( + new HybridAnalyzer( + new ValidatingAnalyzer(bridgeRelations = true), + new ResolverGuard(spark.sessionState.catalogManager), + new HardCodedResolver(resolvedPlan, bridgeRelations = true) + ).apply(plan, null) + ), + condition = "HYBRID_ANALYZER_EXCEPTION.FIXED_POINT_FAILED_SINGLE_PASS_SUCCEEDED", + parameters = Map("singlePassOutput" -> resolvedPlan.toString) + ) + } + + test("Both fixed-point and single-pass analyzers fail") { + val plan: LogicalPlan = + 
Project( + Seq(UnresolvedAttribute("nonexistent_col")), + LocalRelation(col1Integer) + ) + checkError( + exception = intercept[ExtendedAnalysisException]( + new HybridAnalyzer( + new ValidatingAnalyzer(bridgeRelations = true), + new ResolverGuard(spark.sessionState.catalogManager), + new ValidatingResolver(bridgeRelations = true) + ).apply(plan, null) + ), + condition = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + parameters = Map( + "objectName" -> "`nonexistent_col`", + "proposal" -> "`col1`" + ) + ) + } + + test("Plan mismatch") { + val plan: LogicalPlan = + Project( + Seq(UnresolvedAttribute("col1")), + SubqueryAlias( + AliasIdentifier("t", Seq.empty), + LocalRelation(Seq(col1Integer)) + ) + ) + val resolvedPlan = + Project( + Seq(col1Integer), + LocalRelation(Seq(col1Integer)) + ) + val expectedResolvedPlan = + Project( + Seq(col1Integer), + SubqueryAlias( + AliasIdentifier("t", Seq.empty), + LocalRelation(Seq(col1Integer)) + ) + ) + checkError( + exception = intercept[AnalysisException]( + new HybridAnalyzer( + new ValidatingAnalyzer(bridgeRelations = true), + new ResolverGuard(spark.sessionState.catalogManager), + new HardCodedResolver(resolvedPlan, bridgeRelations = true) + ).apply(plan, null) + ), + condition = "HYBRID_ANALYZER_EXCEPTION.LOGICAL_PLAN_COMPARISON_MISMATCH", + parameters = Map( + "singlePassOutput" -> resolvedPlan.toString, + "fixedPointOutput" -> expectedResolvedPlan.toString + ) + ) + } + + test("Missing metadata in output schema") { + val plan: LogicalPlan = + Project( + Seq(UnresolvedAttribute("col2")), + LocalRelation(col2IntegerWithMetadata) + ) + val resolvedPlan = + Project( + Seq(col2Integer), + LocalRelation(Seq(col2Integer)) + ) + checkError( + exception = intercept[AnalysisException]( + new HybridAnalyzer( + new ValidatingAnalyzer(bridgeRelations = true), + new ResolverGuard(spark.sessionState.catalogManager), + new HardCodedResolver(resolvedPlan, bridgeRelations = true) + ).apply(plan, null) + ), + condition = 
"HYBRID_ANALYZER_EXCEPTION.OUTPUT_SCHEMA_COMPARISON_MISMATCH", + parameters = Map( + "singlePassOutputSchema" -> "(col2,IntegerType,true,{})", + "fixedPointOutputSchema" -> "(col2,IntegerType,true,{\"comment\":\"this is an integer\"})" + ) + ) + } + + test("Explicitly unsupported resolver feature") { + val plan: LogicalPlan = { + Project( + Seq(UnresolvedStar(None)), + LocalRelation(col1Integer) + ) + } + checkAnswer( + new HybridAnalyzer( + new ValidatingAnalyzer(bridgeRelations = true), + new ResolverGuard(spark.sessionState.catalogManager), + new BrokenResolver( + new ExplicitlyUnsupportedResolverFeature("FAILURE"), + bridgeRelations = true + ) + ).apply(plan, null), + plan + ) + } + + test("Fixed-point only run") { + val plan = Project( + Seq(UnresolvedStar(None)), + LocalRelation(col1Integer) + ) + val resolvedPlan = Project( + Seq(col1Integer), + LocalRelation(Seq(col1Integer)) + ) + assert( + withSQLConf( + SQLConf.ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER.key -> "false" + ) { + new HybridAnalyzer( + new ValidatingAnalyzer(bridgeRelations = false), + new ResolverGuard(spark.sessionState.catalogManager), + new BrokenResolver( + new Exception("Single-pass resolver should not be invoked"), + bridgeRelations = false + ) + ).apply(plan, null) + } + == + resolvedPlan + ) + } + + test("Single-pass only run") { + val plan = Project( + Seq(UnresolvedStar(None)), + LocalRelation(col1Integer) + ) + val resolvedPlan = Project( + Seq(col1Integer), + LocalRelation(Seq(col1Integer)) + ) + assert( + withSQLConf( + SQLConf.ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER.key -> "false", + SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED.key -> "true" + ) { + new HybridAnalyzer( + new BrokenAnalyzer( + new Exception("Fixed-point analyzer should not be invoked"), + bridgeRelations = false + ), + new ResolverGuard(spark.sessionState.catalogManager), + new ValidatingResolver(bridgeRelations = false) + ).apply(plan, null) + } + == + resolvedPlan + ) + } + + test("Nested 
invocations") { + val plan = Project( + Seq(UnresolvedStar(None)), + LocalRelation(col1Integer) + ) + val resolvedPlan = Project( + Seq(col1Integer), + LocalRelation(Seq(col1Integer)) + ) + + val nestedAnalysis = () => { + assert( + withSQLConf( + SQLConf.ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER.key -> "false", + SQLConf.ANALYZER_SINGLE_PASS_RESOLVER_ENABLED.key -> "true" + ) { + new HybridAnalyzer( + new BrokenAnalyzer( + new Exception("Fixed-point analyzer should not be invoked"), + bridgeRelations = false + ), + new ResolverGuard(spark.sessionState.catalogManager), + new ValidatingResolver(bridgeRelations = false) + ).apply(plan, null) + } + == + resolvedPlan + ) + } + + assert( + new HybridAnalyzer( + new CustomAnalyzer( + customCode = () => { nestedAnalysis() }, + bridgeRelations = true + ), + new ResolverGuard(spark.sessionState.catalogManager), + new ValidatingResolver(bridgeRelations = true) + ).apply(plan, null) + == + resolvedPlan + ) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/MetadataResolverSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/MetadataResolverSuite.scala new file mode 100644 index 0000000000000..5fd21d7543b33 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/MetadataResolverSuite.scala @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.analysis.resolver + +import scala.collection.mutable + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.analysis.resolver.{ + AnalyzerBridgeState, + BridgedRelationMetadataProvider, + MetadataResolver, + RelationId, + Resolver +} +import org.apache.spark.sql.catalyst.catalog.UnresolvedCatalogRelation +import org.apache.spark.sql.catalyst.expressions.{Expression, PlanExpression} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} +import org.apache.spark.sql.execution.datasources.{FileResolver, HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class MetadataResolverSuite extends QueryTest with SharedSparkSession with SQLTestUtils { + private val keyValueTableSchema = StructType( + Seq( + StructField("key", IntegerType, true), + StructField("value", StringType, true) + ) + ) + private val fileTableSchema = StructType( + Seq( + StructField("id", LongType, true) + ) + ) + + test("Single CSV relation") { + withTable("src_csv") { + spark.sql("CREATE TABLE src_csv (key INT, value STRING) USING CSV;").collect() + + checkResolveUnresolvedCatalogRelation( + sqlText = "SELECT * FROM src_csv", + expectedTableData = 
Seq(createTableData("src_csv")) + ) + } + } + + test("Single ORC relation") { + withTable("src_orc") { + spark.sql("CREATE TABLE src_orc (key INT, value STRING) USING ORC;").collect() + + checkResolveUnresolvedCatalogRelation( + sqlText = "SELECT * FROM src_orc", + expectedTableData = Seq(createTableData("src_orc")) + ) + } + } + + test("Relation inside an EXISTS subquery") { + withTable("src") { + spark.sql("CREATE TABLE src (key INT, value STRING) USING PARQUET;").collect() + + checkResolveUnresolvedCatalogRelation( + sqlText = "SELECT * FROM VALUES (1) WHERE EXISTS (SELECT col1 FROM src)", + expectedTableData = Seq(createTableData("src")) + ) + } + } + + test("Relation inside an IN subquery") { + withTable("src") { + spark.sql("CREATE TABLE src (key INT, value STRING) USING PARQUET;").collect() + + checkResolveUnresolvedCatalogRelation( + sqlText = "SELECT * FROM VALUES (1) WHERE col1 IN (SELECT col1 FROM src)", + expectedTableData = Seq(createTableData("src")) + ) + } + } + + test("Relation inside a nested subquery expression") { + withTable("src") { + spark.sql("CREATE TABLE src (key INT, value STRING) USING PARQUET;").collect() + + checkResolveUnresolvedCatalogRelation( + sqlText = """ + SELECT + col1 + ( + SELECT 35 * ( + SELECT key FROM src LIMIT 1 + ) * col1 FROM VALUES (2) + ) + FROM + VALUES (1) + """, + expectedTableData = Seq(createTableData("src")) + ) + } + } + + test("Relation from a file") { + val df = spark.range(100).toDF() + withTempPath(f => { + df.write.json(f.getCanonicalPath) + checkResolveLogicalRelation( + sqlText = s"select id from json.`${f.getCanonicalPath}`", + expectedTableData = Seq( + RelationId( + multipartIdentifier = Seq("spark_catalog", "json", s"${f.getCanonicalPath}") + ) -> TestTableData( + name = s"file:${f.getCanonicalPath}", + schema = fileTableSchema + ) + ) + ) + }) + } + + test("Relation bridged from legacy Analyzer") { + withTable("src") { + spark.sql("CREATE TABLE src (key INT, value STRING) USING PARQUET;").collect() 
+ + val analyzerBridgeState = new AnalyzerBridgeState + analyzerBridgeState.relationsWithResolvedMetadata.put( + UnresolvedRelation(Seq("src")), + createUnresolvedCatalogRelation("src") + ) + + checkResolveUnresolvedCatalogRelation( + sqlText = "SELECT * FROM src", + expectedTableData = Seq(createTableData("src")), + analyzerBridgeState = Some(analyzerBridgeState) + ) + } + } + + test("Relation not bridged from legacy Analyzer") { + withTable("src") { + spark.sql("CREATE TABLE src (key INT, value STRING) USING PARQUET;").collect() + + checkResolveUnresolvedCatalogRelation( + sqlText = "SELECT * FROM src", + expectedTableData = Seq.empty, + analyzerBridgeState = Some(new AnalyzerBridgeState) + ) + } + } + + private def checkResolveUnresolvedCatalogRelation( + sqlText: String, + expectedTableData: Seq[(RelationId, TestTableData)], + analyzerBridgeState: Option[AnalyzerBridgeState] = None): Unit = { + checkResolve( + sqlText, + expectedTableData, + relation => + relation.asInstanceOf[UnresolvedCatalogRelation].tableMeta.identifier.unquotedString, + relation => relation.asInstanceOf[UnresolvedCatalogRelation].tableMeta.schema, + analyzerBridgeState + ) + } + + private def checkResolveLogicalRelation( + sqlText: String, + expectedTableData: Seq[(RelationId, TestTableData)], + analyzerBridgeState: Option[AnalyzerBridgeState] = None): Unit = { + checkResolve( + sqlText, + expectedTableData, + relation => + relation + .asInstanceOf[LogicalRelation] + .relation + .asInstanceOf[HadoopFsRelation] + .location + .rootPaths + .mkString(","), + relation => relation.asInstanceOf[LogicalRelation].relation.schema, + analyzerBridgeState + ) + } + + private def checkResolve( + sqlText: String, + expectedTableData: Seq[(RelationId, TestTableData)], + getTableName: LogicalPlan => String, + getTableSchema: LogicalPlan => StructType, + analyzerBridgeState: Option[AnalyzerBridgeState]): Unit = { + val unresolvedPlan = spark.sql(sqlText).queryExecution.logical + + val metadataResolver = 
analyzerBridgeState match { + case Some(analyzerBridgeState) => + new BridgedRelationMetadataProvider( + spark.sessionState.catalogManager, + Resolver.createRelationResolution(spark.sessionState.catalogManager), + analyzerBridgeState + ) + case None => + val metadataResolver = new MetadataResolver( + spark.sessionState.catalogManager, + Resolver.createRelationResolution(spark.sessionState.catalogManager), + Seq(new FileResolver(spark)) + ) + metadataResolver.resolve(unresolvedPlan) + metadataResolver + } + + val actualTableData = new mutable.HashMap[RelationId, TestTableData] + + def findUnresolvedRelations(unresolvedPlan: LogicalPlan): Unit = unresolvedPlan.foreach { + case unresolvedRelation: UnresolvedRelation => + metadataResolver.getRelationWithResolvedMetadata(unresolvedRelation) match { + case Some(plan) => + val relationId = metadataResolver.relationIdFromUnresolvedRelation(unresolvedRelation) + val relation = plan match { + case SubqueryAlias(_, relation) => relation + case relation => relation + } + + actualTableData(relationId) = + TestTableData(getTableName(relation), getTableSchema(relation)) + case None => + } + case unresolvedPlan => + def traverseExpressions(expression: Expression): Unit = expression match { + case planExpression: PlanExpression[_] => + planExpression.plan match { + case plan: LogicalPlan => + findUnresolvedRelations(plan) + case _ => + } + case expression => + expression.children.foreach(traverseExpressions) + } + + unresolvedPlan.expressions.foreach(traverseExpressions) + } + + findUnresolvedRelations(unresolvedPlan) + + assert(actualTableData == mutable.HashMap(expectedTableData: _*)) + } + + private def createTableData(name: String) = + RelationId( + multipartIdentifier = Seq("spark_catalog", "default", name) + ) -> TestTableData( + name = s"spark_catalog.default.$name", + schema = keyValueTableSchema + ) + + private def createUnresolvedCatalogRelation(name: String) = SubqueryAlias( + AliasIdentifier(name), + 
UnresolvedCatalogRelation( + spark.sessionState.catalog.getTableMetadata(TableIdentifier(name)), + CaseInsensitiveStringMap.empty + ) + ) + + private case class TestTableData(name: String, schema: StructType) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/NameScopeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/NameScopeSuite.scala new file mode 100644 index 0000000000000..ec744af89f000 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/NameScopeSuite.scala @@ -0,0 +1,659 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.analysis.resolver + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.analysis.UnresolvedStar +import org.apache.spark.sql.catalyst.analysis.resolver.{NameScope, NameScopeStack, NameTarget} +import org.apache.spark.sql.catalyst.expressions.{ + AttributeReference, + GetArrayItem, + GetArrayStructFields, + GetMapValue, + GetStructField, + Literal +} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.types.{ + ArrayType, + BooleanType, + IntegerType, + MapType, + StringType, + StructField, + StructType +} + +class NameScopeSuite extends PlanTest with SQLConfHelper { + private val col1Integer = AttributeReference(name = "col1", dataType = IntegerType)() + private val col1IntegerOther = AttributeReference(name = "col1", dataType = IntegerType)() + private val col2Integer = AttributeReference(name = "col2", dataType = IntegerType)() + private val col3Boolean = AttributeReference(name = "col3", dataType = BooleanType)() + private val col4String = AttributeReference(name = "col4", dataType = StringType)() + private val col5String = AttributeReference(name = "col5", dataType = StringType)() + private val col6IntegerWithQualifier = AttributeReference( + name = "col6", + dataType = IntegerType + )(qualifier = Seq("catalog", "database", "table")) + private val col6IntegerOtherWithQualifier = AttributeReference( + name = "col6", + dataType = IntegerType + )(qualifier = Seq("catalog", "database", "table")) + private val col7StringWithQualifier = AttributeReference( + name = "col7", + dataType = IntegerType + )(qualifier = Seq("catalog", "database", "table")) + private val col8Struct = AttributeReference( + name = "col8", + dataType = StructType(Seq(StructField("field", IntegerType, true))) + )() + private val col9NestedStruct = AttributeReference( + name = "col9", + dataType = StructType( + Seq( + StructField( + "field", + 
StructType( + Seq( + StructField("subfield", IntegerType) + ) + ) + ) + ) + ) + )() + private val col10Map = AttributeReference( + name = "col10", + dataType = MapType(StringType, IntegerType) + )() + private val col11MapWithStruct = AttributeReference( + name = "col11", + dataType = MapType( + StringType, + StructType(Seq(StructField("field", StringType))) + ) + )() + private val col12Array = AttributeReference( + name = "col12", + dataType = ArrayType(IntegerType) + )() + private val col13ArrayWithStruct = AttributeReference( + name = "col13", + dataType = ArrayType( + StructType(Seq(StructField("field", StringType))) + ) + )() + + test("Empty scope") { + val nameScope = new NameScope + + assert(nameScope.getAllAttributes.isEmpty) + + assert(nameScope.matchMultipartName(Seq("col1")) == NameTarget(candidates = Seq.empty)) + } + + test("Single unnamed plan") { + val nameScope = new NameScope + + nameScope += Seq(col1Integer, col2Integer, col3Boolean) + + assert(nameScope.getAllAttributes == Seq(col1Integer, col2Integer, col3Boolean)) + + assert( + nameScope.matchMultipartName(Seq("col1")) == NameTarget( + candidates = Seq(col1Integer), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col2")) == NameTarget( + candidates = Seq(col2Integer), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col3")) == NameTarget( + candidates = Seq(col3Boolean), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col4")) == NameTarget( + candidates = Seq.empty, + allAttributes = Seq(col1Integer, col2Integer, col3Boolean) + ) + ) + } + + test("Several unnamed plans") { + val nameScope = new NameScope + + nameScope += Seq(col1Integer) + nameScope += Seq(col2Integer, col3Boolean) + nameScope += Seq(col4String) + + assert(nameScope.getAllAttributes == Seq(col1Integer, col2Integer, col3Boolean, 
col4String)) + + assert( + nameScope.matchMultipartName(Seq("col1")) == NameTarget( + candidates = Seq(col1Integer), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col2")) == NameTarget( + candidates = Seq(col2Integer), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col3")) == NameTarget( + candidates = Seq(col3Boolean), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col4")) == NameTarget( + candidates = Seq(col4String), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col5")) == NameTarget( + candidates = Seq.empty, + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + } + + test("Single named plan") { + val nameScope = new NameScope + + nameScope("table1") = Seq(col1Integer, col2Integer, col3Boolean) + + assert(nameScope.getAllAttributes == Seq(col1Integer, col2Integer, col3Boolean)) + + assert( + nameScope.matchMultipartName(Seq("col1")) == NameTarget( + candidates = Seq(col1Integer), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col2")) == NameTarget( + candidates = Seq(col2Integer), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col3")) == NameTarget( + candidates = Seq(col3Boolean), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col4")) == NameTarget( + candidates = Seq.empty, + allAttributes = Seq(col1Integer, col2Integer, col3Boolean) + ) + ) + } + + test("Several named plans") { + val nameScope = new NameScope + + nameScope("table1") = Seq(col1Integer) + nameScope("table2") = Seq(col2Integer, col3Boolean) + 
nameScope("table2") = Seq(col4String) + nameScope("table3") = Seq(col5String) + + assert( + nameScope.getAllAttributes == Seq( + col1Integer, + col2Integer, + col3Boolean, + col4String, + col5String + ) + ) + + assert( + nameScope.matchMultipartName(Seq("col1")) == NameTarget( + candidates = Seq(col1Integer), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String, col5String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col2")) == NameTarget( + candidates = Seq(col2Integer), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String, col5String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col3")) == NameTarget( + candidates = Seq(col3Boolean), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String, col5String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col4")) == NameTarget( + candidates = Seq(col4String), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String, col5String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col5")) == NameTarget( + candidates = Seq(col5String), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String, col5String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col6")) == NameTarget( + candidates = Seq.empty, + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String, col5String) + ) + ) + } + + test("Named and unnamed plans with case insensitive comparison") { + val col1Integer = AttributeReference(name = "Col1", dataType = IntegerType)() + val col2Integer = AttributeReference(name = "col2", dataType = IntegerType)() + val col3Boolean = AttributeReference(name = "coL3", dataType = BooleanType)() + val col4String = AttributeReference(name = "Col4", dataType = StringType)() + + val nameScope = new NameScope + + nameScope("TaBle1") = Seq(col1Integer) + nameScope("table2") = Seq(col2Integer, col3Boolean) + nameScope += Seq(col4String) + + assert(nameScope.getAllAttributes == Seq(col1Integer, col2Integer, 
col3Boolean, col4String)) + + assert( + nameScope.matchMultipartName(Seq("cOL1")) == NameTarget( + candidates = Seq(col1Integer.withName("cOL1")), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("CoL2")) == NameTarget( + candidates = Seq(col2Integer.withName("CoL2")), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col3")) == NameTarget( + candidates = Seq(col3Boolean.withName("col3")), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("COL4")) == NameTarget( + candidates = Seq(col4String.withName("COL4")), + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + assert( + nameScope.matchMultipartName(Seq("col5")) == NameTarget( + candidates = Seq.empty, + allAttributes = Seq(col1Integer, col2Integer, col3Boolean, col4String) + ) + ) + } + + test("Duplicate attribute names from one plan") { + val nameScope = new NameScope + + nameScope("table1") = Seq(col1Integer, col1Integer) + nameScope("table1") = Seq(col1IntegerOther) + + assert(nameScope.getAllAttributes == Seq(col1Integer, col1Integer, col1IntegerOther)) + + nameScope.matchMultipartName(Seq("col1")) == NameTarget( + candidates = Seq(col1Integer, col1IntegerOther) + ) + } + + test("Duplicate attribute names from several plans") { + val nameScope = new NameScope + + nameScope("table1") = Seq(col1Integer, col1IntegerOther) + nameScope("table2") = Seq(col1Integer, col1IntegerOther) + + assert( + nameScope.getAllAttributes == Seq( + col1Integer, + col1IntegerOther, + col1Integer, + col1IntegerOther + ) + ) + + nameScope.matchMultipartName(Seq("col1")) == NameTarget( + candidates = Seq( + col1Integer, + col1IntegerOther, + col1Integer, + col1IntegerOther + ) + ) + } + + test("Expand star") { + val nameScope = new NameScope + + nameScope("table") = + 
Seq(col6IntegerWithQualifier, col6IntegerOtherWithQualifier, col7StringWithQualifier) + + Seq(Seq("table"), Seq("database", "table"), Seq("catalog", "database", "table")) + .foreach(tableQualifier => { + assert( + nameScope.expandStar(UnresolvedStar(Some(tableQualifier))) + == Seq(col6IntegerWithQualifier, col6IntegerOtherWithQualifier, col7StringWithQualifier) + ) + }) + + checkError( + exception = intercept[AnalysisException]( + nameScope.expandStar(UnresolvedStar(Some(Seq("database", "table_fail")))) + ), + condition = "CANNOT_RESOLVE_STAR_EXPAND", + parameters = Map( + "targetString" -> "`database`.`table_fail`", + "columns" -> "`col6`, `col6`, `col7`" + ) + ) + + nameScope("table2") = Seq(col6IntegerWithQualifier) + + checkError( + exception = intercept[AnalysisException]( + nameScope.expandStar(UnresolvedStar(Some(Seq("table2")))) + ), + condition = "INVALID_USAGE_OF_STAR_OR_REGEX", + parameters = Map( + "elem" -> "'*'", + "prettyName" -> "query" + ) + ) + } + + test("Multipart attribute names") { + val nameScope = new NameScope + + nameScope("table") = Seq(col6IntegerWithQualifier) + + for (multipartIdentifier <- Seq( + Seq("catalog", "database", "table", "col6"), + Seq("database", "table", "col6"), + Seq("table", "col6") + )) { + assert( + nameScope.matchMultipartName(multipartIdentifier) == NameTarget( + candidates = Seq( + col6IntegerWithQualifier + ), + allAttributes = Seq(col6IntegerWithQualifier) + ) + ) + } + + for (multipartIdentifier <- Seq( + Seq("catalog.database.table", "col6"), + Seq("`database`.`table`.`col6`"), + Seq("table.col6") + )) { + assert( + nameScope.matchMultipartName(multipartIdentifier) == NameTarget( + candidates = Seq.empty, + allAttributes = Seq(col6IntegerWithQualifier) + ) + ) + } + } + + test("Nested fields") { + val nameScope = new NameScope + + nameScope("table") = Seq( + col8Struct, + col9NestedStruct, + col10Map, + col11MapWithStruct, + col12Array, + col13ArrayWithStruct + ) + + var matchedStructs = 
nameScope.matchMultipartName(Seq("col8", "field")) + assert( + matchedStructs == NameTarget( + candidates = Seq( + GetStructField(col8Struct, 0, Some("field")) + ), + aliasName = Some("field"), + allAttributes = Seq( + col8Struct, + col9NestedStruct, + col10Map, + col11MapWithStruct, + col12Array, + col13ArrayWithStruct + ) + ) + ) + + matchedStructs = nameScope.matchMultipartName(Seq("col9", "field", "subfield")) + assert( + matchedStructs == NameTarget( + candidates = Seq( + GetStructField( + GetStructField( + col9NestedStruct, + 0, + Some("field") + ), + 0, + Some("subfield") + ) + ), + aliasName = Some("subfield"), + allAttributes = Seq( + col8Struct, + col9NestedStruct, + col10Map, + col11MapWithStruct, + col12Array, + col13ArrayWithStruct + ) + ) + ) + + var matchedMaps = nameScope.matchMultipartName(Seq("col10", "key")) + assert( + matchedMaps == NameTarget( + candidates = Seq(GetMapValue(col10Map, Literal("key"))), + aliasName = Some("key"), + allAttributes = Seq( + col8Struct, + col9NestedStruct, + col10Map, + col11MapWithStruct, + col12Array, + col13ArrayWithStruct + ) + ) + ) + + matchedMaps = nameScope.matchMultipartName(Seq("col11", "key")) + assert( + matchedMaps == NameTarget( + candidates = Seq(GetMapValue(col11MapWithStruct, Literal("key"))), + aliasName = Some("key"), + allAttributes = Seq( + col8Struct, + col9NestedStruct, + col10Map, + col11MapWithStruct, + col12Array, + col13ArrayWithStruct + ) + ) + ) + + var matchedArrays = nameScope.matchMultipartName(Seq("col12", "element")) + assert( + matchedArrays == NameTarget( + candidates = Seq(GetArrayItem(col12Array, Literal("element"))), + aliasName = Some("element"), + allAttributes = Seq( + col8Struct, + col9NestedStruct, + col10Map, + col11MapWithStruct, + col12Array, + col13ArrayWithStruct + ) + ) + ) + + matchedArrays = nameScope.matchMultipartName(Seq("col13", "field")) + assert( + matchedArrays == NameTarget( + candidates = Seq( + GetArrayStructFields( + col13ArrayWithStruct, + 
StructField("field", StringType, true), + 0, + 1, + true + ) + ), + aliasName = Some("field"), + allAttributes = Seq( + col8Struct, + col9NestedStruct, + col10Map, + col11MapWithStruct, + col12Array, + col13ArrayWithStruct + ) + ) + ) + + nameScope("table2") = Seq(col8Struct) + matchedStructs = nameScope.matchMultipartName(Seq("col8", "field")) + assert( + matchedStructs == NameTarget( + candidates = Seq( + GetStructField( + col8Struct, + 0, + Some("field") + ), + GetStructField( + col8Struct, + 0, + Some("field") + ) + ), + aliasName = Some("field"), + allAttributes = Seq( + col8Struct, + col9NestedStruct, + col10Map, + col11MapWithStruct, + col12Array, + col13ArrayWithStruct, + col8Struct + ) + ) + ) + } +} + +class NameScopeStackSuite extends PlanTest { + private val col1Integer = AttributeReference(name = "col1", dataType = IntegerType)() + private val col2String = AttributeReference(name = "col2", dataType = StringType)() + private val col3Integer = AttributeReference(name = "col3", dataType = IntegerType)() + private val col4String = AttributeReference(name = "col4", dataType = StringType)() + + test("Empty stack") { + val stack = new NameScopeStack + + assert(stack.top.getAllAttributes.isEmpty) + } + + test("Overwrite top of the stack containing single scope") { + val stack = new NameScopeStack + + stack.top.update("table1", Seq(col1Integer, col2String)) + assert(stack.top.getAllAttributes == Seq(col1Integer, col2String)) + + stack.overwriteTop("table2", Seq(col3Integer, col4String)) + assert(stack.top.getAllAttributes == Seq(col3Integer, col4String)) + + stack.overwriteTop(Seq(col2String)) + assert(stack.top.getAllAttributes == Seq(col2String)) + } + + test("Overwrite top of the stack containing several scopes") { + val stack = new NameScopeStack + + stack.top.update("table2", Seq(col3Integer)) + + stack.withNewScope { + assert(stack.top.getAllAttributes.isEmpty) + + stack.top.update("table1", Seq(col1Integer, col2String)) + 
assert(stack.top.getAllAttributes == Seq(col1Integer, col2String)) + + stack.overwriteTop("table2", Seq(col3Integer, col4String)) + assert(stack.top.getAllAttributes == Seq(col3Integer, col4String)) + + stack.overwriteTop(Seq(col2String)) + assert(stack.top.getAllAttributes == Seq(col2String)) + } + } + + test("Scope stacking") { + val stack = new NameScopeStack + + stack.top.update("table1", Seq(col1Integer)) + + stack.withNewScope { + stack.top.update("table2", Seq(col2String)) + + stack.withNewScope { + stack.top.update("table3", Seq(col3Integer)) + + stack.withNewScope { + stack.top.update("table4", Seq(col4String)) + + assert(stack.top.getAllAttributes == Seq(col4String)) + } + + assert(stack.top.getAllAttributes == Seq(col3Integer)) + } + + assert(stack.top.getAllAttributes == Seq(col2String)) + } + + assert(stack.top.getAllAttributes == Seq(col1Integer)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ResolverGuardSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ResolverGuardSuite.scala new file mode 100644 index 0000000000000..d512adbb0af37 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ResolverGuardSuite.scala @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.analysis.resolver + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.analysis.resolver.ResolverGuard +import org.apache.spark.sql.test.SharedSparkSession + +class ResolverGuardSuite extends QueryTest with SharedSparkSession { + + // Queries that should pass the OperatorResolverGuard + + test("Select * from an inline table") { + checkResolverGuard("SELECT * FROM VALUES(1,2,3)", shouldPass = true) + } + + test("Select the named parameters from an inline table") { + checkResolverGuard("SELECT col1,col2,col3 FROM VALUES(1,2,3)", shouldPass = true) + } + + test("Inline table as a top level operator") { + checkResolverGuard("VALUES(1,2,3)", shouldPass = true) + } + + test("Select one row") { + checkResolverGuard("SELECT 'Hello world!'", shouldPass = true) + } + + test("Where clause with a literal") { + checkResolverGuard( + "SELECT * FROM VALUES(1, 2, false), (3, 4, true) WHERE true", + shouldPass = true + ) + } + + test("Where clause with an attribute") { + checkResolverGuard( + "SELECT * FROM VALUES(1, 2, false), (3, 4, true) WHERE col3", + shouldPass = true + ) + } + + test("Explicit cast with auto-alias") { + checkResolverGuard( + "SELECT CAST(1 AS DECIMAL(3,2))", + shouldPass = true + ) + } + + test("Multipart attribute name") { + checkResolverGuard("SELECT table.col1 FROM VALUES(1) AS table", shouldPass = true) + } + + test("Predicates") { + checkResolverGuard("SELECT true and false", shouldPass = true) + checkResolverGuard("SELECT true or false", shouldPass = true) + checkResolverGuard( + "SELECT col1 from VALUES(1,2) where true and false or true", + shouldPass = true + ) + checkResolverGuard("SELECT 1 = 2", shouldPass = true) + checkResolverGuard("SELECT 1 != 2", shouldPass = true) + checkResolverGuard("SELECT 1 IN (1,2,3)", shouldPass = true) + checkResolverGuard("SELECT 1 NOT IN (1,2,3)", 
shouldPass = true) + checkResolverGuard("SELECT 1 IS NULL", shouldPass = true) + checkResolverGuard("SELECT 1 IS NOT NULL", shouldPass = true) + checkResolverGuard("SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR", shouldPass = true) + } + + test("Star target") { + checkResolverGuard("SELECT table.* FROM VALUES(1) as table", shouldPass = true) + } + + test("Binary arithmetic") { + checkResolverGuard("SELECT col1+col2 FROM VALUES(1,2)", shouldPass = true) + checkResolverGuard("SELECT 1 + 2.3 / 2 - 3 DIV 2 + 3.0 * 10.0", shouldPass = true) + checkResolverGuard( + "SELECT TIMESTAMP'2011-11-11 11:11:11' - TIMESTAMP'2011-11-11 11:11:10'", + shouldPass = true + ) + checkResolverGuard( + "SELECT DATE'2020-01-01' - TIMESTAMP'2019-10-06 10:11:12.345678'", + shouldPass = true + ) + checkResolverGuard("SELECT DATE'2012-01-01' - INTERVAL 3 HOURS", shouldPass = true) + checkResolverGuard( + "SELECT DATE'2012-01-01' + INTERVAL '12:12:12' HOUR TO SECOND", + shouldPass = true + ) + checkResolverGuard("SELECT DATE'2012-01-01' + 1", shouldPass = true) + checkResolverGuard("SELECT 2 * INTERVAL 2 YEAR", shouldPass = true) + } + + test("Supported recursive types") { + Seq("ARRAY", "MAP", "STRUCT").foreach { typeName => + checkResolverGuard( + s"SELECT col1 FROM VALUES($typeName(1,2),3)", + shouldPass = true + ) + } + } + + test("Recursive types related functions") { + checkResolverGuard("SELECT NAMED_STRUCT('a', 1)", shouldPass = true) + checkResolverGuard("SELECT MAP_CONTAINS_KEY(MAP(1, 'a', 2, 'b'), 2)", shouldPass = true) + checkResolverGuard("SELECT ARRAY_CONTAINS(ARRAY(1, 2, 3), 2);", shouldPass = true) + } + + test("Conditional expressions") { + checkResolverGuard("SELECT COALESCE(NULL, 1)", shouldPass = true) + checkResolverGuard("SELECT col1, IF(col1 > 1, 1, 0) FROM VALUES(1,2),(2,3)", shouldPass = true) + checkResolverGuard( + "SELECT col1, CASE WHEN col1 > 1 THEN 1 ELSE 0 END FROM VALUES(1,2),(2,3)", + shouldPass = true + ) + } + + test("User specified alias") { + 
checkResolverGuard("SELECT 1 AS alias", shouldPass = true) + } + + // Queries that shouldn't pass the OperatorResolverGuard + + test("Select from table") { + withTable("test_table") { + sql("CREATE TABLE test_table (col1 INT, col2 INT)") + checkResolverGuard("SELECT * FROM test_table", shouldPass = true) + } + } + + test("Single-layer subquery") { + checkResolverGuard("SELECT * FROM (SELECT * FROM VALUES(1))", shouldPass = true) + } + + test("Multi-layer subquery") { + checkResolverGuard("SELECT * FROM (SELECT * FROM (SELECT * FROM VALUES(1)))", shouldPass = true) + } + + test("Scalar subquery") { + checkResolverGuard("SELECT (SELECT * FROM VALUES(1))", shouldPass = false) + } + + test("EXISTS subquery") { + checkResolverGuard( + "SELECT * FROM VALUES (1) WHERE EXISTS (SELECT * FROM VALUES(1))", + shouldPass = false + ) + } + + test("IN subquery") { + checkResolverGuard( + "SELECT * FROM VALUES (1) WHERE col1 IN (SELECT * FROM VALUES(1))", + shouldPass = false + ) + } + + test("Function") { + checkResolverGuard("SELECT current_date()", shouldPass = false) + } + + test("Function without the braces") { + checkResolverGuard("SELECT current_date", shouldPass = false) + } + + test("Session variables") { + withSessionVariable { + checkResolverGuard("SELECT session_variable", shouldPass = false) + } + } + + test("Case sensitive analysis") { + withSQLConf("spark.sql.caseSensitive" -> "true") { + checkResolverGuard("SELECT 1", shouldPass = false) + } + } + + private def checkResolverGuard(query: String, shouldPass: Boolean): Unit = { + val resolverGuard = new ResolverGuard(spark.sessionState.catalogManager) + assert( + resolverGuard.apply(sql(query).queryExecution.logical) == shouldPass + ) + } + + private def withSessionVariable(body: => Unit): Unit = { + sql("DECLARE session_variable = 1;") + try { + body + } finally { + sql("DROP TEMPORARY VARIABLE session_variable;") + } + } +} diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ResolverSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ResolverSuite.scala new file mode 100644 index 0000000000000..057724758d332 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/ResolverSuite.scala @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.analysis.resolver + +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.analysis.resolver.{Resolver, ResolverExtension} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.IntegerType + +class ResolverSuite extends QueryTest with SharedSparkSession { + private val col1Integer = AttributeReference("col1", IntegerType)() + + test("Node matched the extension") { + val resolver = createResolver( + Seq( + new NoopResolver, + new TestRelationResolver + ) + ) + + val result = resolver.lookupMetadataAndResolve( + Project( + Seq(UnresolvedAttribute("col1")), + TestRelation(resolutionDone = false, output = Seq(col1Integer)) + ) + ) + assert( + result == Project( + Seq(col1Integer), + TestRelation(resolutionDone = true, output = Seq(col1Integer)) + ) + ) + } + + test("Node didn't match the extension") { + val resolver = createResolver( + Seq( + new NoopResolver, + new TestRelationResolver + ) + ) + + checkError( + exception = intercept[AnalysisException]( + resolver.lookupMetadataAndResolve( + Project( + Seq(UnresolvedAttribute("col1")), + UnknownRelation(output = Seq(col1Integer)) + ) + ) + ), + condition = "UNSUPPORTED_SINGLE_PASS_ANALYZER_FEATURE", + parameters = Map( + "feature" -> ("class " + + "org.apache.spark.sql.analysis.resolver.ResolverSuite$UnknownRelation operator resolution") + ) + ) + } + + test("Ambiguous extensions") { + val resolver = createResolver( + Seq( + new NoopResolver, + new TestRelationResolver, + new TestRelationBrokenResolver + ) + ) + + checkError( + exception = intercept[AnalysisException]( + resolver.lookupMetadataAndResolve( + Project( + Seq(UnresolvedAttribute("col1")), + TestRelation(resolutionDone = 
false, output = Seq(col1Integer)) + ) + ) + ), + condition = "AMBIGUOUS_RESOLVER_EXTENSION", + parameters = Map( + "operator" -> "org.apache.spark.sql.analysis.resolver.ResolverSuite$TestRelation", + "extensions" -> "TestRelationResolver, TestRelationBrokenResolver" + ) + ) + } + + private def createResolver(extensions: Seq[ResolverExtension] = Seq.empty): Resolver = { + new Resolver(spark.sessionState.catalogManager, extensions) + } + + private class TestRelationResolver extends ResolverExtension { + var timesCalled = 0 + + override def resolveOperator: PartialFunction[LogicalPlan, LogicalPlan] = { + case testNode: TestRelation if countTimesCalled() => + testNode.copy(resolutionDone = true) + } + + private def countTimesCalled(): Boolean = { + timesCalled += 1 + assert(timesCalled == 1) + true + } + } + + private class TestRelationBrokenResolver extends ResolverExtension { + override def resolveOperator: PartialFunction[LogicalPlan, LogicalPlan] = { + case testNode: TestRelation => + assert(false) + testNode + } + } + + private class NoopResolver extends ResolverExtension { + override def resolveOperator: PartialFunction[LogicalPlan, LogicalPlan] = { + case node: LogicalPlan if false => + assert(false) + node + } + } + + private case class TestRelation( + resolutionDone: Boolean, + override val output: Seq[Attribute], + override val children: Seq[LogicalPlan] = Seq.empty) + extends LogicalPlan { + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[LogicalPlan]): TestRelation = + copy(children = newChildren) + } + + private case class UnknownRelation( + override val output: Seq[Attribute], + override val children: Seq[LogicalPlan] = Seq.empty) + extends LogicalPlan { + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[LogicalPlan]): UnknownRelation = + copy(children = newChildren) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/TracksResolvedNodesSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/TracksResolvedNodesSuite.scala new file mode 100644 index 0000000000000..b7bf73f326fa8 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/analysis/resolver/TracksResolvedNodesSuite.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.analysis.resolver + +import org.apache.spark.SparkException +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.analysis.FunctionResolution +import org.apache.spark.sql.catalyst.analysis.resolver.{ + ExpressionResolver, + NameScopeStack, + PlanLogger, + Resolver +} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, ExprId} +import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{BooleanType, StringType} + +class TracksResolvedNodesSuite extends QueryTest with SharedSparkSession { + + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(SQLConf.ANALYZER_SINGLE_PASS_TRACK_RESOLVED_NODES_ENABLED.key, "true") + } + + test("Single-pass contract preserved for equal expressions with different memory addresses") { + val expressionResolver = createExpressionResolver() + val columnObjFirst = + AttributeReference(name = "column", dataType = BooleanType)(exprId = ExprId(0)) + val columnObjSecond = + AttributeReference(name = "column", dataType = BooleanType)(exprId = ExprId(0)) + + expressionResolver.resolve(columnObjFirst) + expressionResolver.resolve(columnObjSecond) + } + + test("Single-pass contract broken for operators") { + val resolver = createResolver() + + val project = Project( + projectList = Seq(), + child = Project( + projectList = Seq(), + child = OneRowRelation() + ) + ) + + val resolvedProject = resolver.lookupMetadataAndResolve(project) + + checkError( + exception = intercept[SparkException]({ + resolver.lookupMetadataAndResolve(resolvedProject.children.head) + }), + condition = "INTERNAL_ERROR", + parameters = Map( + "message" -> ("Single-pass resolver attempted to resolve the same " + + "node more than once: Project\n+- OneRowRelation\n") + ) + ) + checkError( + exception = 
intercept[SparkException]({ + resolver.lookupMetadataAndResolve(resolvedProject) + }), + condition = "INTERNAL_ERROR", + parameters = Map( + "message" -> ("Single-pass resolver attempted to resolve the same " + + "node more than once: Project\n+- Project\n +- OneRowRelation\n") + ) + ) + } + + test("Single-pass contract broken for expressions") { + val expressionResolver = createExpressionResolver() + + val cast = Cast( + child = AttributeReference(name = "column", dataType = BooleanType)(exprId = ExprId(0)), + dataType = StringType + ) + + val resolvedCast = expressionResolver.resolve(cast) + + checkError( + exception = intercept[SparkException]({ + expressionResolver.resolve(resolvedCast.children.head) + }), + condition = "INTERNAL_ERROR", + parameters = Map( + "message" -> ("Single-pass resolver attempted " + + "to resolve the same node more than once: column#0") + ) + ) + checkError( + exception = intercept[SparkException]({ + expressionResolver.resolve(resolvedCast) + }), + condition = "INTERNAL_ERROR", + parameters = Map( + "message" -> ("Single-pass resolver attempted " + + "to resolve the same node more than once: cast(column#0 as string)") + ) + ) + } + + private def createResolver(): Resolver = { + new Resolver(spark.sessionState.catalogManager) + } + + private def createExpressionResolver(): ExpressionResolver = { + new ExpressionResolver( + createResolver(), + new NameScopeStack, + new FunctionResolution( + spark.sessionState.catalogManager, + Resolver.createRelationResolution(spark.sessionState.catalogManager) + ), + new PlanLogger + ) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/artifact/ArtifactManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/artifact/ArtifactManagerSuite.scala index e935af8b8bf8c..a24982aea1585 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/artifact/ArtifactManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/artifact/ArtifactManagerSuite.scala @@ -23,6 +23,7 @@ import 
java.nio.file.{Files, Path, Paths} import org.apache.commons.io.FileUtils import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.SparkSession import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf @@ -47,7 +48,7 @@ class ArtifactManagerSuite extends SharedSparkSession { private def sessionUUID: String = spark.sessionUUID override def afterEach(): Unit = { - artifactManager.cleanUpResources() + artifactManager.cleanUpResourcesForTesting() super.afterEach() } @@ -208,7 +209,7 @@ class ArtifactManagerSuite extends SharedSparkSession { assert(expectedPath.toFile.exists()) // Remove resources - artifactManager.cleanUpResources() + artifactManager.cleanUpResourcesForTesting() assert(blockManager.getLocalBytes(blockId).isEmpty) assert(!expectedPath.toFile.exists()) @@ -293,7 +294,7 @@ class ArtifactManagerSuite extends SharedSparkSession { val sessionDirectory = artifactManager.artifactPath.toFile assert(sessionDirectory.exists()) - artifactManager.cleanUpResources() + artifactManager.cleanUpResourcesForTesting() assert(!sessionDirectory.exists()) assert(ArtifactManager.artifactRootDirectory.toFile.exists()) } @@ -447,4 +448,58 @@ class ArtifactManagerSuite extends SharedSparkSession { assert(msg == "Hello Talon! 
Nice to meet you!") } } + + test("Codegen cache should be invalid when artifacts are added - class artifact") { + withTempDir { dir => + runCodegenTest("class artifact") { + val randomFilePath = dir.toPath.resolve("random.class") + val testBytes = "test".getBytes(StandardCharsets.UTF_8) + Files.write(randomFilePath, testBytes) + spark.addArtifact(randomFilePath.toString) + } + } + } + + test("Codegen cache should be invalid when artifacts are added - JAR artifact") { + withTempDir { dir => + runCodegenTest("JAR artifact") { + val randomFilePath = dir.toPath.resolve("random.jar") + val testBytes = "test".getBytes(StandardCharsets.UTF_8) + Files.write(randomFilePath, testBytes) + spark.addArtifact(randomFilePath.toString) + } + } + } + + private def getCodegenCount: Long = CodegenMetrics.METRIC_COMPILATION_TIME.getCount + + private def runCodegenTest(msg: String)(addOneArtifact: => Unit): Unit = { + withSQLConf(SQLConf.ARTIFACTS_SESSION_ISOLATION_ALWAYS_APPLY_CLASSLOADER.key -> "true") { + val s = spark + import s.implicits._ + + val count1 = getCodegenCount + // trigger codegen for Dataset + Seq(Seq("abc")).toDS().collect() + val count2 = getCodegenCount + // codegen happens + assert(count2 > count1, s"$msg: codegen should happen at the first time") + + // add one artifact, codegen cache should be invalid after this + addOneArtifact + + // trigger codegen for another Dataset of same type + Seq(Seq("abc")).toDS().collect() + // codegen cache should not work for Datasets of same type. 
+ val count3 = getCodegenCount + assert(count3 > count2, s"$msg: codegen should happen again after adding artifact") + + // trigger again + Seq(Seq("abc")).toDS().collect() + // codegen should work now as classloader is not changed + val count4 = getCodegenCount + assert(count4 == count3, + s"$msg: codegen should not happen again as classloader is not changed") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollatedFilterPushDownToParquetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollatedFilterPushDownToParquetSuite.scala index 9b54fe4bb052c..8bb4a1c803e8e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollatedFilterPushDownToParquetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollatedFilterPushDownToParquetSuite.scala @@ -43,7 +43,7 @@ abstract class CollatedFilterPushDownToParquetSuite extends QueryTest val collatedStructNestedCol = "f1" val collatedStructFieldAccess = s"$collatedStructCol.$collatedStructNestedCol" val collatedArrayCol = "c3" - val collatedMapCol = "c4" + val nonCollatedMapCol = "c4" val lcaseCollation = "'UTF8_LCASE'" @@ -69,7 +69,7 @@ abstract class CollatedFilterPushDownToParquetSuite extends QueryTest | named_struct('$collatedStructNestedCol', | COLLATE(c, $lcaseCollation)) as $collatedStructCol, | array(COLLATE(c, $lcaseCollation)) as $collatedArrayCol, - | map(COLLATE(c, $lcaseCollation), 1) as $collatedMapCol + | map(c, 1) as $nonCollatedMapCol |FROM VALUES ('aaa'), ('AAA'), ('bbb') |as data(c) |""".stripMargin) @@ -215,9 +215,9 @@ abstract class CollatedFilterPushDownToParquetSuite extends QueryTest test("map - parquet does not support null check on complex types") { testPushDown( - filterString = s"map_keys($collatedMapCol) != array(collate('aaa', $lcaseCollation))", + filterString = s"map_keys($nonCollatedMapCol) != array('aaa')", expectedPushedFilters = Seq.empty, - expectedRowCount = 1) + expectedRowCount = 2) } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationTypePrecedenceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationTypePrecedenceSuite.scala index 4a904a85e0a7b..7df54b372e8a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationTypePrecedenceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollationTypePrecedenceSuite.scala @@ -19,11 +19,17 @@ package org.apache.spark.sql.collation import org.apache.spark.SparkThrowable import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { val dataSource: String = "parquet" + val UNICODE_COLLATION_NAME = "SYSTEM.BUILTIN.UNICODE" + val UNICODE_CI_COLLATION_NAME = "SYSTEM.BUILTIN.UNICODE_CI" + val UTF8_BINARY_COLLATION_NAME = "SYSTEM.BUILTIN.UTF8_BINARY" + val UTF8_LCASE_COLLATION_NAME = "SYSTEM.BUILTIN.UTF8_LCASE" private def assertThrowsError(df: => DataFrame, errorClass: String): Unit = { val exception = intercept[SparkThrowable] { @@ -38,22 +44,27 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { private def assertImplicitMismatch(df: => DataFrame): Unit = assertThrowsError(df, "COLLATION_MISMATCH.IMPLICIT") + private def assertQuerySchema(df: => DataFrame, expectedSchema: DataType): Unit = { + val querySchema = df.schema.fields.head.dataType + assert(DataType.equalsIgnoreNullability(querySchema, expectedSchema)) + } + test("explicit collation propagates up") { checkAnswer( sql(s"SELECT COLLATION('a' collate unicode)"), - Row("UNICODE")) + Row(UNICODE_COLLATION_NAME)) checkAnswer( sql(s"SELECT COLLATION('a' collate unicode || 'b')"), - Row("UNICODE")) + Row(UNICODE_COLLATION_NAME)) checkAnswer( sql(s"SELECT COLLATION(SUBSTRING('a' collate unicode, 0, 1))"), - Row("UNICODE")) + 
Row(UNICODE_COLLATION_NAME)) checkAnswer( sql(s"SELECT COLLATION(SUBSTRING('a' collate unicode, 0, 1) || 'b')"), - Row("UNICODE")) + Row(UNICODE_COLLATION_NAME)) assertExplicitMismatch( sql(s"SELECT COLLATION('a' collate unicode || 'b' collate utf8_lcase)")) @@ -68,9 +79,9 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { test("implicit collation in columns") { val tableName = "implicit_coll_tbl" - val c1Collation = "UNICODE" - val c2Collation = "UNICODE_CI" - val structCollation = "UTF8_LCASE" + val c1Collation = UNICODE_COLLATION_NAME + val c2Collation = UNICODE_CI_COLLATION_NAME + val structCollation = UTF8_LCASE_COLLATION_NAME withTable(tableName) { sql(s""" |CREATE TABLE $tableName ( @@ -99,9 +110,57 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { } } + test("lateral alias has implicit strength") { + checkAnswer( + sql(""" + |SELECT + | a collate unicode as col1, + | COLLATION(col1 || 'b') + |FROM VALUES ('a') AS t(a) + |""".stripMargin), + Row("a", UNICODE_COLLATION_NAME)) + + assertImplicitMismatch( + sql(""" + |SELECT + | a collate unicode as col1, + | a collate utf8_lcase as col2, + | col1 = col2 + |FROM VALUES ('a') AS t(a) + |""".stripMargin)) + + checkAnswer( + sql(""" + |SELECT + | a collate unicode as col1, + | COLLATION(col1 || 'b' collate UTF8_LCASE) + |FROM VALUES ('a') AS t(a) + |""".stripMargin), + Row("a", UTF8_LCASE_COLLATION_NAME)) + } + + test("outer reference has implicit strength") { + val tableName = "outer_ref_tbl" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (c STRING COLLATE UNICODE_CI, c1 STRING) USING $dataSource") + sql(s"INSERT INTO $tableName VALUES ('a', 'a'), ('A', 'A')") + + checkAnswer( + sql(s"SELECT DISTINCT (SELECT COLLATION(c || 'a')) FROM $tableName"), + Seq(Row(UNICODE_CI_COLLATION_NAME))) + + assertImplicitMismatch( + sql(s"SELECT DISTINCT (SELECT COLLATION(c || c1)) FROM $tableName")) + + checkAnswer( + sql(s"SELECT DISTINCT (SELECT 
COLLATION(c || 'a' collate utf8_lcase)) FROM $tableName"), + Seq(Row(UTF8_LCASE_COLLATION_NAME))) + } + } + test("variables have implicit collation") { - val v1Collation = "UTF8_BINARY" - val v2Collation = "UTF8_LCASE" + val v1Collation = UTF8_BINARY_COLLATION_NAME + val v2Collation = UTF8_LCASE_COLLATION_NAME sql(s"DECLARE v1 = 'a'") sql(s"DECLARE v2 = 'b' collate $v2Collation") @@ -115,7 +174,7 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { checkAnswer( sql(s"SELECT COLLATION(v2 || 'a' COLLATE UTF8_BINARY)"), - Row("UTF8_BINARY")) + Row(UTF8_BINARY_COLLATION_NAME)) checkAnswer( sql(s"SELECT COLLATION(SUBSTRING(v2, 0, 1) || 'a')"), @@ -137,33 +196,130 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { // Simple subquery with explicit collation checkAnswer( sql(s"SELECT COLLATION((SELECT 'text' COLLATE UTF8_BINARY) || 'suffix')"), - Row("UTF8_BINARY") + Row(UTF8_BINARY_COLLATION_NAME) ) checkAnswer( sql(s"SELECT COLLATION((SELECT 'text' COLLATE UTF8_LCASE) || 'suffix')"), - Row("UTF8_LCASE") + Row(UTF8_LCASE_COLLATION_NAME) ) // Nested subquery should retain the collation of the deepest expression checkAnswer( sql(s"SELECT COLLATION((SELECT (SELECT 'inner' COLLATE UTF8_LCASE) || 'outer'))"), - Row("UTF8_LCASE") + Row(UTF8_LCASE_COLLATION_NAME) ) checkAnswer( sql(s"SELECT COLLATION((SELECT (SELECT 'inner' COLLATE UTF8_BINARY) || 'outer'))"), - Row("UTF8_BINARY") + Row(UTF8_BINARY_COLLATION_NAME) ) // Subqueries with mixed collations should follow collation precedence rules checkAnswer( sql(s"SELECT COLLATION((SELECT 'string1' COLLATE UTF8_LCASE || " + s"(SELECT 'string2' COLLATE UTF8_BINARY)))"), - Row("UTF8_LCASE") + Row(UTF8_LCASE_COLLATION_NAME) ) } + test("in subquery expression") { + val tableName = "subquery_tbl" + withTable(tableName) { + sql(s""" + |CREATE TABLE $tableName ( + | c1 STRING COLLATE UTF8_LCASE, + | c2 STRING COLLATE UNICODE + |) USING $dataSource + |""".stripMargin) + + 
sql(s"INSERT INTO $tableName VALUES ('a', 'A')") + + assertImplicitMismatch( + sql(s""" + |SELECT * FROM $tableName + |WHERE c1 IN (SELECT c2 FROM $tableName) + |""".stripMargin)) + + // this fails since subquery expression collation is implicit by default + assertImplicitMismatch( + sql(s""" + |SELECT * FROM $tableName + |WHERE c1 IN (SELECT c2 collate unicode FROM $tableName) + |""".stripMargin)) + + checkAnswer( + sql(s""" + |SELECT COUNT(*) FROM $tableName + |WHERE c1 collate utf8_lcase IN (SELECT c2 collate unicode FROM $tableName) + |""".stripMargin), + Seq(Row(1))) + + checkAnswer( + sql(s""" + |SELECT COUNT(*) FROM $tableName + |WHERE c1 collate utf8_lcase IN (SELECT c2 FROM $tableName) + |""".stripMargin), + Seq(Row(1))) + + checkAnswer( + sql(s""" + |SELECT COUNT(*) FROM $tableName + |WHERE c1 collate unicode IN (SELECT c2 FROM $tableName) + |""".stripMargin), + Seq(Row(0))) + + checkAnswer( + sql(s""" + |SELECT COUNT(*) FROM $tableName + |WHERE c1 collate unicode IN (SELECT c2 FROM $tableName + | WHERE c2 collate unicode IN (SELECT c1 FROM $tableName)) + |""".stripMargin), + Seq(Row(0))) + } + } + + test("scalar subquery") { + val tableName = "scalar_subquery_tbl" + withTable(tableName) { + sql(s""" + |CREATE TABLE $tableName ( + | c1 STRING COLLATE UTF8_LCASE, + | c2 STRING COLLATE UNICODE + |) USING $dataSource + |""".stripMargin) + + sql(s"INSERT INTO $tableName VALUES ('a', 'A')") + + assertImplicitMismatch( + sql(s""" + |SELECT * FROM $tableName + |WHERE c1 = (SELECT MAX(c2) FROM $tableName) + |""".stripMargin)) + + checkAnswer( + sql(s""" + |SELECT COUNT(*) FROM $tableName + |WHERE c1 collate utf8_lcase = (SELECT MAX(c2) collate unicode FROM $tableName) + |""".stripMargin), + Seq(Row(1))) + + checkAnswer( + sql(s""" + |SELECT COUNT(*) FROM $tableName + |WHERE c1 collate utf8_lcase = (SELECT MAX(c2) FROM $tableName) + |""".stripMargin), + Seq(Row(1))) + + checkAnswer( + sql(s""" + |SELECT COUNT(*) FROM $tableName + |WHERE c1 collate unicode = 
(SELECT MAX(c2) FROM $tableName) + |""".stripMargin), + Seq(Row(0))) + } + } + test("struct test") { val tableName = "struct_tbl" val c1Collation = "UNICODE_CI" @@ -180,28 +336,28 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { checkAnswer( sql(s"SELECT COLLATION(c2.col1.col1 || 'a') FROM $tableName"), - Seq(Row(c2Collation))) + Seq(Row(UNICODE_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION(c1.col1 || 'a') FROM $tableName"), - Seq(Row(c1Collation))) + Seq(Row(UNICODE_CI_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION(c1.col1 || 'a' collate UNICODE) FROM $tableName"), - Seq(Row("UNICODE"))) + Seq(Row(UNICODE_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION(struct('a').col1 || 'a' collate UNICODE) FROM $tableName"), - Seq(Row("UNICODE"))) + Seq(Row(UNICODE_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION(struct('a' collate UNICODE).col1 || 'a') FROM $tableName"), - Seq(Row("UNICODE"))) + Seq(Row(UNICODE_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION(struct('a').col1 collate UNICODE || 'a' collate UNICODE) " + s"FROM $tableName"), - Seq(Row("UNICODE"))) + Seq(Row(UNICODE_COLLATION_NAME))) assertExplicitMismatch( sql(s"SELECT COLLATION(struct('a').col1 collate UNICODE || 'a' collate UTF8_LCASE) " + @@ -229,7 +385,7 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { checkAnswer( sql(s"SELECT collation(element_at(array('a', 'b' collate utf8_lcase), 1))"), - Seq(Row("UTF8_LCASE"))) + Seq(Row(UTF8_LCASE_COLLATION_NAME))) assertExplicitMismatch( sql(s"SELECT collation(element_at(array('a' collate unicode, 'b' collate utf8_lcase), 1))") @@ -238,17 +394,17 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { checkAnswer( sql(s"SELECT collation(element_at(array('a', 'b' collate utf8_lcase), 1) || c1)" + s"from $tableName"), - Seq(Row("UTF8_LCASE"))) + Seq(Row(UTF8_LCASE_COLLATION_NAME))) checkAnswer( sql(s"SELECT collation(element_at(array_append(c2, 
'd'), 1)) FROM $tableName"), - Seq(Row(arrayCollation)) + Seq(Row(UNICODE_CI_COLLATION_NAME)) ) checkAnswer( sql(s"SELECT collation(element_at(array_append(c2, 'd' collate utf8_lcase), 1))" + s"FROM $tableName"), - Seq(Row("UTF8_LCASE")) + Seq(Row(UTF8_LCASE_COLLATION_NAME)) ) } } @@ -262,65 +418,55 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { checkAnswer( sql(s"SELECT COLLATION(c1[0]) FROM $tableName"), - Seq(Row(columnCollation))) + Seq(Row(UNICODE_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION(cast(c1 AS ARRAY)[0]) FROM $tableName"), - Seq(Row("UTF8_BINARY"))) + Seq(Row(UTF8_BINARY_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION(cast(c1 AS ARRAY)[0]) FROM $tableName"), - Seq(Row("UTF8_LCASE"))) + Seq(Row(UTF8_LCASE_COLLATION_NAME))) } } test("user defined cast") { val tableName = "dflt_coll_tbl" - val columnCollation = "UNICODE" + val columnCollation = UNICODE_COLLATION_NAME withTable(tableName) { sql(s"CREATE TABLE $tableName (c1 STRING COLLATE $columnCollation) USING $dataSource") sql(s"INSERT INTO $tableName VALUES ('a')") - // only for non string inputs cast results in default collation checkAnswer( - sql(s"SELECT COLLATION(c1 || CAST(to_char(DATE'2016-04-08', 'y') AS STRING)) " + - s"FROM $tableName"), - Seq(Row(columnCollation))) + sql(s"SELECT COLLATION(CAST(5 AS STRING)) FROM $tableName"), + Seq(Row(UTF8_BINARY_COLLATION_NAME))) checkAnswer( - sql(s"SELECT COLLATION(CAST(to_char(DATE'2016-04-08', 'y') AS STRING)) " + - s"FROM $tableName"), - Seq(Row("UTF8_BINARY"))) - - // for string inputs collation is of the child expression - checkAnswer( - sql(s"SELECT COLLATION(CAST('a' AS STRING)) FROM $tableName"), - Seq(Row("UTF8_BINARY"))) + sql(s"SELECT c1 = cast(5 AS STRING) FROM $tableName"), + Seq(Row(false))) checkAnswer( sql(s"SELECT COLLATION(CAST(c1 AS STRING)) FROM $tableName"), - Seq(Row(columnCollation))) + Seq(Row(UTF8_BINARY_COLLATION_NAME))) checkAnswer( - sql(s"SELECT COLLATION(CAST(c1 collate 
UTF8_LCASE AS STRING)) FROM $tableName"), - Seq(Row("UTF8_LCASE"))) + sql(s"SELECT c1 = cast(c1 as STRING COLLATE UNICODE) FROM $tableName"), + Seq(Row(true))) checkAnswer( - sql(s"SELECT COLLATION(c1 || CAST('a' AS STRING)) FROM $tableName"), - Seq(Row(columnCollation))) + sql(s"SELECT c1 = cast(5 as STRING COLLATE UNICODE) FROM $tableName"), + Seq(Row(false))) checkAnswer( - sql(s"SELECT COLLATION(c1 || CAST('a' collate UTF8_LCASE AS STRING)) FROM $tableName"), - Seq(Row("UTF8_LCASE"))) + sql(s"SELECT COLLATION(CAST(c1 collate UTF8_LCASE AS STRING)) FROM $tableName"), + Seq(Row(UTF8_BINARY_COLLATION_NAME))) - checkAnswer( - sql(s"SELECT COLLATION(c1 || CAST(c1 AS STRING)) FROM $tableName"), - Seq(Row(columnCollation))) + assertImplicitMismatch( + sql(s"SELECT c1 = CAST(c1 AS STRING) FROM $tableName")) - checkAnswer( - sql(s"SELECT COLLATION(c1 || SUBSTRING(CAST(c1 AS STRING), 0, 1)) FROM $tableName"), - Seq(Row(columnCollation))) - } + assertImplicitMismatch( + sql(s"SELECT c1 = CAST(to_char(DATE'2016-04-08', 'y') AS STRING) FROM $tableName")) + } } test("str fns without params have default strength") { @@ -332,26 +478,26 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { checkAnswer( sql(s"SELECT COLLATION('a' collate utf8_lcase || current_database()) FROM $tableName"), - Seq(Row("UTF8_LCASE"))) + Seq(Row(UTF8_LCASE_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION(c1 || current_database()) FROM $tableName"), - Seq(Row(columnCollation))) + Seq(Row(UNICODE_COLLATION_NAME))) checkAnswer( sql(s"SELECT COLLATION('a' || current_database()) FROM $tableName"), - Seq(Row("UTF8_BINARY"))) + Seq(Row(UTF8_BINARY_COLLATION_NAME))) } } test("functions that contain both string and non string params") { checkAnswer( sql(s"SELECT COLLATION(elt(2, 'a', 'b'))"), - Row("UTF8_BINARY")) + Row(UTF8_BINARY_COLLATION_NAME)) checkAnswer( sql(s"SELECT COLLATION(elt(2, 'a' collate UTF8_LCASE, 'b'))"), - Row("UTF8_LCASE")) + 
Row(UTF8_LCASE_COLLATION_NAME)) assertExplicitMismatch( sql(s"SELECT COLLATION(elt(2, 'a' collate UTF8_LCASE, 'b' collate UNICODE))")) @@ -377,13 +523,219 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { s"'name2' collate utf8_lcase, 'value2' collate unicode)"), Row(Row("value1", "value2"))) - assertExplicitMismatch( + checkAnswer( sql(s"SELECT named_struct" + - s"('name1' collate unicode, 'value1', 'name2' collate utf8_lcase, 'value2')")) + s"('name1' collate unicode, 'value1', 'name2' collate utf8_lcase, 'value2')"), + Row(Row("value1", "value2"))) - assertExplicitMismatch( + checkAnswer( sql(s"SELECT named_struct" + - s"('name1', 'value1' collate unicode, 'name2', 'value2' collate utf8_lcase)")) + s"('name1', 'value1' collate unicode, 'name2', 'value2' collate utf8_lcase)"), + Row(Row("value1", "value2"))) + } + + test("coercing structs") { + assertQuerySchema( + sql(s"SELECT array(struct(1, 'a'), struct(2, 'b' collate utf8_lcase))"), + ArrayType( + StructType( + Seq(StructField("col1", IntegerType), StructField("col2", StringType("UTF8_LCASE")))))) + + assertQuerySchema( + sql(s"SELECT array(struct(1, 'a' collate utf8_lcase), struct(2, 'b' collate utf8_lcase))"), + ArrayType( + StructType( + Seq(StructField("col1", IntegerType), StructField("col2", StringType("UTF8_LCASE")))))) + + assertExplicitMismatch( + sql(s"SELECT array(struct(1, 'a' collate utf8_lcase), struct(2, 'b' collate unicode))")) + + assertImplicitMismatch(sql(s""" + |SELECT array(struct(1, c1), struct(2, c2)) + |FROM VALUES ('a' collate unicode, 'b' collate utf8_lcase) AS t(c1, c2) + |""".stripMargin)) + } + + test("coercing maps") { + assertQuerySchema( + sql(s"SELECT map('key1', 'val1', 'key2', 'val2')"), + MapType(StringType, StringType)) + + assertQuerySchema( + sql(s"SELECT map('key1' collate utf8_lcase, 'val1', 'key2', 'val2' collate unicode)"), + MapType(StringType("UTF8_LCASE"), StringType("UNICODE"))) + + assertQuerySchema( + sql(s"SELECT 
ARRAY(map('key1', 'val1'), map('key2' collate UNICODE, 'val2'))"), + ArrayType(MapType(StringType("UNICODE"), StringType))) + + assertExplicitMismatch( + sql(s"SELECT map('key1', 'val1' collate utf8_lcase, 'key2', 'val2' collate unicode)")) + } + + test("user defined cast on maps") { + checkAnswer( + sql(s""" + |SELECT map_contains_key( + | map('a' collate utf8_lcase, 'b'), + | 'A' collate utf8_lcase) + |""".stripMargin), + Seq(Row(true))) + + checkAnswer( + sql(s""" + |SELECT map_contains_key( + | CAST(map('a' collate utf8_lcase, 'b') AS MAP), + | 'A') + |""".stripMargin), + Seq(Row(false))) + + checkAnswer( + sql(s""" + |SELECT map_contains_key( + | CAST(map('a' collate utf8_lcase, 'b') AS MAP), + | 'A' COLLATE UNICODE) + |""".stripMargin), + Seq(Row(false))) + } + + test("maps of structs") { + assertQuerySchema( + sql(s"SELECT map('key1', struct(1, 'a' collate unicode), 'key2', struct(2, 'b'))"), + MapType( + StringType, + StructType( + Seq(StructField("col1", IntegerType), StructField("col2", StringType("UNICODE")))))) + + checkAnswer( + sql( + s"SELECT map('key1', struct(1, 'a' collate unicode_ci)," + + s"'key2', struct(2, 'b'))['key1'].col2 = 'A'"), + Seq(Row(true))) + } + + test("coercing arrays") { + assertQuerySchema(sql(s"SELECT array('a', 'b')"), ArrayType(StringType)) + + assertQuerySchema( + sql(s"SELECT array('a' collate utf8_lcase, 'b')"), + ArrayType(StringType("UTF8_LCASE"))) + + assertQuerySchema( + sql(s"SELECT array('a' collate utf8_lcase, 'b' collate utf8_lcase)"), + ArrayType(StringType("UTF8_LCASE"))) + + assertExplicitMismatch(sql(s"SELECT array('a' collate utf8_lcase, 'b' collate unicode)")) + + assertQuerySchema( + sql(s"SELECT array(array('a', 'b'), array('c' collate utf8_lcase, 'd'))"), + ArrayType(ArrayType(StringType("UTF8_LCASE")))) + + checkAnswer( + sql(s"SELECT array('a', 'b') = array('A' collate utf8_lcase, 'B')"), + Seq(Row(true))) + + checkAnswer( + sql(s"SELECT array('a', 'b')[0] = array('A' collate utf8_lcase, 'B')[1]"), + 
Seq(Row(false))) + + assertExplicitMismatch( + sql(s"SELECT array('a', 'b' collate unicode) = array('A' collate utf8_lcase, 'B')")) + } + + test("user defined cast on arrays") { + checkAnswer( + sql(s""" + |SELECT array_contains( + | array('a', 'b' collate utf8_lcase), + | 'A') + |""".stripMargin), + Seq(Row(true))) + + // should be false because ARRAY should take precedence + // over UTF8_LCASE in array creation + checkAnswer( + sql(s""" + |SELECT array_contains( + | CAST(array('a', 'b' collate utf8_lcase) AS ARRAY), + | 'A') + |""".stripMargin), + Seq(Row(false))) + + checkAnswer( + sql(s""" + |SELECT array_contains( + | CAST(array('a', 'b' collate utf8_lcase) AS ARRAY), + | 'A') + |""".stripMargin), + Seq(Row(false))) + + checkAnswer( + sql(s""" + |SELECT array_contains( + | CAST(array('a', 'b' collate utf8_lcase) AS ARRAY), + | 'A' collate unicode) + |""".stripMargin), + Seq(Row(false))) + } + + test("array of structs") { + assertQuerySchema( + sql(s"SELECT array(struct(1, 'a' collate unicode), struct(2, 'b'))[0]"), + StructType( + Seq(StructField("col1", IntegerType), StructField("col2", StringType("UNICODE"))))) + + checkAnswer( + sql(s"SELECT array(struct(1, 'a' collate unicode_ci), struct(2, 'b'))[0].col2 = 'A'"), + Seq(Row(true))) + } + + test("coercing deeply nested complex types") { + assertQuerySchema( + sql(s""" + |SELECT struct( + | struct(1, 'nested' collate unicode), + | array( + | struct(1, 'a' collate utf8_lcase), + | struct(2, 'b' collate utf8_lcase) + | ) + |) + |""".stripMargin), + StructType( + Seq( + StructField( + "col1", + StructType( + Seq(StructField("col1", IntegerType), StructField("col2", StringType("UNICODE"))))), + StructField( + "col2", + ArrayType( + StructType(Seq( + StructField("col1", IntegerType), + StructField("col2", StringType("UTF8_LCASE"))))))))) + + assertQuerySchema( + sql(s""" + |SELECT struct( + | struct( + | array( + | map('key1' collate utf8_lcase, 'val1', + | 'key2', 'val2'), + | map('key3', 'val3' collate unicode) 
+ | ) + | ), + | 42 + |) + |""".stripMargin), + StructType( + Seq( + StructField( + "col1", + StructType( + Seq(StructField( + "col1", + ArrayType(MapType(StringType("UTF8_LCASE"), StringType("UNICODE"))))))), + StructField("col2", IntegerType)))) } test("access collated map via literal") { @@ -393,27 +745,30 @@ class CollationTypePrecedenceSuite extends QueryTest with SharedSparkSession { sql(s"SELECT c1 FROM $tableName WHERE $condition = 'B'") withTable(tableName) { - sql(s""" - |CREATE TABLE $tableName ( - | c1 MAP, - | c2 STRING - |) USING $dataSource - |""".stripMargin) - - sql(s"INSERT INTO $tableName VALUES (map('a', 'b'), 'a')") - - Seq("c1['A']", - "c1['A' COLLATE UNICODE_CI]", - "c1[c2 COLLATE UNICODE_CI]").foreach { condition => - checkAnswer(selectQuery(condition), Seq(Row(Map("a" -> "b")))) - } - - Seq( - // different explicit collation - "c1['A' COLLATE UNICODE]", - // different implicit collation - "c1[c2]").foreach { condition => - assertThrowsError(selectQuery(condition), "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + withSQLConf(SQLConf.ALLOW_COLLATIONS_IN_MAP_KEYS.key -> "true") { + sql( + s""" + |CREATE TABLE $tableName ( + | c1 MAP, + | c2 STRING + |) USING $dataSource + |""".stripMargin) + + sql(s"INSERT INTO $tableName VALUES (map('a', 'b'), 'a')") + + Seq("c1['A']", + "c1['A' COLLATE UNICODE_CI]", + "c1[c2 COLLATE UNICODE_CI]").foreach { condition => + checkAnswer(selectQuery(condition), Seq(Row(Map("a" -> "b")))) + } + + Seq( + // different explicit collation + "c1['A' COLLATE UNICODE]", + // different implicit collation + "c1[c2]").foreach { condition => + assertThrowsError(selectQuery(condition), "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala new file mode 100644 index 0000000000000..69f1c6da65d12 --- /dev/null +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala @@ -0,0 +1,502 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.collation + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.util.CollationFactory +import org.apache.spark.sql.connector.DatasourceV2SQLBase +import org.apache.spark.sql.internal.SqlApiConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StringType + +abstract class DefaultCollationTestSuite extends QueryTest with SharedSparkSession { + + def dataSource: String = "parquet" + def testTable: String = "test_tbl" + def testView: String = "test_view" + protected val fullyQualifiedPrefix = s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}." 
+ + def withSessionCollationAndTable(collation: String, testTables: String*)(f: => Unit): Unit = { + withTable(testTables: _*) { + withSessionCollation(collation) { + f + } + } + } + + def withSessionCollationAndView(collation: String, viewNames: String*)(f: => Unit): Unit = { + withView(viewNames: _*) { + withSessionCollation(collation) { + f + } + } + } + + def withSessionCollation(collation: String)(f: => Unit): Unit = { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) { + f + } + } + + def assertTableColumnCollation( + table: String, + column: String, + expectedCollation: String): Unit = { + val colType = spark.table(table).schema(column).dataType + assert(colType === StringType(expectedCollation)) + } + + def assertThrowsImplicitMismatch(f: => DataFrame): Unit = { + val exception = intercept[AnalysisException] { + f + } + assert(exception.getCondition === "COLLATION_MISMATCH.IMPLICIT") + } + + // region DDL tests + + test("create/alter table") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + // create table with implicit collation + sql(s"CREATE TABLE $testTable (c1 STRING) USING $dataSource") + assertTableColumnCollation(testTable, "c1", "UTF8_BINARY") + + // alter table add column with implicit collation + sql(s"ALTER TABLE $testTable ADD COLUMN c2 STRING") + assertTableColumnCollation(testTable, "c2", "UTF8_BINARY") + + sql(s"ALTER TABLE $testTable ALTER COLUMN c2 TYPE STRING COLLATE UNICODE") + assertTableColumnCollation(testTable, "c2", "UNICODE") + + sql(s"ALTER TABLE $testTable ALTER COLUMN c2 TYPE STRING") + assertTableColumnCollation(testTable, "c2", "UTF8_BINARY") + } + } + + test("create table with explicit collation") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING COLLATE UTF8_LCASE) USING $dataSource") + assertTableColumnCollation(testTable, "c1", "UTF8_LCASE") + } + + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING 
COLLATE UNICODE) USING $dataSource") + assertTableColumnCollation(testTable, "c1", "UNICODE") + } + } + + test("create table as select") { + // literals in select do not pick up session collation + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s""" + |CREATE TABLE $testTable USING $dataSource AS SELECT + | 'a' AS c1, + | 'a' || 'a' AS c2, + | SUBSTRING('a', 1, 1) AS c3, + | SUBSTRING(SUBSTRING('ab', 1, 1), 1, 1) AS c4, + | 'a' = 'A' AS truthy + |""".stripMargin) + assertTableColumnCollation(testTable, "c1", "UTF8_BINARY") + assertTableColumnCollation(testTable, "c2", "UTF8_BINARY") + assertTableColumnCollation(testTable, "c3", "UTF8_BINARY") + assertTableColumnCollation(testTable, "c4", "UTF8_BINARY") + + checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable WHERE truthy"), Seq(Row(0))) + } + + // literals in inline table do not pick up session collation + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s""" + |CREATE TABLE $testTable USING $dataSource AS + |SELECT c1, c1 = 'A' as c2 FROM VALUES ('a'), ('A') AS vals(c1) + |""".stripMargin) + assertTableColumnCollation(testTable, "c1", "UTF8_BINARY") + checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable WHERE c2"), Seq(Row(1))) + } + + // cast in select does not pick up session collation + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable USING $dataSource AS SELECT cast('a' AS STRING) AS c1") + assertTableColumnCollation(testTable, "c1", "UTF8_BINARY") + } + } + + test("ctas with complex types") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s""" + |CREATE TABLE $testTable USING $dataSource AS + |SELECT + | struct('a') AS c1, + | map('a', 'b') AS c2, + | array('a') AS c3 + |""".stripMargin) + + checkAnswer(sql(s"SELECT COLLATION(c1.col1) FROM $testTable"), + Seq(Row(fullyQualifiedPrefix + "UTF8_BINARY"))) + checkAnswer(sql(s"SELECT COLLATION(c2['a']) FROM $testTable"), + Seq(Row(fullyQualifiedPrefix + "UTF8_BINARY"))) + 
checkAnswer(sql(s"SELECT COLLATION(c3[0]) FROM $testTable"), + Seq(Row(fullyQualifiedPrefix + "UTF8_BINARY"))) + } + } + + test("ctas with union") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s""" + |CREATE TABLE $testTable USING $dataSource AS + |SELECT 'a' = 'A' AS c1 + |UNION + |SELECT 'b' = 'B' AS c1 + |""".stripMargin) + + checkAnswer(sql(s"SELECT * FROM $testTable"), Seq(Row(false))) + } + + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s""" + |CREATE TABLE $testTable USING $dataSource AS + |SELECT 'a' = 'A' AS c1 + |UNION ALL + |SELECT 'b' = 'B' AS c1 + |""".stripMargin) + + checkAnswer(sql(s"SELECT * FROM $testTable"), Seq(Row(false), Row(false))) + } + } + + test("add column") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING COLLATE UTF8_LCASE) USING $dataSource") + assertTableColumnCollation(testTable, "c1", "UTF8_LCASE") + + sql(s"ALTER TABLE $testTable ADD COLUMN c2 STRING") + assertTableColumnCollation(testTable, "c2", "UTF8_BINARY") + + sql(s"ALTER TABLE $testTable ADD COLUMN c3 STRING COLLATE UNICODE") + assertTableColumnCollation(testTable, "c3", "UNICODE") + } + } + + test("inline table in CTAS") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s""" + |CREATE TABLE $testTable + |USING $dataSource + |AS SELECT * + |FROM (VALUES ('a', 'a' = 'A')) + |AS inline_table(c1, c2); + |""".stripMargin) + + assertTableColumnCollation(testTable, "c1", "UTF8_BINARY") + checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable WHERE c2"), Seq(Row(0))) + } + } + + test("subsequent analyzer iterations correctly resolve default string types") { + // since concat coercion happens after resolving default types this test + // makes sure that we are correctly resolving the default string types + // in subsequent analyzer iterations + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s""" + |CREATE TABLE $testTable + |USING $dataSource AS + |SELECT 
CONCAT(X'68656C6C6F', 'world') AS c1 + |""".stripMargin) + + checkAnswer(sql(s"SELECT c1 FROM $testTable"), Seq(Row("helloworld"))) + } + + // ELT is similar + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s""" + |CREATE TABLE $testTable + |USING $dataSource AS + |SELECT ELT(1, X'68656C6C6F', 'world') AS c1 + |""".stripMargin) + + checkAnswer(sql(s"SELECT c1 FROM $testTable"), Seq(Row("hello"))) + } + } + + // endregion + + // region DML tests + + test("literals with default collation") { + val sessionCollation = "UTF8_LCASE" + val sessionCollationFullyQualified = fullyQualifiedPrefix + sessionCollation + withSessionCollation(sessionCollation) { + + // literal without collation + checkAnswer(sql("SELECT COLLATION('a')"), Seq(Row(sessionCollationFullyQualified))) + + checkAnswer(sql("SELECT COLLATION(map('a', 'b')['a'])"), + Seq(Row(sessionCollationFullyQualified))) + + checkAnswer(sql("SELECT COLLATION(array('a')[0])"), Seq(Row(sessionCollationFullyQualified))) + + checkAnswer(sql("SELECT COLLATION(struct('a' as c)['c'])"), + Seq(Row(sessionCollationFullyQualified))) + } + } + + test("literals with explicit collation") { + val unicodeCollation = fullyQualifiedPrefix + "UNICODE" + withSessionCollation("UTF8_LCASE") { + checkAnswer(sql("SELECT COLLATION('a' collate unicode)"), Seq(Row(unicodeCollation))) + + checkAnswer( + sql("SELECT COLLATION(map('a', 'b' collate unicode)['a'])"), + Seq(Row(unicodeCollation))) + + checkAnswer(sql("SELECT COLLATION(array('a' collate unicode)[0])"), + Seq(Row(unicodeCollation))) + + checkAnswer( + sql("SELECT COLLATION(struct('a' collate unicode as c)['c'])"), + Seq(Row(unicodeCollation))) + } + } + + test("cast is aware of session collation") { + val sessionCollation = "UTF8_LCASE" + val sessionCollationFullyQualified = fullyQualifiedPrefix + sessionCollation + withSessionCollation(sessionCollation) { + checkAnswer(sql("SELECT COLLATION(cast('a' as STRING))"), + Seq(Row(sessionCollationFullyQualified))) + + 
checkAnswer( + sql("SELECT COLLATION(cast(map('a', 'b') as MAP)['a'])"), + Seq(Row(sessionCollationFullyQualified))) + + checkAnswer( + sql("SELECT COLLATION(map_keys(cast(map('a', 'b') as MAP))[0])"), + Seq(Row(sessionCollationFullyQualified))) + + checkAnswer( + sql("SELECT COLLATION(cast(array('a') as ARRAY)[0])"), + Seq(Row(sessionCollationFullyQualified))) + + checkAnswer( + sql("SELECT COLLATION(cast(struct('a' as c) as STRUCT)['c'])"), + Seq(Row(sessionCollationFullyQualified))) + } + } + + test("expressions in where are aware of session collation") { + withSessionCollation("UTF8_LCASE") { + // expression in where is aware of session collation + checkAnswer(sql("SELECT 1 WHERE 'a' = 'A'"), Seq(Row(1))) + + checkAnswer(sql("SELECT 1 WHERE 'a' = cast('A' as STRING)"), Seq(Row(1))) + } + } + + test("having group by is aware of session collation") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING) USING $dataSource") + sql(s"INSERT INTO $testTable VALUES ('a'), ('A')") + + // having clause uses session (default) collation + checkAnswer( + sql(s"SELECT COUNT(*) FROM $testTable GROUP BY c1 HAVING 'a' = 'A'"), + Seq(Row(1), Row(1))) + + // having clause uses column (implicit) collation + checkAnswer( + sql(s"SELECT COUNT(*) FROM $testTable GROUP BY c1 HAVING c1 = 'A'"), + Seq(Row(1))) + } + } + + test("min/max are aware of session collation") { + // scalastyle:off nonascii + withSessionCollationAndTable("UNICODE", testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING) USING $dataSource") + sql(s"INSERT INTO $testTable VALUES ('1'), ('½')") + + checkAnswer(sql(s"SELECT MIN(c1) FROM $testTable"), Seq(Row("1"))) + + checkAnswer(sql(s"SELECT MAX(c1) FROM $testTable"), Seq(Row("½"))) + } + // scalastyle:on nonascii + } + + test("union operation with subqueries") { + withSessionCollation("UTF8_LCASE") { + checkAnswer( + sql(s""" + |SELECT 'a' = 'A' + |UNION + |SELECT 'b' = 'B' + |""".stripMargin), + Seq(Row(true))) + 
+ checkAnswer( + sql(s""" + |SELECT 'a' = 'A' + |UNION ALL + |SELECT 'b' = 'B' + |""".stripMargin), + Seq(Row(true), Row(true))) + } + } + + test("inline table in SELECT") { + withSessionCollation("UTF8_LCASE") { + val df = s""" + |SELECT * + |FROM (VALUES ('a', 'a' = 'A')) + |""".stripMargin + + checkAnswer(sql(df), Seq(Row("a", true))) + } + } + + test("inline table in insert") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING, c2 BOOLEAN) USING $dataSource") + + sql(s"INSERT INTO $testTable VALUES ('a', 'a' = 'A')") + checkAnswer(sql(s"SELECT * FROM $testTable"), Seq(Row("a", true))) + } + } + + test("literals in insert inherit session level collation") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable (c1 BOOLEAN) USING $dataSource") + + sql(s"INSERT INTO $testTable VALUES ('a' = 'A')") + sql(s"INSERT INTO $testTable VALUES (array_contains(array('a'), 'A'))") + sql(s"INSERT INTO $testTable VALUES (CONCAT(X'68656C6C6F', 'world') = 'HELLOWORLD')") + + checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable WHERE c1"), Seq(Row(3))) + } + } + + // endregion +} + +class DefaultCollationTestSuiteV1 extends DefaultCollationTestSuite { + + test("create/alter view created from a table") { + val sessionCollation = "UTF8_LCASE" + withSessionCollationAndTable(sessionCollation, testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING, c2 STRING COLLATE UNICODE_CI) USING $dataSource") + sql(s"INSERT INTO $testTable VALUES ('a', 'a'), ('A', 'A')") + + withView(testView) { + sql(s"CREATE VIEW $testView AS SELECT * FROM $testTable") + + assertTableColumnCollation(testView, "c1", "UTF8_BINARY") + assertTableColumnCollation(testView, "c2", "UNICODE_CI") + checkAnswer( + sql(s"SELECT DISTINCT COLLATION(c1), COLLATION('a') FROM $testView"), + Row(fullyQualifiedPrefix + "UTF8_BINARY", fullyQualifiedPrefix + sessionCollation)) + + // filter should use session collation + checkAnswer(sql(s"SELECT 
COUNT(*) FROM $testView WHERE 'a' = 'A'"), Row(2)) + + // filter should use column collation + checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'A'"), Row(1)) + + checkAnswer( + sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = substring('A', 0, 1)"), + Row(1)) + + // literal with explicit collation wins + checkAnswer( + sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'A' collate UNICODE_CI"), + Row(2)) + + // two implicit collations -> errors out + assertThrowsImplicitMismatch(sql(s"SELECT c1 = c2 FROM $testView")) + + sql(s"ALTER VIEW $testView AS SELECT c1 COLLATE UNICODE_CI AS c1, c2 FROM $testTable") + assertTableColumnCollation(testView, "c1", "UNICODE_CI") + assertTableColumnCollation(testView, "c2", "UNICODE_CI") + checkAnswer( + sql(s"SELECT DISTINCT COLLATION(c1), COLLATION('a') FROM $testView"), + Row(fullyQualifiedPrefix + "UNICODE_CI", fullyQualifiedPrefix + sessionCollation)) + + // after alter both rows should be returned + checkAnswer(sql(s"SELECT COUNT(*) FROM $testView WHERE c1 = 'A'"), Row(2)) + } + } + } + + test("join view with table") { + val viewTableName = "view_table" + val joinTableName = "join_table" + val sessionCollation = "sr" + + withSessionCollationAndTable(sessionCollation, viewTableName, joinTableName) { + sql(s"CREATE TABLE $viewTableName (c1 STRING COLLATE UNICODE_CI) USING $dataSource") + sql(s"CREATE TABLE $joinTableName (c1 STRING COLLATE UTF8_LCASE) USING $dataSource") + sql(s"INSERT INTO $viewTableName VALUES ('a')") + sql(s"INSERT INTO $joinTableName VALUES ('A')") + + withView(testView) { + sql(s"CREATE VIEW $testView AS SELECT * FROM $viewTableName") + + assertThrowsImplicitMismatch( + sql(s"SELECT * FROM $testView JOIN $joinTableName ON $testView.c1 = $joinTableName.c1")) + + checkAnswer( + sql(s""" + |SELECT COLLATION($testView.c1), COLLATION($joinTableName.c1) + |FROM $testView JOIN $joinTableName + |ON $testView.c1 = $joinTableName.c1 COLLATE UNICODE_CI + |""".stripMargin), + Row(fullyQualifiedPrefix + 
"UNICODE_CI", fullyQualifiedPrefix + "UTF8_LCASE")) + } + } + } +} + +class DefaultCollationTestSuiteV2 extends DefaultCollationTestSuite with DatasourceV2SQLBase { + override def testTable: String = s"testcat.${super.testTable}" + override def testView: String = s"testcat.${super.testView}" + + // delete only works on v2 + test("delete behavior") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING) USING $dataSource") + sql(s"INSERT INTO $testTable VALUES ('a'), ('A')") + + sql(s"DELETE FROM $testTable WHERE 'a' = 'A'") + checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable"), Seq(Row(0))) + } + } + + test("inline table in RTAS") { + withSessionCollationAndTable("UTF8_LCASE", testTable) { + sql(s"CREATE TABLE $testTable (c1 STRING, c2 BOOLEAN) USING $dataSource") + sql(s""" + |REPLACE TABLE $testTable + |USING $dataSource + |AS SELECT * + |FROM (VALUES ('a', 'a' = 'A')) + |AS inline_table(c1, c2); + |""".stripMargin) + + assertTableColumnCollation(testTable, "c1", "UTF8_BINARY") + checkAnswer(sql(s"SELECT COUNT(*) FROM $testTable WHERE c2"), Seq(Row(0))) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala index 21aa57cc1eace..00e1f2f93fdcb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala @@ -1396,4 +1396,39 @@ trait AlterTableTests extends SharedSparkSession with QueryErrorsBase { parameters = Map("columnName" -> "`data`")) } } + + test("Alter column type between string and char/varchar") { + val types = Seq( + ("STRING", "\"STRING\""), + ("STRING COLLATE UTF8_LCASE", "\"STRING COLLATE UTF8_LCASE\""), + ("CHAR(5)", "\"CHAR\\(5\\)\""), + ("VARCHAR(5)", "\"VARCHAR\\(5\\)\"")) + types.flatMap { a => types.map { b => (a, b) } } + .filter { case (a, b) => a != b } + .filter 
{ case ((a, _), (b, _)) => !a.startsWith("STRING") || !b.startsWith("STRING") } + .foreach { case ((from, originType), (to, newType)) => + val t = "table_name" + withTable(t) { + sql(s"CREATE TABLE $t (id $from) USING PARQUET") + val sql1 = s"ALTER TABLE $t ALTER COLUMN id TYPE $to" + checkErrorMatchPVals( + exception = intercept[AnalysisException] { + sql(sql1) + }, + condition = "NOT_SUPPORTED_CHANGE_COLUMN", + sqlState = None, + parameters = Map( + "originType" -> originType, + "newType" -> newType, + "newName" -> "`id`", + "originName" -> "`id`", + "table" -> ".*table_name.*"), + context = ExpectedContext( + fragment = sql1, + start = 0, + stop = sql1.length - 1) + ) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetricsSuite.scala new file mode 100644 index 0000000000000..fe28b85528632 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2MetricsSuite.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector + +import java.util + +import org.apache.spark.sql.QueryTest.withQueryExecutionsCaptured +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Column, Identifier, InMemoryTable, InMemoryTableCatalog, StagedTable, StagingInMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.metric.{CustomMetric, CustomSumMetric, CustomTaskMetric} +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.datasources.v2.{AtomicCreateTableAsSelectExec, AtomicReplaceTableAsSelectExec, AtomicReplaceTableExec, CreateTableAsSelectExec, ReplaceTableAsSelectExec, ReplaceTableExec} + +class StagingInMemoryTableCatalogWithMetrics extends StagingInMemoryTableCatalog { + + case class TestSupportedCommitMetric(name: String, description: String) extends CustomSumMetric + + override def supportedCustomMetrics(): Array[CustomMetric] = Array( + TestSupportedCommitMetric("numFiles", "number of written files"), + TestSupportedCommitMetric("numOutputRows", "number of output rows"), + TestSupportedCommitMetric("numOutputBytes", "written output")) + + private class TestStagedTableWithMetric( + ident: Identifier, + delegateTable: InMemoryTable + ) extends TestStagedTable(ident, delegateTable) with StagedTable { + + private var stagedChangesCommitted = false + + override def commitStagedChanges(): Unit = { + tables.put(ident, delegateTable) + stagedChangesCommitted = true + } + + override def reportDriverMetrics: Array[CustomTaskMetric] = { + assert(stagedChangesCommitted) + StagingInMemoryTableCatalogWithMetrics.testMetrics + } + } + + override def stageCreate( + ident: Identifier, + columns: Array[Column], + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = { + new TestStagedTableWithMetric( + ident, + new 
InMemoryTable(s"$name.${ident.quoted}", + CatalogV2Util.v2ColumnsToStructType(columns), partitions, properties)) + } + + override def stageReplace( + ident: Identifier, + columns: Array[Column], + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = + stageCreate(ident, columns, partitions, properties) + + override def stageCreateOrReplace( + ident: Identifier, + columns: Array[Column], + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = + stageCreate(ident, columns, partitions, properties) +} + +object StagingInMemoryTableCatalogWithMetrics { + + case class TestCustomTaskMetric(name: String, value: Long) extends CustomTaskMetric + + val testMetrics: Array[CustomTaskMetric] = Array( + TestCustomTaskMetric("numFiles", 1337), + TestCustomTaskMetric("numOutputRows", 1338), + TestCustomTaskMetric("numOutputBytes", 1339)) +} + +class DataSourceV2MetricsSuite extends DatasourceV2SQLBase { + + private val testCatalog = "test_catalog" + private val atomicTestCatalog = "atomic_test_catalog" + private val nonExistingTable = "non_existing_table" + private val existingTable = "existing_table" + + private def captureStagedTableWrite(thunk: => Unit): SparkPlan = { + val physicalPlans = withQueryExecutionsCaptured(spark)(thunk).map(_.executedPlan) + val stagedTableWrites = physicalPlans.filter { + case _: AtomicCreateTableAsSelectExec | _: CreateTableAsSelectExec | + _: AtomicReplaceTableAsSelectExec | _: ReplaceTableAsSelectExec | + _: AtomicReplaceTableExec | _: ReplaceTableExec => true + case _ => false + } + assert(stagedTableWrites.size === 1) + stagedTableWrites.head + } + + private def commands: Seq[String => Unit] = Seq( + { catalogName => + sql(s"CREATE TABLE $catalogName.$nonExistingTable AS SELECT * FROM $existingTable") }, + { catalogName => + spark.table(existingTable).write.saveAsTable(s"$catalogName.$nonExistingTable") }, + { catalogName => + sql(s"CREATE OR REPLACE TABLE 
$catalogName.$nonExistingTable " + + s"AS SELECT * FROM $existingTable") }, + { catalogName => + sql(s"REPLACE TABLE $catalogName.$existingTable AS SELECT * FROM $existingTable") }, + { catalogName => + spark.table(existingTable) + .write.mode("overwrite").saveAsTable(s"$catalogName.$existingTable") }, + { catalogName => + sql(s"REPLACE TABLE $catalogName.$existingTable (id bigint, data string)") }) + + private def catalogCommitMetricsTest( + testName: String, catalogName: String)(testFunction: SparkPlan => Unit): Unit = { + commands.foreach { command => + test(s"$testName - $command") { + registerCatalog(testCatalog, classOf[InMemoryTableCatalog]) + registerCatalog(atomicTestCatalog, classOf[StagingInMemoryTableCatalogWithMetrics]) + withTable(existingTable, s"$catalogName.$existingTable") { + sql(s"CREATE TABLE $existingTable (id bigint, data string)") + sql(s"CREATE TABLE $catalogName.$existingTable (id bigint, data string)") + + testFunction(captureStagedTableWrite(command(catalogName))) + } + } + } + } + + catalogCommitMetricsTest( + "No metrics in the plan if the catalog does not support them", testCatalog) { sparkPlan => + val metrics = sparkPlan.metrics + + assert(metrics.isEmpty) + } + + catalogCommitMetricsTest( + "Plan metrics values are the values from the catalog", atomicTestCatalog) { sparkPlan => + val metrics = sparkPlan.metrics + + assert(metrics.size === StagingInMemoryTableCatalogWithMetrics.testMetrics.length) + StagingInMemoryTableCatalogWithMetrics.testMetrics.foreach(customTaskMetric => + assert(metrics(customTaskMetric.name()).value === customTaskMetric.value())) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2OptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2OptionSuite.scala new file mode 100644 index 0000000000000..70291336ba317 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2OptionSuite.scala @@ -0,0 +1,327 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.QueryTest.withQueryExecutionsCaptured +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.connector.catalog.InMemoryBaseTable +import org.apache.spark.sql.execution.CommandResultExec +import org.apache.spark.sql.execution.datasources.v2._ +import org.apache.spark.sql.functions.lit + +class DataSourceV2OptionSuite extends DatasourceV2SQLBase { + import testImplicits._ + + private val catalogAndNamespace = "testcat.ns1.ns2." 
+ + test("SPARK-36680: Supports Dynamic Table Options for SQL Select") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b')") + + var df = sql(s"SELECT * FROM $t1") + var collected = df.queryExecution.optimizedPlan.collect { + case scan: DataSourceV2ScanRelation => + assert(scan.relation.options.isEmpty) + } + assert (collected.size == 1) + checkAnswer(df, Seq(Row(1, "a"), Row(2, "b"))) + + df = sql(s"SELECT * FROM $t1 WITH (`split-size` = 5)") + collected = df.queryExecution.optimizedPlan.collect { + case scan: DataSourceV2ScanRelation => + assert(scan.relation.options.get("split-size") == "5") + } + assert (collected.size == 1) + checkAnswer(df, Seq(Row(1, "a"), Row(2, "b"))) + + collected = df.queryExecution.executedPlan.collect { + case BatchScanExec(_, scan: InMemoryBaseTable#InMemoryBatchScan, _, _, _, _) => + assert(scan.options.get("split-size") === "5") + } + assert (collected.size == 1) + + val noValues = intercept[AnalysisException]( + sql(s"SELECT * FROM $t1 WITH (`split-size`)")) + assert(noValues.message.contains( + "Operation not allowed: Values must be specified for key(s): [split-size]")) + } + } + + test("SPARK-50286: Propagate options for DataFrameReader") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b')") + + var df = spark.table(t1) + var collected = df.queryExecution.optimizedPlan.collect { + case scan: DataSourceV2ScanRelation => + assert(scan.relation.options.isEmpty) + } + assert (collected.size == 1) + checkAnswer(df, Seq(Row(1, "a"), Row(2, "b"))) + + df = spark.read.option("split-size", "5").table(t1) + collected = df.queryExecution.optimizedPlan.collect { + case scan: DataSourceV2ScanRelation => + assert(scan.relation.options.get("split-size") == "5") + } + assert (collected.size == 1) + checkAnswer(df, Seq(Row(1, 
"a"), Row(2, "b"))) + + collected = df.queryExecution.executedPlan.collect { + case BatchScanExec(_, scan: InMemoryBaseTable#InMemoryBatchScan, _, _, _, _) => + assert(scan.options.get("split-size") === "5") + } + assert (collected.size == 1) + } + } + + test("SPARK-49098, SPARK-50286: Supports Dynamic Table Options for SQL Insert") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + val df = sql(s"INSERT INTO $t1 WITH (`write.split-size` = 10) VALUES (1, 'a'), (2, 'b')") + + var collected = df.queryExecution.optimizedPlan.collect { + case CommandResult(_, AppendData(relation: DataSourceV2Relation, _, _, _, _, _), _, _) => + assert(relation.options.get("write.split-size") == "10") + } + assert (collected.size == 1) + + collected = df.queryExecution.executedPlan.collect { + case CommandResultExec( + _, AppendDataExec(_, _, write), + _) => + val append = write.toBatch.asInstanceOf[InMemoryBaseTable#Append] + assert(append.info.options.get("write.split-size") === "10") + } + assert (collected.size == 1) + + val insertResult = sql(s"SELECT * FROM $t1") + checkAnswer(insertResult, Seq(Row(1, "a"), Row(2, "b"))) + } + } + + test("SPARK-50286: Propagate options for DataFrameWriter Append") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + val captured = withQueryExecutionsCaptured(spark) { + Seq(1 -> "a", 2 -> "b").toDF("id", "data") + .write + .option("write.split-size", "10") + .mode("append") + .insertInto(t1) + } + assert(captured.size === 1) + val qe = captured.head + var collected = qe.optimizedPlan.collect { + case AppendData(_: DataSourceV2Relation, _, writeOptions, _, _, _) => + assert(writeOptions("write.split-size") == "10") + } + assert (collected.size == 1) + + collected = qe.executedPlan.collect { + case AppendDataExec(_, _, write) => + val append = write.toBatch.asInstanceOf[InMemoryBaseTable#Append] + 
assert(append.info.options.get("write.split-size") === "10") + } + assert (collected.size == 1) + } + } + + test("SPARK-50286: Propagate options for DataFrameWriterV2 Append") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + val captured = withQueryExecutionsCaptured(spark) { + Seq(1 -> "a", 2 -> "b").toDF("id", "data") + .writeTo(t1) + .option("write.split-size", "10") + .append() + } + assert(captured.size === 1) + val qe = captured.head + var collected = qe.optimizedPlan.collect { + case AppendData(_: DataSourceV2Relation, _, writeOptions, _, _, _) => + assert(writeOptions("write.split-size") == "10") + } + assert (collected.size == 1) + + collected = qe.executedPlan.collect { + case AppendDataExec(_, _, write) => + val append = write.toBatch.asInstanceOf[InMemoryBaseTable#Append] + assert(append.info.options.get("write.split-size") === "10") + } + assert (collected.size == 1) + } + } + + test("SPARK-49098, SPARK-50286: Supports Dynamic Table Options for SQL Insert Overwrite") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b')") + + val df = sql(s"INSERT OVERWRITE $t1 WITH (`write.split-size` = 10) " + + s"VALUES (3, 'c'), (4, 'd')") + var collected = df.queryExecution.optimizedPlan.collect { + case CommandResult(_, + OverwriteByExpression(relation: DataSourceV2Relation, _, _, _, _, _, _), + _, _) => + assert(relation.options.get("write.split-size") === "10") + } + assert (collected.size == 1) + + collected = df.queryExecution.executedPlan.collect { + case CommandResultExec( + _, OverwriteByExpressionExec(_, _, write), + _) => + val append = write.toBatch.asInstanceOf[InMemoryBaseTable#TruncateAndAppend] + assert(append.info.options.get("write.split-size") === "10") + } + assert (collected.size == 1) + + val insertResult = sql(s"SELECT * FROM $t1") + checkAnswer(insertResult, 
Seq(Row(3, "c"), Row(4, "d"))) + } + } + + test("SPARK-50286: Propagate options for DataFrameWriterV2 OverwritePartitions") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b')") + + val captured = withQueryExecutionsCaptured(spark) { + Seq(3 -> "c", 4 -> "d").toDF("id", "data") + .writeTo(t1) + .option("write.split-size", "10") + .overwritePartitions() + } + assert(captured.size === 1) + val qe = captured.head + var collected = qe.optimizedPlan.collect { + case OverwritePartitionsDynamic(_: DataSourceV2Relation, _, writeOptions, _, _) => + assert(writeOptions("write.split-size") === "10") + } + assert (collected.size == 1) + + collected = qe.executedPlan.collect { + case OverwritePartitionsDynamicExec(_, _, write) => + val dynOverwrite = write.toBatch.asInstanceOf[InMemoryBaseTable#DynamicOverwrite] + assert(dynOverwrite.info.options.get("write.split-size") === "10") + } + assert (collected.size == 1) + } + } + + test("SPARK-49098, SPARK-50286: Supports Dynamic Table Options for SQL Insert Replace") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b')") + + val df = sql(s"INSERT INTO $t1 WITH (`write.split-size` = 10) " + + s"REPLACE WHERE TRUE " + + s"VALUES (3, 'c'), (4, 'd')") + var collected = df.queryExecution.optimizedPlan.collect { + case CommandResult(_, + OverwriteByExpression(relation: DataSourceV2Relation, _, _, _, _, _, _), + _, _) => + assert(relation.options.get("write.split-size") == "10") + } + assert (collected.size == 1) + + collected = df.queryExecution.executedPlan.collect { + case CommandResultExec( + _, OverwriteByExpressionExec(_, _, write), + _) => + val append = write.toBatch.asInstanceOf[InMemoryBaseTable#TruncateAndAppend] + assert(append.info.options.get("write.split-size") === "10") + } + assert (collected.size == 1) 
+ + val insertResult = sql(s"SELECT * FROM $t1") + checkAnswer(insertResult, Seq(Row(3, "c"), Row(4, "d"))) + } + } + + test("SPARK-50286: Propagate options for DataFrameWriter Overwrite") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + val captured = withQueryExecutionsCaptured(spark) { + Seq(1 -> "a", 2 -> "b").toDF("id", "data") + .write + .option("write.split-size", "10") + .mode("overwrite") + .insertInto(t1) + } + assert(captured.size === 1) + + val qe = captured.head + var collected = qe.optimizedPlan.collect { + case OverwriteByExpression(_: DataSourceV2Relation, _, _, writeOptions, _, _, _) => + assert(writeOptions("write.split-size") === "10") + } + assert (collected.size == 1) + + collected = qe.executedPlan.collect { + case OverwriteByExpressionExec(_, _, write) => + val append = write.toBatch.asInstanceOf[InMemoryBaseTable#TruncateAndAppend] + assert(append.info.options.get("write.split-size") === "10") + } + assert (collected.size == 1) + } + } + + test("SPARK-50286: Propagate options for DataFrameWriterV2 Overwrite") { + val t1 = s"${catalogAndNamespace}table" + withTable(t1) { + sql(s"CREATE TABLE $t1 (id bigint, data string)") + sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b')") + + val captured = withQueryExecutionsCaptured(spark) { + Seq(3 -> "c", 4 -> "d").toDF("id", "data") + .writeTo(t1) + .option("write.split-size", "10") + .overwrite(lit(true)) + } + assert(captured.size === 1) + val qe = captured.head + + var collected = qe.optimizedPlan.collect { + case OverwriteByExpression(_: DataSourceV2Relation, _, _, writeOptions, _, _, _) => + assert(writeOptions("write.split-size") === "10") + } + assert (collected.size == 1) + + collected = qe.executedPlan.collect { + case OverwriteByExpressionExec(_, _, write) => + val append = write.toBatch.asInstanceOf[InMemoryBaseTable#TruncateAndAppend] + assert(append.info.options.get("write.split-size") === "10") + } + assert (collected.size 
== 1) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 510ea49b58418..8d255e9efda54 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, ColumnStat, CommandResult, OverwriteByExpression} +import org.apache.spark.sql.catalyst.plans.logical.ColumnStat import org.apache.spark.sql.catalyst.statsEstimation.StatsEstimationTestBase import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.connector.catalog.{Column => ColumnV2, _} @@ -44,7 +44,6 @@ import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelationWithTable} -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} @@ -279,8 +278,8 @@ class DataSourceV2SQLSuiteV1Filter test("CreateTable: without USING clause") { withSQLConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key -> "false") { - // unset this config to use the default v2 session 
catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") val testCatalog = catalog("testcat").asTableCatalog sql("CREATE TABLE testcat.t1 (id int)") @@ -786,8 +785,8 @@ class DataSourceV2SQLSuiteV1Filter } test("CreateTableAsSelect: v2 session catalog can load v1 source table") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") df.createOrReplaceTempView("source") @@ -847,8 +846,8 @@ class DataSourceV2SQLSuiteV1Filter // TODO: ignored by SPARK-31707, restore the test after create table syntax unification ignore("CreateTableAsSelect: without USING clause") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") val testCatalog = catalog("testcat").asTableCatalog sql("CREATE TABLE testcat.t1 AS SELECT 1 i") @@ -1087,11 +1086,11 @@ class DataSourceV2SQLSuiteV1Filter Seq(true, false).foreach { useV1Table => val format = if (useV1Table) "json" else v2Format if (useV1Table) { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. 
+ spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") } else { spark.conf.set( - V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryTableSessionCatalog].getName) + V2_SESSION_CATALOG_IMPLEMENTATION, classOf[InMemoryTableSessionCatalog].getName) } withTable("t") { @@ -1261,8 +1260,12 @@ class DataSourceV2SQLSuiteV1Filter PROP_OWNER -> "it will be set to the current user", PROP_EXTERNAL -> "please use CREATE EXTERNAL TABLE" ) + val excludedProperties = Set(TableCatalog.PROP_COMMENT, TableCatalog.PROP_COLLATION) + val tableLegacyProperties = CatalogV2Util.TABLE_RESERVED_PROPERTIES + .filterNot(excludedProperties.contains) + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { - CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + tableLegacyProperties.foreach { key => Seq("OPTIONS", "TBLPROPERTIES").foreach { clause => Seq("CREATE", "REPLACE").foreach { action => val sqlText = s"$action TABLE testcat.reservedTest (key int) " + @@ -1315,7 +1318,7 @@ class DataSourceV2SQLSuiteV1Filter } } withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { - CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + tableLegacyProperties.foreach { key => Seq("OPTIONS", "TBLPROPERTIES").foreach { clause => withTable("testcat.reservedTest") { Seq("CREATE", "REPLACE").foreach { action => @@ -1812,8 +1815,8 @@ class DataSourceV2SQLSuiteV1Filter } test("SPARK-46972: asymmetrical replacement for char/varchar in V2SessionCatalog.createTable") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. 
+ spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") withTable("t") { sql(s"CREATE TABLE t(c char(1), v varchar(2)) USING $v2Source") } @@ -2530,8 +2533,8 @@ class DataSourceV2SQLSuiteV1Filter } test("SPARK-30001: session catalog name can be specified in SQL statements") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") withTable("t") { sql("CREATE TABLE t USING json AS SELECT 1 AS i") @@ -2595,8 +2598,8 @@ class DataSourceV2SQLSuiteV1Filter } test("SPARK-30094: current namespace is used during table resolution") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") withTable("spark_catalog.default.t", "testcat.ns.t") { sql("CREATE TABLE t USING parquet AS SELECT 1") @@ -2610,8 +2613,8 @@ class DataSourceV2SQLSuiteV1Filter } test("SPARK-30284: CREATE VIEW should track the current catalog and namespace") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") val sessionCatalogName = CatalogManager.SESSION_CATALOG_NAME sql("CREATE NAMESPACE testcat.ns1.ns2") @@ -2648,8 +2651,8 @@ class DataSourceV2SQLSuiteV1Filter } test("COMMENT ON NAMESPACE") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") // Session catalog is used. 
sql("CREATE NAMESPACE ns") checkNamespaceComment("ns", "minor revision") @@ -2682,8 +2685,8 @@ class DataSourceV2SQLSuiteV1Filter } test("COMMENT ON TABLE") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. + spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") // Session catalog is used. withTable("t") { sql("CREATE TABLE t(k int) USING json") @@ -3390,6 +3393,7 @@ class DataSourceV2SQLSuiteV1Filter |TBLPROPERTIES ('prop1' = '1', 'prop2' = '2') |PARTITIONED BY (a) |LOCATION '/tmp' + |DEFAULT COLLATION sr_CI_AI """.stripMargin) val table = spark.sessionState.catalogManager.v2SessionCatalog.asTableCatalog @@ -3397,6 +3401,7 @@ class DataSourceV2SQLSuiteV1Filter val properties = table.properties assert(properties.get(TableCatalog.PROP_PROVIDER) == "parquet") assert(properties.get(TableCatalog.PROP_COMMENT) == "This is a comment") + assert(properties.get(TableCatalog.PROP_COLLATION) == "sr_CI_AI") assert(properties.get(TableCatalog.PROP_LOCATION) == "file:/tmp") assert(properties.containsKey(TableCatalog.PROP_OWNER)) assert(properties.get(TableCatalog.PROP_EXTERNAL) == "true") @@ -3634,96 +3639,6 @@ class DataSourceV2SQLSuiteV1Filter } } - - test("SPARK-36680: Supports Dynamic Table Options for Spark SQL") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b')") - - var df = sql(s"SELECT * FROM $t1") - var collected = df.queryExecution.optimizedPlan.collect { - case scan: DataSourceV2ScanRelation => - assert(scan.relation.options.isEmpty) - } - assert (collected.size == 1) - checkAnswer(df, Seq(Row(1, "a"), Row(2, "b"))) - - df = sql(s"SELECT * FROM $t1 WITH (`split-size` = 5)") - collected = df.queryExecution.optimizedPlan.collect { - case scan: DataSourceV2ScanRelation => - assert(scan.relation.options.get("split-size") == 
"5") - } - assert (collected.size == 1) - checkAnswer(df, Seq(Row(1, "a"), Row(2, "b"))) - - val noValues = intercept[AnalysisException]( - sql(s"SELECT * FROM $t1 WITH (`split-size`)")) - assert(noValues.message.contains( - "Operation not allowed: Values must be specified for key(s): [split-size]")) - } - } - - test("SPARK-36680: Supports Dynamic Table Options for Insert") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") - val df = sql(s"INSERT INTO $t1 WITH (`write.split-size` = 10) VALUES (1, 'a'), (2, 'b')") - - val collected = df.queryExecution.optimizedPlan.collect { - case CommandResult(_, AppendData(relation: DataSourceV2Relation, _, _, _, _, _), _, _) => - assert(relation.options.get("write.split-size") == "10") - } - assert (collected.size == 1) - - val insertResult = sql(s"SELECT * FROM $t1") - checkAnswer(insertResult, Seq(Row(1, "a"), Row(2, "b"))) - } - } - - test("SPARK-36680: Supports Dynamic Table Options for Insert Overwrite") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") - sql(s"INSERT INTO $t1 WITH (`write.split-size` = 10) VALUES (1, 'a'), (2, 'b')") - - val df = sql(s"INSERT OVERWRITE $t1 WITH (`write.split-size` = 10) " + - s"VALUES (3, 'c'), (4, 'd')") - val collected = df.queryExecution.optimizedPlan.collect { - case CommandResult(_, - OverwriteByExpression(relation: DataSourceV2Relation, _, _, _, _, _, _), - _, _) => - assert(relation.options.get("write.split-size") == "10") - } - assert (collected.size == 1) - - val insertResult = sql(s"SELECT * FROM $t1") - checkAnswer(insertResult, Seq(Row(3, "c"), Row(4, "d"))) - } - } - - test("SPARK-36680: Supports Dynamic Table Options for Insert Replace") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format") - sql(s"INSERT INTO $t1 WITH (`write.split-size` = 10) 
VALUES (1, 'a'), (2, 'b')") - - val df = sql(s"INSERT INTO $t1 WITH (`write.split-size` = 10) " + - s"REPLACE WHERE TRUE " + - s"VALUES (3, 'c'), (4, 'd')") - val collected = df.queryExecution.optimizedPlan.collect { - case CommandResult(_, - OverwriteByExpression(relation: DataSourceV2Relation, _, _, _, _, _, _), - _, _) => - assert(relation.options.get("write.split-size") == "10") - } - assert (collected.size == 1) - - val insertResult = sql(s"SELECT * FROM $t1") - checkAnswer(insertResult, Seq(Row(3, "c"), Row(4, "d"))) - } - } - test("SPARK-49183: custom spark_catalog generates location for managed tables") { // Reset CatalogManager to clear the materialized `spark_catalog` instance, so that we can // configure a new implementation. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTests.scala index fd022580db42b..26f64ceb33fe3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTests.scala @@ -100,8 +100,8 @@ trait DeleteFromTests extends DatasourceV2SQLBase { } test("DeleteFrom: DELETE is only supported with v2 tables") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + // use the default v2 session catalog. 
+ spark.conf.set(V2_SESSION_CATALOG_IMPLEMENTATION, "builtin") val v1Table = "tbl" withTable(v1Table) { sql(s"CREATE TABLE $v1Table" + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala index 152896499010c..c24f52bd93070 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala @@ -370,6 +370,62 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { checkAnswer(df.sort("res"), Seq(Row(10.0), Row(15.5), Row(41.0))) } + test("SPARK-48655: order by on partition keys should not introduce additional shuffle") { + val items_partitions = Array(identity("price"), identity("id")) + createTable(items, itemsColumns, items_partitions) + sql(s"INSERT INTO testcat.ns.$items VALUES " + + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + s"(1, 'aa', 41.0, cast('2020-01-02' as timestamp)), " + + s"(2, 'bb', 10.0, cast('2020-01-01' as timestamp)), " + + s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp)), " + + s"(null, 'cc', 15.5, cast('2020-02-01' as timestamp)), " + + s"(3, 'cc', null, cast('2020-02-01' as timestamp))") + + Seq(true, false).foreach { sortingEnabled => + withSQLConf(SQLConf.V2_BUCKETING_SORTING_ENABLED.key -> sortingEnabled.toString) { + + def verifyShuffle(cmd: String, answer: Seq[Row]): Unit = { + val df = sql(cmd) + if (sortingEnabled) { + assert(collectAllShuffles(df.queryExecution.executedPlan).isEmpty, + "should contain no shuffle when sorting by partition values") + } else { + assert(collectAllShuffles(df.queryExecution.executedPlan).size == 1, + "should contain one shuffle when optimization is disabled") + } + checkAnswer(df, answer) + }: Unit + + verifyShuffle( + s"SELECT price, id FROM testcat.ns.$items ORDER BY price ASC, id ASC", + Seq(Row(null, 
3), Row(10.0, 2), Row(15.5, null), + Row(15.5, 3), Row(40.0, 1), Row(41.0, 1))) + + verifyShuffle( + s"SELECT price, id FROM testcat.ns.$items " + + s"ORDER BY price ASC NULLS LAST, id ASC NULLS LAST", + Seq(Row(10.0, 2), Row(15.5, 3), Row(15.5, null), + Row(40.0, 1), Row(41.0, 1), Row(null, 3))) + + verifyShuffle( + s"SELECT price, id FROM testcat.ns.$items ORDER BY price DESC, id ASC", + Seq(Row(41.0, 1), Row(40.0, 1), Row(15.5, null), + Row(15.5, 3), Row(10.0, 2), Row(null, 3))) + + verifyShuffle( + s"SELECT price, id FROM testcat.ns.$items ORDER BY price DESC, id DESC", + Seq(Row(41.0, 1), Row(40.0, 1), Row(15.5, 3), + Row(15.5, null), Row(10.0, 2), Row(null, 3))) + + verifyShuffle( + s"SELECT price, id FROM testcat.ns.$items " + + s"ORDER BY price DESC NULLS FIRST, id DESC NULLS FIRST", + Seq(Row(null, 3), Row(41.0, 1), Row(40.0, 1), + Row(15.5, null), Row(15.5, 3), Row(10.0, 2))); + } + } + } + test("SPARK-49179: Fix v2 multi bucketed inner joins throw AssertionError") { val cols = Array( Column.create("id", LongType), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index 04fc7e23ebb24..68c2a01c69aea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.rdd.RDD import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode, SparkSession, SQLContext} -import org.apache.spark.sql.QueryTest.withPhysicalPlansCaptured +import org.apache.spark.sql.QueryTest.withQueryExecutionsCaptured import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule @@ -213,8 +213,8 @@ class 
V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before .getOrCreate() def captureWrite(sparkSession: SparkSession)(thunk: => Unit): SparkPlan = { - val physicalPlans = withPhysicalPlansCaptured(sparkSession, thunk) - val v1FallbackWritePlans = physicalPlans.filter { + val queryExecutions = withQueryExecutionsCaptured(sparkSession)(thunk) + val v1FallbackWritePlans = queryExecutions.map(_.executedPlan).filter { case _: AppendDataExecV1 | _: OverwriteByExpressionExecV1 => true case _ => false } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala index 5091c72ef96ac..67fca09802139 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala @@ -53,7 +53,8 @@ class V2CommandsCaseSensitivitySuite withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { Seq("ID", "iD").foreach { ref => val tableSpec = - UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), None, None, None, false) + UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), + None, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedIdentifier(Array("table_name").toImmutableArraySeq), Expressions.identity(ref) :: Nil, @@ -77,7 +78,8 @@ class V2CommandsCaseSensitivitySuite withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { Seq("POINT.X", "point.X", "poInt.x", "poInt.X").foreach { ref => val tableSpec = - UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), None, None, None, false) + UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), + None, None, None, None, false) val plan = CreateTableAsSelect( UnresolvedIdentifier(Array("table_name").toImmutableArraySeq), Expressions.bucket(4, ref) :: Nil, @@ -102,7 +104,8 @@ class 
V2CommandsCaseSensitivitySuite withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { Seq("ID", "iD").foreach { ref => val tableSpec = - UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), None, None, None, false) + UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), + None, None, None, None, false) val plan = ReplaceTableAsSelect( UnresolvedIdentifier(Array("table_name").toImmutableArraySeq), Expressions.identity(ref) :: Nil, @@ -126,7 +129,8 @@ class V2CommandsCaseSensitivitySuite withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { Seq("POINT.X", "point.X", "poInt.x", "poInt.X").foreach { ref => val tableSpec = - UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), None, None, None, false) + UnresolvedTableSpec(Map.empty, None, OptionList(Seq.empty), + None, None, None, None, false) val plan = ReplaceTableAsSelect( UnresolvedIdentifier(Array("table_name").toImmutableArraySeq), Expressions.bucket(4, ref) :: Nil, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/functions/V2FunctionBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/functions/V2FunctionBenchmark.scala index 1401048cf705d..a5f0285bf2eff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/functions/V2FunctionBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/functions/V2FunctionBenchmark.scala @@ -21,15 +21,16 @@ import test.org.apache.spark.sql.connector.catalog.functions.JavaLongAdd import test.org.apache.spark.sql.connector.catalog.functions.JavaLongAdd.{JavaLongAddDefault, JavaLongAddMagic, JavaLongAddStaticMagic} import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{BinaryArithmetic, EvalMode, Expression} import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode._ import org.apache.spark.sql.catalyst.util.TypeUtils +import 
org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryCatalog} import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, ScalarFunction, UnboundFunction} import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark import org.apache.spark.sql.functions.col -import org.apache.spark.sql.internal.ExpressionUtils.{column, expression} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{AbstractDataType, DataType, LongType, NumericType, StructType} @@ -64,6 +65,7 @@ object V2FunctionBenchmark extends SqlBasedBenchmark { N: Long, codegenEnabled: Boolean, resultNullable: Boolean): Unit = { + import spark.toRichColumn withSQLConf(s"spark.sql.catalog.$catalogName" -> classOf[InMemoryCatalog].getName) { createFunction("java_long_add_default", new JavaLongAdd(new JavaLongAddDefault(resultNullable))) @@ -81,7 +83,9 @@ object V2FunctionBenchmark extends SqlBasedBenchmark { s"codegen = $codegenEnabled" val benchmark = new Benchmark(name, N, output = output) benchmark.addCase(s"native_long_add", numIters = 3) { _ => - spark.range(N).select(NativeAdd(col("id"), col("id"), resultNullable)).noop() + spark.range(N) + .select(Column(NativeAdd(col("id").expr, col("id").expr, resultNullable))) + .noop() } Seq("java_long_add_default", "java_long_add_magic", "java_long_add_static_magic", "scala_long_add_default", "scala_long_add_magic").foreach { functionName => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 92c175fe2f94a..779b5ba530aa6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -901,6 +901,23 @@ class QueryCompilationErrorsSuite } } + test("SPARK-50779: the object level collations feature 
is unsupported when flag is disabled") { + withSQLConf(SQLConf.OBJECT_LEVEL_COLLATIONS_ENABLED.key -> "false") { + Seq( + "CREATE TABLE t (c STRING) USING parquet DEFAULT COLLATION UNICODE", + "REPLACE TABLE t (c STRING) USING parquet DEFAULT COLLATION UNICODE_CI", + "ALTER TABLE t DEFAULT COLLATION sr_CI_AI", + "CREATE VIEW v DEFAULT COLLATION UNICODE as SELECT * FROM t", + "CREATE TEMPORARY VIEW v DEFAULT COLLATION UTF8_LCASE as SELECT * FROM t" + ).foreach { sqlText => + checkError( + exception = intercept[AnalysisException](sql(sqlText)), + condition = "UNSUPPORTED_FEATURE.OBJECT_LEVEL_COLLATIONS" + ) + } + } + } + test("UNSUPPORTED_CALL: call the unsupported method update()") { checkError( exception = intercept[SparkUnsupportedOperationException] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala index f07d2d6620f72..fde5a32e722f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala @@ -145,7 +145,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest sql("select array(1, 2, 3, 4, 5)[8]").collect() }, condition = "INVALID_ARRAY_INDEX", - parameters = Map("indexValue" -> "8", "arraySize" -> "5", "ansiConfig" -> ansiConf), + parameters = Map("indexValue" -> "8", "arraySize" -> "5"), context = ExpectedContext(fragment = "array(1, 2, 3, 4, 5)[8]", start = 7, stop = 29)) checkError( @@ -153,7 +153,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest OneRowRelation().select(lit(Array(1, 2, 3, 4, 5))(8)).collect() }, condition = "INVALID_ARRAY_INDEX", - parameters = Map("indexValue" -> "8", "arraySize" -> "5", "ansiConfig" -> ansiConf), + parameters = Map("indexValue" -> "8", "arraySize" -> "5"), context = ExpectedContext( fragment = "apply", callSitePattern = 
getCurrentClassCallSitePattern)) @@ -165,7 +165,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest sql("select element_at(array(1, 2, 3, 4, 5), 8)").collect() }, condition = "INVALID_ARRAY_INDEX_IN_ELEMENT_AT", - parameters = Map("indexValue" -> "8", "arraySize" -> "5", "ansiConfig" -> ansiConf), + parameters = Map("indexValue" -> "8", "arraySize" -> "5"), context = ExpectedContext( fragment = "element_at(array(1, 2, 3, 4, 5), 8)", start = 7, @@ -176,7 +176,7 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest OneRowRelation().select(element_at(lit(Array(1, 2, 3, 4, 5)), 8)).collect() }, condition = "INVALID_ARRAY_INDEX_IN_ELEMENT_AT", - parameters = Map("indexValue" -> "8", "arraySize" -> "5", "ansiConfig" -> ansiConf), + parameters = Map("indexValue" -> "8", "arraySize" -> "5"), context = ExpectedContext(fragment = "element_at", callSitePattern = getCurrentClassCallSitePattern)) } @@ -240,8 +240,8 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest }, condition = "CANNOT_PARSE_TIMESTAMP", parameters = Map( - "message" -> "Text 'abc' could not be parsed at index 0", - "ansiConfig" -> ansiConf) + "func" -> "`try_to_timestamp`", + "message" -> "Text 'abc' could not be parsed at index 0") ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 1adb1fdf05032..17c3c1e1e2a70 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -351,7 +351,7 @@ class QueryExecutionErrorsSuite sql("select timestampadd(YEAR, 1000000, timestamp'2022-03-09 01:02:03')").collect() }, condition = "DATETIME_OVERFLOW", - parameters = Map("operation" -> "add 1000000 YEAR to TIMESTAMP '2022-03-09 01:02:03'"), + parameters = Map("operation" -> "add 1000000L YEAR to TIMESTAMP '2022-03-09 01:02:03'"), sqlState = 
"22008") } @@ -1258,6 +1258,22 @@ class QueryExecutionErrorsSuite ) ) } + + test("SPARK-50485: Unwrap SparkThrowable in UEE thrown by tableRelationCache") { + withTable("t") { + sql("CREATE TABLE t (a INT)") + checkError( + exception = intercept[SparkUnsupportedOperationException] { + sql("ALTER TABLE t SET LOCATION 'https://mister/spark'") + }, + condition = "FAILED_READ_FILE.UNSUPPORTED_FILE_SYSTEM", + parameters = Map( + "path" -> "https://mister/spark", + "fileSystemClass" -> "org.apache.hadoop.fs.http.HttpsFileSystem", + "method" -> "listStatus")) + sql("ALTER TABLE t SET LOCATION '/mister/spark'") + } + } } class FakeFileSystemSetPermission extends LocalFileSystem { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala index 0078c3f9f65de..31b002a1e245d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArrayBenchmark.scala @@ -32,10 +32,10 @@ import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter * {{{ * 1. without sbt: * bin/spark-submit --class --jars - * 2. build/sbt build/sbt ";project sql;set javaOptions - * in Test += \"-Dspark.memory.debugFill=false\";Test/runMain " - * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt ";project sql;set javaOptions - * in Test += \"-Dspark.memory.debugFill=false\";Test/runMain " + * 2. build/sbt build/sbt ";project sql; + * set Test / javaOptions += \"-Dspark.memory.debugFill=false\";Test/runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt ";project sql; + * set Test / javaOptions += \"-Dspark.memory.debugFill=false\";Test/runMain " * Results will be written to * "benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt". 
* }}} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/InsertSortForLimitAndOffsetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/InsertSortForLimitAndOffsetSuite.scala index 8d640a1840f4c..d1b11a74cf35f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/InsertSortForLimitAndOffsetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/InsertSortForLimitAndOffsetSuite.scala @@ -17,10 +17,13 @@ package org.apache.spark.sql.execution -import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.{Dataset, QueryTest} +import org.apache.spark.sql.IntegratedUDFTestUtils._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.functions.rand import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.IntegerType class InsertSortForLimitAndOffsetSuite extends QueryTest with SharedSparkSession @@ -51,6 +54,7 @@ class InsertSortForLimitAndOffsetSuite extends QueryTest private def hasLocalSort(plan: SparkPlan): Boolean = { find(plan) { case GlobalLimitExec(_, s: SortExec, _) => !s.global + case GlobalLimitExec(_, ProjectExec(_, s: SortExec), _) => !s.global case _ => false }.isDefined } @@ -91,12 +95,16 @@ class InsertSortForLimitAndOffsetSuite extends QueryTest // one partition to read the range-partition shuffle and there is only one shuffle block for // the final single-partition shuffle, random fetch order is no longer an issue. 
SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "false") { - val df = spark.range(10).orderBy($"id" % 8).limit(2).distinct() - df.collect() - val physicalPlan = df.queryExecution.executedPlan - assertHasGlobalLimitExec(physicalPlan) - // Extra local sort is needed for middle LIMIT - assert(hasLocalSort(physicalPlan)) + val df = 1.to(10).map(v => v -> v).toDF("c1", "c2").orderBy($"c1" % 8) + verifySortAdded(df.limit(2)) + verifySortAdded(df.filter($"c2" > rand()).limit(2)) + verifySortAdded(df.select($"c2").limit(2)) + verifySortAdded(df.filter($"c2" > rand()).select($"c2").limit(2)) + + assume(shouldTestPythonUDFs) + val pythonTestUDF = TestPythonUDF(name = "pyUDF", Some(IntegerType)) + verifySortAdded(df.filter(pythonTestUDF($"c2") > rand()).limit(2)) + verifySortAdded(df.select(pythonTestUDF($"c2")).limit(2)) } } @@ -110,11 +118,28 @@ class InsertSortForLimitAndOffsetSuite extends QueryTest } test("middle OFFSET preserves data ordering with the extra sort") { - val df = spark.range(10).orderBy($"id" % 8).offset(2).distinct() - df.collect() - val physicalPlan = df.queryExecution.executedPlan + val df = 1.to(10).map(v => v -> v).toDF("c1", "c2").orderBy($"c1" % 8) + verifySortAdded(df.offset(2)) + verifySortAdded(df.filter($"c2" > rand()).offset(2)) + verifySortAdded(df.select($"c2").offset(2)) + verifySortAdded(df.filter($"c2" > rand()).select($"c2").offset(2)) + + assume(shouldTestPythonUDFs) + val pythonTestUDF = TestPythonUDF(name = "pyUDF", Some(IntegerType)) + verifySortAdded(df.filter(pythonTestUDF($"c2") > rand()).offset(2)) + verifySortAdded(df.select(pythonTestUDF($"c2")).offset(2)) + } + + private def verifySortAdded(df: Dataset[_]): Unit = { + // Do distinct to trigger a shuffle, so that the LIMIT/OFFSET below won't be planned as + // `CollectLimitExec` + val shuffled = df.distinct() + shuffled.collect() + val physicalPlan = shuffled.queryExecution.executedPlan assertHasGlobalLimitExec(physicalPlan) - // Extra local sort is needed for middle OFFSET + // 
Extra local sort is needed for middle LIMIT/OFFSET assert(hasLocalSort(physicalPlan)) + // Make sure the schema does not change. + assert(physicalPlan.schema == shuffled.schema) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala index 974be2f627998..d670b3d8c77d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -18,11 +18,12 @@ package org.apache.spark.sql.execution import scala.collection.mutable import scala.io.Source +import scala.util.Try import org.apache.spark.sql.{AnalysisException, Dataset, ExtendedExplainGenerator, FastOperator} -import org.apache.spark.sql.catalyst.{QueryPlanningTracker, QueryPlanningTrackerCallback} -import org.apache.spark.sql.catalyst.analysis.CurrentNamespace -import org.apache.spark.sql.catalyst.expressions.UnsafeRow +import org.apache.spark.sql.catalyst.{QueryPlanningTracker, QueryPlanningTrackerCallback, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.{CurrentNamespace, UnresolvedFunction, UnresolvedRelation} +import org.apache.spark.sql.catalyst.expressions.{Alias, UnsafeRow} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{CommandResult, LogicalPlan, OneRowRelation, Project, ShowTables, SubqueryAlias} import org.apache.spark.sql.catalyst.trees.TreeNodeTag @@ -405,6 +406,21 @@ class QueryExecutionSuite extends SharedSparkSession { } } + test("SPARK-50600: Failed analysis should send analyzed event") { + val mockCallback = MockCallback() + + def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref)) + + val unresolvedUndefinedFunc = UnresolvedFunction("unknown", Seq.empty, isDistinct = false) + val plan = Project(Seq(Alias(unresolvedUndefinedFunc, "call1")()), table("table")) + val 
dataset = Try { + val df = Dataset.ofRows(spark, plan, new QueryPlanningTracker(Some(mockCallback))) + df.queryExecution.assertAnalyzed() + } + assert(dataset.failed.get.isInstanceOf[AnalysisException]) + mockCallback.assertAnalyzed() + } + case class MockCallbackEagerCommand( var trackerAnalyzed: QueryPlanningTracker = null, var trackerReadyForExecution: QueryPlanningTracker = null) @@ -447,6 +463,15 @@ class QueryExecutionSuite extends SharedSparkSession { var trackerAnalyzed: QueryPlanningTracker = null, var trackerReadyForExecution: QueryPlanningTracker = null) extends QueryPlanningTrackerCallback { + override def analysisFailed( + trackerFromCallback: QueryPlanningTracker, + analyzedPlan: LogicalPlan): Unit = { + trackerAnalyzed = trackerFromCallback + assert(!trackerAnalyzed.phases.keySet.contains(QueryPlanningTracker.ANALYSIS)) + assert(!trackerAnalyzed.phases.keySet.contains(QueryPlanningTracker.OPTIMIZATION)) + assert(!trackerAnalyzed.phases.keySet.contains(QueryPlanningTracker.PLANNING)) + assert(analyzedPlan != null) + } def analyzed(trackerFromCallback: QueryPlanningTracker, plan: LogicalPlan): Unit = { trackerAnalyzed = trackerFromCallback assert(trackerAnalyzed.phases.keySet.contains(QueryPlanningTracker.ANALYSIS)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala new file mode 100644 index 0000000000000..4da3b9ab1d06b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLFunctionSuite.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Test suite for SQL user-defined functions (UDFs). + */ +class SQLFunctionSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + protected override def beforeAll(): Unit = { + super.beforeAll() + Seq((0, 1), (1, 2)).toDF("a", "b").createOrReplaceTempView("t") + } + + test("SQL scalar function") { + withUserDefinedFunction("area" -> false) { + sql( + """ + |CREATE FUNCTION area(width DOUBLE, height DOUBLE) + |RETURNS DOUBLE + |RETURN width * height + |""".stripMargin) + checkAnswer(sql("SELECT area(1, 2)"), Row(2)) + checkAnswer(sql("SELECT area(a, b) FROM t"), Seq(Row(0), Row(2))) + } + } + + test("SQL scalar function with subquery in the function body") { + withUserDefinedFunction("foo" -> false) { + withTable("tbl") { + sql("CREATE TABLE tbl AS SELECT * FROM VALUES (1, 2), (1, 3), (2, 3) t(a, b)") + sql( + """ + |CREATE FUNCTION foo(x INT) RETURNS INT + |RETURN SELECT SUM(b) FROM tbl WHERE x = a; + |""".stripMargin) + checkAnswer(sql("SELECT foo(1)"), Row(5)) + checkAnswer(sql("SELECT foo(a) FROM t"), Seq(Row(null), Row(5))) + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 03d6eb1a50209..acc3cdb01bf3f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -93,6 +93,15 @@ class SparkSqlParserSuite extends AnalysisTest with SharedSparkSession { parameters = Map.empty) } + test("SET with semi-colons") { + assertEqual(s"SET;", SetCommand(None)) + assertEqual(s"SET ;", SetCommand(None)) + assertEqual(s"SET -v;", SetCommand(Some("-v" -> None))) + assertEqual(s"SET -v ;", SetCommand(Some("-v" -> None))) + assertEqual(s"SET spark.sql.ansi.enabled;", SetCommand(Some("spark.sql.ansi.enabled" -> None))) + assertEqual(s"SET spark.sql.ansi.enabled ;", SetCommand(Some("spark.sql.ansi.enabled" -> None))) + } + test("Report Error for invalid usage of SET command") { assertEqual("SET", SetCommand(None)) assertEqual("SET -v", SetCommand(Some("-v", None))) @@ -885,105 +894,118 @@ class SparkSqlParserSuite extends AnalysisTest with SharedSparkSession { // scalastyle:on test("Operator pipe SQL syntax") { - withSQLConf(SQLConf.OPERATOR_PIPE_SYNTAX_ENABLED.key -> "true") { - // Basic selection. - // Here we check that every parsed plan contains a projection and a source relation or - // inline table. - def check(query: String, patterns: Seq[TreePattern]): Unit = { - val plan: LogicalPlan = parser.parsePlan(query) - assert(patterns.exists(plan.containsPattern), s"Failed to parse $query, plan: $plan") - assert(plan.containsAnyPattern(UNRESOLVED_RELATION, LOCAL_RELATION)) - } - def checkPipeSelect(query: String): Unit = check(query, Seq(PROJECT)) - checkPipeSelect("TABLE t |> SELECT 1 AS X") - checkPipeSelect("TABLE t |> SELECT 1 AS X, 2 AS Y |> SELECT X + Y AS Z") - checkPipeSelect("VALUES (0), (1) tab(col) |> SELECT col * 2 AS result") - checkPipeSelect("TABLE t |> EXTEND X + 1 AS Y") - checkPipeSelect("TABLE t |> EXTEND X + 1 AS Y, X + 2 Z") - // Basic WHERE operators. 
- def checkPipeWhere(query: String): Unit = check(query, Seq(FILTER)) - checkPipeWhere("TABLE t |> WHERE X = 1") - checkPipeWhere("TABLE t |> SELECT X, LENGTH(Y) AS Z |> WHERE X + LENGTH(Y) < 4") - checkPipeWhere("TABLE t |> WHERE X = 1 AND Y = 2 |> WHERE X + Y = 3") - checkPipeWhere("VALUES (0), (1) tab(col) |> WHERE col < 1") - // PIVOT and UNPIVOT operations - def checkPivotUnpivot(query: String): Unit = check(query, Seq(PIVOT, UNPIVOT)) - checkPivotUnpivot( - """ - |SELECT * FROM VALUES - | ("dotNET", 2012, 10000), - | ("Java", 2012, 20000), - | ("dotNET", 2012, 5000), - | ("dotNET", 2013, 48000), - | ("Java", 2013, 30000) - | AS courseSales(course, year, earnings) - ||> PIVOT ( - | SUM(earnings) - | FOR course IN ('dotNET', 'Java') - |) - |""".stripMargin) - checkPivotUnpivot( - """ - |SELECT * FROM VALUES - | ("dotNET", 15000, 48000, 22500), - | ("Java", 20000, 30000, NULL) - | AS courseEarnings(course, `2012`, `2013`, `2014`) - ||> UNPIVOT ( - | earningsYear FOR year IN (`2012`, `2013`, `2014`) - |) - |""".stripMargin) - // Sampling operations - def checkSample(query: String): Unit = { - val plan: LogicalPlan = parser.parsePlan(query) - assert(plan.collectFirst(_.isInstanceOf[Sample]).nonEmpty) - assert(plan.containsAnyPattern(UNRESOLVED_RELATION, LOCAL_RELATION)) - } - checkSample("TABLE t |> TABLESAMPLE (50 PERCENT)") - checkSample("TABLE t |> TABLESAMPLE (5 ROWS)") - checkSample("TABLE t |> TABLESAMPLE (BUCKET 4 OUT OF 10)") - // Joins. 
- def checkPipeJoin(query: String): Unit = check(query, Seq(JOIN)) - Seq("", "INNER", "LEFT", "LEFT OUTER", "SEMI", "LEFT SEMI", "RIGHT", "RIGHT OUTER", "FULL", - "FULL OUTER", "ANTI", "LEFT ANTI", "CROSS").foreach { joinType => - checkPipeJoin(s"TABLE t |> $joinType JOIN other ON (t.x = other.x)") - } - // Set operations - def checkDistinct(query: String): Unit = check(query, Seq(DISTINCT_LIKE)) - def checkExcept(query: String): Unit = check(query, Seq(EXCEPT)) - def checkIntersect(query: String): Unit = check(query, Seq(INTERSECT)) - def checkUnion(query: String): Unit = check(query, Seq(UNION)) - checkDistinct("TABLE t |> UNION DISTINCT TABLE t") - checkExcept("TABLE t |> EXCEPT ALL TABLE t") - checkExcept("TABLE t |> EXCEPT DISTINCT TABLE t") - checkExcept("TABLE t |> MINUS ALL TABLE t") - checkExcept("TABLE t |> MINUS DISTINCT TABLE t") - checkIntersect("TABLE t |> INTERSECT ALL TABLE t") - checkUnion("TABLE t |> UNION ALL TABLE t") - // Sorting and distributing operators. - def checkSort(query: String): Unit = check(query, Seq(SORT)) - def checkRepartition(query: String): Unit = check(query, Seq(REPARTITION_OPERATION)) - def checkLimit(query: String): Unit = check(query, Seq(LIMIT)) - checkSort("TABLE t |> ORDER BY x") - checkSort("TABLE t |> SELECT x |> SORT BY x") - checkLimit("TABLE t |> LIMIT 1") - checkLimit("TABLE t |> LIMIT 2 OFFSET 1") - checkRepartition("TABLE t |> DISTRIBUTE BY x |> WHERE x = 1") - checkRepartition("TABLE t |> CLUSTER BY x |> TABLESAMPLE (100 PERCENT)") - checkRepartition("TABLE t |> SORT BY x DISTRIBUTE BY x") - // Aggregation - def checkAggregate(query: String): Unit = check(query, Seq(AGGREGATE)) - checkAggregate("SELECT a, b FROM t |> AGGREGATE SUM(a)") - checkAggregate("SELECT a, b FROM t |> AGGREGATE SUM(a) AS result GROUP BY b") - checkAggregate("SELECT a, b FROM t |> AGGREGATE GROUP BY b") - checkAggregate("SELECT a, b FROM t |> AGGREGATE COUNT(*) AS result GROUP BY b") - // Window - def checkWindow(query: String): Unit = 
check(query, Seq(WITH_WINDOW_DEFINITION)) - checkWindow( - """ - |TABLE windowTestData - ||> SELECT cate, SUM(val) OVER w - | WINDOW w AS (PARTITION BY cate ORDER BY val) - |""".stripMargin) + // Basic selection. + // Here we check that every parsed plan contains a projection and a source relation or + // inline table. + def check(query: String, patterns: Seq[TreePattern]): Unit = { + val plan: LogicalPlan = parser.parsePlan(query) + assert(patterns.exists(plan.containsPattern), s"Failed to parse $query, plan: $plan") + assert(plan.containsAnyPattern(UNRESOLVED_RELATION, LOCAL_RELATION)) + } + def checkPipeSelect(query: String): Unit = check(query, Seq(PROJECT)) + checkPipeSelect("TABLE t |> SELECT 1 AS X") + checkPipeSelect("TABLE t |> SELECT 1 AS X, 2 AS Y |> SELECT X + Y AS Z") + checkPipeSelect("VALUES (0), (1) tab(col) |> SELECT col * 2 AS result") + checkPipeSelect("TABLE t |> EXTEND X + 1 AS Y") + checkPipeSelect("TABLE t |> EXTEND X + 1 AS Y, X + 2 Z") + checkPipeSelect("TABLE t |> EXTEND 1 AS z, 2 AS Z |> SET z = 1, Z = 2") + // FROM operators. + def checkPipeSelectFrom(query: String): Unit = check(query, Seq(PROJECT)) + checkPipeSelectFrom("FROM t |> SELECT 1 AS X") + // Basic WHERE operators. 
+ def checkPipeWhere(query: String): Unit = check(query, Seq(FILTER)) + checkPipeWhere("TABLE t |> WHERE X = 1") + checkPipeWhere("TABLE t |> SELECT X, LENGTH(Y) AS Z |> WHERE X + LENGTH(Y) < 4") + checkPipeWhere("TABLE t |> WHERE X = 1 AND Y = 2 |> WHERE X + Y = 3") + checkPipeWhere("VALUES (0), (1) tab(col) |> WHERE col < 1") + // PIVOT and UNPIVOT operations + def checkPivotUnpivot(query: String): Unit = check(query, Seq(PIVOT, UNPIVOT)) + checkPivotUnpivot( + """ + |SELECT * FROM VALUES + | ("dotNET", 2012, 10000), + | ("Java", 2012, 20000), + | ("dotNET", 2012, 5000), + | ("dotNET", 2013, 48000), + | ("Java", 2013, 30000) + | AS courseSales(course, year, earnings) + ||> PIVOT ( + | SUM(earnings) + | FOR course IN ('dotNET', 'Java') + |) + |""".stripMargin) + checkPivotUnpivot( + """ + |SELECT * FROM VALUES + | ("dotNET", 15000, 48000, 22500), + | ("Java", 20000, 30000, NULL) + | AS courseEarnings(course, `2012`, `2013`, `2014`) + ||> UNPIVOT ( + | earningsYear FOR year IN (`2012`, `2013`, `2014`) + |) + |""".stripMargin) + // Sampling operations + def checkSample(query: String): Unit = { + val plan: LogicalPlan = parser.parsePlan(query) + assert(plan.collectFirst(_.isInstanceOf[Sample]).nonEmpty) + assert(plan.containsAnyPattern(UNRESOLVED_RELATION, LOCAL_RELATION)) + } + checkSample("TABLE t |> TABLESAMPLE (50 PERCENT)") + checkSample("TABLE t |> TABLESAMPLE (5 ROWS)") + checkSample("TABLE t |> TABLESAMPLE (BUCKET 4 OUT OF 10)") + // Joins. 
+ def checkPipeJoin(query: String): Unit = check(query, Seq(JOIN)) + Seq("", "INNER", "LEFT", "LEFT OUTER", "SEMI", "LEFT SEMI", "RIGHT", "RIGHT OUTER", "FULL", + "FULL OUTER", "ANTI", "LEFT ANTI", "CROSS").foreach { joinType => + checkPipeJoin(s"TABLE t |> $joinType JOIN other ON (t.x = other.x)") + } + // Set operations + def checkDistinct(query: String): Unit = check(query, Seq(DISTINCT_LIKE)) + def checkExcept(query: String): Unit = check(query, Seq(EXCEPT)) + def checkIntersect(query: String): Unit = check(query, Seq(INTERSECT)) + def checkUnion(query: String): Unit = check(query, Seq(UNION)) + checkDistinct("TABLE t |> UNION DISTINCT TABLE t") + checkExcept("TABLE t |> EXCEPT ALL TABLE t") + checkExcept("TABLE t |> EXCEPT DISTINCT TABLE t") + checkExcept("TABLE t |> MINUS ALL TABLE t") + checkExcept("TABLE t |> MINUS DISTINCT TABLE t") + checkIntersect("TABLE t |> INTERSECT ALL TABLE t") + checkUnion("TABLE t |> UNION ALL TABLE t") + // Sorting and distributing operators. + def checkSort(query: String): Unit = check(query, Seq(SORT)) + def checkRepartition(query: String): Unit = check(query, Seq(REPARTITION_OPERATION)) + def checkLimit(query: String): Unit = check(query, Seq(LIMIT)) + checkSort("TABLE t |> ORDER BY x") + checkSort("TABLE t |> SELECT x |> SORT BY x") + checkLimit("TABLE t |> LIMIT 1") + checkLimit("TABLE t |> LIMIT 2 OFFSET 1") + checkRepartition("TABLE t |> DISTRIBUTE BY x |> WHERE x = 1") + checkRepartition("TABLE t |> CLUSTER BY x |> TABLESAMPLE (100 PERCENT)") + checkRepartition("TABLE t |> SORT BY x DISTRIBUTE BY x") + // Aggregation + def checkAggregate(query: String): Unit = check(query, Seq(AGGREGATE)) + checkAggregate("SELECT a, b FROM t |> AGGREGATE SUM(a)") + checkAggregate("SELECT a, b FROM t |> AGGREGATE SUM(a) AS result GROUP BY b") + checkAggregate("SELECT a, b FROM t |> AGGREGATE GROUP BY b") + checkAggregate("SELECT a, b FROM t |> AGGREGATE COUNT(*) AS result GROUP BY b") + // Window + def checkWindow(query: String): Unit = 
check(query, Seq(WITH_WINDOW_DEFINITION)) + checkWindow( + """ + |TABLE windowTestData + ||> SELECT cate, SUM(val) OVER w + | WINDOW w AS (PARTITION BY cate ORDER BY val) + |""".stripMargin) + withSQLConf(SQLConf.OPERATOR_PIPE_SYNTAX_ENABLED.key -> "false") { + val sql = s"TABLE t |> SELECT 1 AS X" + checkError( + exception = parseException(sql), + condition = "_LEGACY_ERROR_TEMP_0035", + parameters = Map("message" -> "Operator pipe SQL syntax using |>"), + context = ExpectedContext( + fragment = sql, + start = 0, + stop = sql.length - 1)) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/LargeRowBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/LargeRowBenchmark.scala new file mode 100644 index 0000000000000..8b4f78e79913a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/LargeRowBenchmark.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.functions.lit + +/** + * Benchmark to measure performance for large row table. + * {{{ + * To run this benchmark: + * 1. 
without sbt: bin/spark-submit --class + * --jars , + * 2. build/sbt "sql/Test/runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain " + * Results will be written to "benchmarks/LargeRowBenchmark-results.txt". + * }}} + */ +object LargeRowBenchmark extends SqlBasedBenchmark { + + /** + * Prepares a table with large row for benchmarking. The table will be written into + * the given path. + */ + private def writeLargeRow(path: String, rowsNum: Int, numCols: Int, cellSizeMb: Double): Unit = { + val stringLength = (cellSizeMb * 1024 * 1024).toInt + spark.range(rowsNum) + .select(Seq.tabulate(numCols)(i => lit("a" * stringLength).as(s"col$i")): _*) + .write.parquet(path) + } + + private def runLargeRowBenchmark(rowsNum: Int, numCols: Int, cellSizeMb: Double): Unit = { + withTempPath { path => + val benchmark = new Benchmark( + s"#rows: $rowsNum, #cols: $numCols, cell: $cellSizeMb MB", rowsNum, output = output) + writeLargeRow(path.getAbsolutePath, rowsNum, numCols, cellSizeMb) + val df = spark.read.parquet(path.getAbsolutePath) + df.createOrReplaceTempView("T") + benchmark.addCase("built-in UPPER") { _ => + val sqlSelect = df.columns.map(c => s"UPPER($c) as $c").mkString(", ") + spark.sql(s"SELECT $sqlSelect FROM T").noop() + } + benchmark.addCase("udf UPPER") { _ => + val sqlSelect = df.columns.map(c => s"udfUpper($c) as $c").mkString(", ") + spark.sql(s"SELECT $sqlSelect FROM T").noop() + } + benchmark.run() + } + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + runBenchmark("Large Row Benchmark") { + val udfUpper = (s: String) => s.toUpperCase() + spark.udf.register("udfUpper", udfUpper(_: String): String) + + val benchmarks = Array( + Map("rows" -> 100, "cols" -> 10, "cellSizeMb" -> 1.3), // OutOfMemory @ 100, 10, 1.4 + Map("rows" -> 1, "cols" -> 1, "cellSizeMb" -> 300.0), // OutOfMemory @ 1, 1, 400 + Map("rows" -> 1, "cols" -> 200, "cellSizeMb" -> 1.0) // OutOfMemory @ 1, 300, 1 + ) + + 
benchmarks.foreach { b => + val rows = b("rows").asInstanceOf[Int] + val cols = b("cols").asInstanceOf[Int] + val cellSizeMb = b("cellSizeMb").asInstanceOf[Double] + runLargeRowBenchmark(rows, cols, cellSizeMb) + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SetOperationsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SetOperationsBenchmark.scala new file mode 100644 index 0000000000000..379e31ead2f31 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SetOperationsBenchmark.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.benchmark.Benchmark + +/** + * Benchmark to measure performance for set operations. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class + * --jars , + * 2. build/sbt "sql/Test/runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain " + * Results will be written to "benchmarks/SetOperationsBenchmark-results.txt". 
+ * }}} + */ +object SetOperationsBenchmark extends SqlBasedBenchmark { + private val setOperations = Seq("UNION ALL", "EXCEPT ALL", "INTERSECT ALL") + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + runBenchmark("Set Operations Benchmark") { + val numOperations = 500 + val numValues = 30 + + val benchmark = + new Benchmark( + "Parsing + Analysis", + valuesPerIteration = numOperations * numValues, + output = output + ) + + for (operation <- setOperations) { + benchmark.addCase(operation) { _ => + spark + .sql( + generateQuery( + operation = operation, + numOperations = numOperations, + numValues = numValues + ) + ) + .queryExecution + .analyzed + () + } + } + + benchmark.run() + } + } + + private def generateQuery(operation: String, numOperations: Int, numValues: Int) = { + s""" + SELECT + * + FROM + ${generateOperations( + operation = operation, + numOperations = numOperations, + numValues = numValues + )} + """ + } + + private def generateOperations(operation: String, numOperations: Int, numValues: Int) = { + (0 until numOperations).map(_ => generateValues(numValues)).mkString(s" ${operation} ") + } + + private def generateValues(num: Int) = { + s"VALUES (${(0 until num).mkString(", ")})" + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index cb25942822f46..13ea6f5a30536 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkNumberFormatException import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.quoteIdentifier +import 
org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.internal.SQLConf /** @@ -97,10 +98,20 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with DDLCommandTestUtils withNamespaceAndTable("ns", "tbl") { t => spark.sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val errMsg = intercept[AnalysisException] { - spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") - }.getMessage - assert(errMsg.contains("ID is not a valid partition column")) + val expectedTableName = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + s"`$SESSION_CATALOG_NAME`.`ns`.`tbl`" + } else { + "`test_catalog`.`ns`.`tbl`" + } + checkError( + exception = intercept[AnalysisException] { + spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`ID`", + "tableName" -> expectedTableName) + ) } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index 279042f675cd5..a49a94174195c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import 
org.apache.spark.sql.internal.SQLConf /** @@ -103,10 +104,20 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t DROP PARTITION (ID=1)") - }.getMessage - assert(errMsg.contains("ID is not a valid partition column")) + val expectedTableName = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + s"`$SESSION_CATALOG_NAME`.`ns`.`tbl`" + } else { + "`test_catalog`.`ns`.`tbl`" + } + checkError( + exception = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (ID=1)") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`ID`", + "tableName" -> expectedTableName) + ) } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala index 905e6cfb9caaa..186f2b293ea81 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.internal.SQLConf /** @@ -170,10 +171,20 @@ trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUt 
checkPartitions(t, Map("id" -> "1")) withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") - }.getMessage - assert(errMsg.contains("ID is not a valid partition column")) + val expectedTableName = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + s"`$SESSION_CATALOG_NAME`.`ns`.`tbl`" + } else { + "`test_catalog`.`ns`.`tbl`" + } + checkError( + exception = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`ID`", + "tableName" -> expectedTableName) + ) checkPartitions(t, Map("id" -> "1")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesSuiteBase.scala index 52a90497fdd37..9ec63acb1d3a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesSuiteBase.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, TableCatalog} +import org.apache.spark.sql.connector.catalog.TableCatalog import org.apache.spark.sql.errors.DataTypeErrors.toSQLId import org.apache.spark.sql.internal.SQLConf @@ -89,7 +89,7 @@ trait AlterTableSetTblPropertiesSuiteBase extends QueryTest with DDLCommandTestU PROP_EXTERNAL -> "please use CREATE EXTERNAL TABLE" ) withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { - 
CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + tableLegacyProperties.foreach { key => withNamespaceAndTable("ns", "tbl") { t => val sqlText = s"ALTER TABLE $t SET TBLPROPERTIES ('$key'='bar')" checkError( @@ -109,7 +109,7 @@ trait AlterTableSetTblPropertiesSuiteBase extends QueryTest with DDLCommandTestU } } withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { - CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + tableLegacyProperties.foreach { key => Seq("OPTIONS", "TBLPROPERTIES").foreach { clause => withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (key int) USING parquet $clause ('$key'='bar')") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableUnsetTblPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableUnsetTblPropertiesSuiteBase.scala index 0013919fca08f..0e9e9d9c60815 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableUnsetTblPropertiesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableUnsetTblPropertiesSuiteBase.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, TableCatalog} +import org.apache.spark.sql.connector.catalog.TableCatalog import org.apache.spark.sql.errors.DataTypeErrors.toSQLId import org.apache.spark.sql.internal.SQLConf @@ -109,7 +109,7 @@ trait AlterTableUnsetTblPropertiesSuiteBase extends QueryTest with DDLCommandTes PROP_EXTERNAL -> "please use CREATE EXTERNAL TABLE" ) withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { - CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + 
tableLegacyProperties.foreach { key => withNamespaceAndTable("ns", "tbl") { t => val sqlText = s"ALTER TABLE $t UNSET TBLPROPERTIES ('$key')" checkError( @@ -129,7 +129,7 @@ trait AlterTableUnsetTblPropertiesSuiteBase extends QueryTest with DDLCommandTes } } withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { - CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + tableLegacyProperties.foreach { key => Seq("OPTIONS", "TBLPROPERTIES").foreach { clause => withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (key int) USING parquet $clause ('$key'='bar')") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionParserSuite.scala new file mode 100644 index 0000000000000..75b42c6440719 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionParserSuite.scala @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedIdentifier} +import org.apache.spark.sql.catalyst.catalog.LanguageSQL +import org.apache.spark.sql.catalyst.plans.logical.CreateUserDefinedFunction +import org.apache.spark.sql.execution.SparkSqlParser + +class CreateSQLFunctionParserSuite extends AnalysisTest { + private lazy val parser = new SparkSqlParser() + + private def intercept(sqlCommand: String, messages: String*): Unit = + interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)() + + private def checkParseError( + sqlCommand: String, + errorClass: String, + parameters: Map[String, String], + queryContext: Array[ExpectedContext] = Array.empty): Unit = + assertParseErrorClass(parser.parsePlan, sqlCommand, errorClass, parameters, queryContext) + + // scalastyle:off argcount + private def createSQLFunction( + nameParts: Seq[String], + inputParamText: Option[String] = None, + returnTypeText: String = "INT", + exprText: Option[String] = None, + queryText: Option[String] = None, + comment: Option[String] = None, + isDeterministic: Option[Boolean] = None, + containsSQL: Option[Boolean] = None, + isTableFunc: Boolean = false, + ignoreIfExists: Boolean = false, + replace: Boolean = false): CreateUserDefinedFunction = { + // scalastyle:on argcount + CreateUserDefinedFunction( + UnresolvedIdentifier(nameParts), + inputParamText = inputParamText, + returnTypeText = returnTypeText, + exprText = exprText, + queryText = queryText, + comment = comment, + isDeterministic = isDeterministic, + containsSQL = containsSQL, + language = LanguageSQL, + isTableFunc = isTableFunc, + ignoreIfExists = ignoreIfExists, + replace = replace) + } + + // scalastyle:off argcount + private def createSQLFunctionCommand( + name: String, + inputParamText: Option[String] = None, + returnTypeText: String = "INT", + exprText: Option[String] = None, + 
queryText: Option[String] = None, + comment: Option[String] = None, + isDeterministic: Option[Boolean] = None, + containsSQL: Option[Boolean] = None, + isTableFunc: Boolean = false, + ignoreIfExists: Boolean = false, + replace: Boolean = false): CreateSQLFunctionCommand = { + // scalastyle:on argcount + CreateSQLFunctionCommand( + FunctionIdentifier(name), + inputParamText = inputParamText, + returnTypeText = returnTypeText, + exprText = exprText, + queryText = queryText, + comment = comment, + isDeterministic = isDeterministic, + containsSQL = containsSQL, + isTableFunc = isTableFunc, + isTemp = true, + ignoreIfExists = ignoreIfExists, + replace = replace) + } + + test("create temporary SQL functions") { + comparePlans( + parser.parsePlan("CREATE TEMPORARY FUNCTION a() RETURNS INT RETURN 1"), + createSQLFunctionCommand("a", exprText = Some("1"))) + + comparePlans( + parser.parsePlan( + "CREATE TEMPORARY FUNCTION a(x INT) RETURNS TABLE (a INT) RETURN SELECT x"), + createSQLFunctionCommand( + name = "a", + inputParamText = Some("x INT"), + returnTypeText = "a INT", + queryText = Some("SELECT x"), + isTableFunc = true)) + + comparePlans( + parser.parsePlan("CREATE OR REPLACE TEMPORARY FUNCTION a() RETURNS INT RETURN 1"), + createSQLFunctionCommand("a", exprText = Some("1"), replace = true)) + + checkParseError( + "CREATE TEMPORARY FUNCTION a.b() RETURNS INT RETURN 1", + errorClass = "INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE", + parameters = Map("database" -> "`a`"), + queryContext = Array( + ExpectedContext("CREATE TEMPORARY FUNCTION a.b() RETURNS INT RETURN 1", 0, 51) + ) + ) + + checkParseError( + "CREATE TEMPORARY FUNCTION a.b.c() RETURNS INT RETURN 1", + errorClass = "INVALID_SQL_SYNTAX.MULTI_PART_NAME", + parameters = Map( + "statement" -> "CREATE TEMPORARY FUNCTION", + "name" -> "`a`.`b`.`c`"), + queryContext = Array( + ExpectedContext("CREATE TEMPORARY FUNCTION a.b.c() RETURNS INT RETURN 1", 0, 53) + ) + ) + + checkParseError( + "CREATE TEMPORARY 
FUNCTION IF NOT EXISTS a() RETURNS INT RETURN 1", + errorClass = "INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_IF_NOT_EXISTS", + parameters = Map.empty, + queryContext = Array( + ExpectedContext("CREATE TEMPORARY FUNCTION IF NOT EXISTS a() RETURNS INT RETURN 1", 0, 63) + ) + ) + } + + test("create persistent SQL functions") { + comparePlans( + parser.parsePlan("CREATE FUNCTION a() RETURNS INT RETURN 1"), + createSQLFunction(Seq("a"), exprText = Some("1"))) + + comparePlans( + parser.parsePlan("CREATE FUNCTION a.b(x INT) RETURNS INT RETURN x"), + createSQLFunction(Seq("a", "b"), Some("x INT"), exprText = Some("x"))) + + comparePlans(parser.parsePlan( + "CREATE FUNCTION a.b.c(x INT) RETURNS TABLE (a INT) RETURN SELECT x"), + createSQLFunction(Seq("a", "b", "c"), Some("x INT"), returnTypeText = "a INT", None, + Some("SELECT x"), isTableFunc = true)) + + comparePlans(parser.parsePlan("CREATE FUNCTION IF NOT EXISTS a() RETURNS INT RETURN 1"), + createSQLFunction(Seq("a"), exprText = Some("1"), ignoreIfExists = true) + ) + + comparePlans(parser.parsePlan("CREATE OR REPLACE FUNCTION a() RETURNS INT RETURN 1"), + createSQLFunction(Seq("a"), exprText = Some("1"), replace = true)) + + comparePlans( + parser.parsePlan( + """ + |CREATE FUNCTION a(x INT COMMENT 'x') RETURNS INT + |LANGUAGE SQL DETERMINISTIC CONTAINS SQL + |COMMENT 'function' + |RETURN x + |""".stripMargin), + createSQLFunction(Seq("a"), inputParamText = Some("x INT COMMENT 'x'"), + exprText = Some("x"), isDeterministic = Some(true), containsSQL = Some(true), + comment = Some("function")) + ) + + intercept("CREATE OR REPLACE FUNCTION IF NOT EXISTS a() RETURNS INT RETURN 1", + "Cannot create a routine with both IF NOT EXISTS and REPLACE specified") + } + + test("create SQL functions with unsupported routine characteristics") { + intercept("CREATE FUNCTION foo() RETURNS INT LANGUAGE blah RETURN 1", + "Operation not allowed: Unsupported language for user defined functions: blah") + + intercept("CREATE FUNCTION foo() 
RETURNS INT SPECIFIC foo1 RETURN 1", + "Operation not allowed: SQL function with SPECIFIC name is not supported") + + intercept("CREATE FUNCTION foo() RETURNS INT NO SQL RETURN 1", + "Operation not allowed: SQL function with NO SQL is not supported") + + intercept("CREATE FUNCTION foo() RETURNS INT NO SQL CONTAINS SQL RETURN 1", + "Found duplicate clauses: SQL DATA ACCESS") + + intercept("CREATE FUNCTION foo() RETURNS INT RETURNS NULL ON NULL INPUT RETURN 1", + "Operation not allowed: SQL function with RETURNS NULL ON NULL INPUT is not supported") + + intercept("CREATE FUNCTION foo() RETURNS INT SQL SECURITY INVOKER RETURN 1", + "Operation not allowed: SQL function with SQL SECURITY INVOKER is not supported") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala index 39f2abd35c2b5..39624a33d8614 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala @@ -26,6 +26,7 @@ import org.scalatest.Tag import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, TableCatalog} import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.test.SQLTestUtils @@ -172,6 +173,11 @@ trait DDLCommandTestUtils extends SQLTestUtils { FileUtils.copyDirectory(new File(part0Loc), new File(part1Loc)) part1Loc } + + def tableLegacyProperties: Seq[String] = { + val excludedProperties = Set(TableCatalog.PROP_COMMENT, TableCatalog.PROP_COLLATION) + CatalogV2Util.TABLE_RESERVED_PROPERTIES.filterNot(excludedProperties.contains) + } } object DDLCommandTestUtils { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 8b868c0e17230..3dea8593b428d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.command import org.apache.spark.SparkThrowable -import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, SchemaCompensation, UnresolvedAttribute, UnresolvedFunctionName, UnresolvedIdentifier} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -37,9 +36,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { super.parseException(parser.parsePlan)(sqlText) } - private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)() - private def compareTransformQuery(sql: String, expected: LogicalPlan): Unit = { val plan = parser.parsePlan(sql).asInstanceOf[ScriptTransformation].copy(ioschema = null) comparePlans(plan, expected, checkAnalysis = false) @@ -498,6 +494,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { UnresolvedIdentifier(Seq("view1")), Seq.empty[(String, Option[String])], None, + None, Map.empty[String, String], Some("SELECT * FROM tab1"), parser.parsePlan("SELECT * FROM tab1"), @@ -513,6 +510,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { Seq("a").asTableIdentifier, Seq.empty[(String, Option[String])], None, + None, Map.empty[String, String], Some("SELECT * FROM tab1"), parser.parsePlan("SELECT * FROM tab1"), @@ -539,6 +537,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { |(col1, col3 COMMENT 'hello') 
|TBLPROPERTIES('prop1Key'="prop1Val") |COMMENT 'BLABLA' + |DEFAULT COLLATION uNiCodE |AS SELECT * FROM tab1 """.stripMargin val parsed1 = parser.parsePlan(v1) @@ -546,6 +545,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { UnresolvedIdentifier(Seq("view1")), Seq("col1" -> None, "col3" -> Some("hello")), Some("BLABLA"), + Some("UNICODE"), Map("prop1Key" -> "prop1Val"), Some("SELECT * FROM tab1"), parser.parsePlan("SELECT * FROM tab1"), @@ -559,6 +559,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { |CREATE OR REPLACE GLOBAL TEMPORARY VIEW a |(col1, col3 COMMENT 'hello') |COMMENT 'BLABLA' + |DEFAULT COLLATION uNiCoDe |AS SELECT * FROM tab1 """.stripMargin val parsed2 = parser.parsePlan(v2) @@ -566,6 +567,7 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { Seq("a").asTableIdentifier, Seq("col1" -> None, "col3" -> Some("hello")), Some("BLABLA"), + Some("UNICODE"), Map(), Some("SELECT * FROM tab1"), parser.parsePlan("SELECT * FROM tab1"), @@ -821,44 +823,4 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { parser.parsePlan("SHOW CATALOGS LIKE 'defau*'"), ShowCatalogsCommand(Some("defau*"))) } - - test("Create SQL functions") { - comparePlans( - parser.parsePlan("CREATE TEMP FUNCTION foo() RETURNS INT RETURN 1"), - CreateSQLFunctionCommand( - FunctionIdentifier("foo"), - inputParamText = None, - returnTypeText = "INT", - exprText = Some("1"), - queryText = None, - comment = None, - isDeterministic = None, - containsSQL = None, - isTableFunc = false, - isTemp = true, - ignoreIfExists = false, - replace = false)) - intercept("CREATE FUNCTION foo() RETURNS INT RETURN 1", - "Operation not allowed: creating persistent SQL functions is not supported") - } - - test("create SQL functions with unsupported routine characteristics") { - intercept("CREATE FUNCTION foo() RETURNS INT LANGUAGE blah RETURN 1", - "Operation not allowed: Unsupported language for user defined functions: blah") - - 
intercept("CREATE FUNCTION foo() RETURNS INT SPECIFIC foo1 RETURN 1", - "Operation not allowed: SQL function with SPECIFIC name is not supported") - - intercept("CREATE FUNCTION foo() RETURNS INT NO SQL RETURN 1", - "Operation not allowed: SQL function with NO SQL is not supported") - - intercept("CREATE FUNCTION foo() RETURNS INT NO SQL CONTAINS SQL RETURN 1", - "Found duplicate clauses: SQL DATA ACCESS") - - intercept("CREATE FUNCTION foo() RETURNS INT RETURNS NULL ON NULL INPUT RETURN 1", - "Operation not allowed: SQL function with RETURNS NULL ON NULL INPUT is not supported") - - intercept("CREATE FUNCTION foo() RETURNS INT SQL SECURITY INVOKER RETURN 1", - "Operation not allowed: SQL function with SQL SECURITY INVOKER is not supported") - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 32a63f5c61976..d91d762048d29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2324,9 +2324,9 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { // Plain `StringType`. sql("CREATE TABLE t1(col STRING) USING parquet") sql("INSERT INTO t1 VALUES ('a')") - checkAnswer(sql("SELECT COLLATION(col) FROM t1"), Row("UTF8_BINARY")) + checkAnswer(sql("SELECT COLLATION(col) FROM t1"), Row("SYSTEM.BUILTIN.UTF8_BINARY")) sql("ALTER TABLE t1 ALTER COLUMN col TYPE STRING COLLATE UTF8_LCASE") - checkAnswer(sql("SELECT COLLATION(col) FROM t1"), Row("UTF8_LCASE")) + checkAnswer(sql("SELECT COLLATION(col) FROM t1"), Row("SYSTEM.BUILTIN.UTF8_LCASE")) // Invalid "ALTER COLUMN" to Integer. val alterInt = "ALTER TABLE t1 ALTER COLUMN col TYPE INTEGER" @@ -2348,23 +2348,23 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { // `ArrayType` with collation. 
sql("CREATE TABLE t2(col ARRAY) USING parquet") sql("INSERT INTO t2 VALUES (ARRAY('a'))") - checkAnswer(sql("SELECT COLLATION(col[0]) FROM t2"), Row("UTF8_BINARY")) + checkAnswer(sql("SELECT COLLATION(col[0]) FROM t2"), Row("SYSTEM.BUILTIN.UTF8_BINARY")) assertThrows[AnalysisException] { sql("ALTER TABLE t2 ALTER COLUMN col TYPE ARRAY") } - checkAnswer(sql("SELECT COLLATION(col[0]) FROM t2"), Row("UTF8_BINARY")) + checkAnswer(sql("SELECT COLLATION(col[0]) FROM t2"), Row("SYSTEM.BUILTIN.UTF8_BINARY")) // `MapType` with collation. sql("CREATE TABLE t3(col MAP) USING parquet") sql("INSERT INTO t3 VALUES (MAP('k', 'v'))") - checkAnswer(sql("SELECT COLLATION(col['k']) FROM t3"), Row("UTF8_BINARY")) + checkAnswer(sql("SELECT COLLATION(col['k']) FROM t3"), Row("SYSTEM.BUILTIN.UTF8_BINARY")) assertThrows[AnalysisException] { sql( """ |ALTER TABLE t3 ALTER COLUMN col TYPE |MAP""".stripMargin) } - checkAnswer(sql("SELECT COLLATION(col['k']) FROM t3"), Row("UTF8_BINARY")) + checkAnswer(sql("SELECT COLLATION(col['k']) FROM t3"), Row("SYSTEM.BUILTIN.UTF8_BINARY")) // Invalid change of map key collation. val alterMap = @@ -2388,11 +2388,11 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { // `StructType` with collation. 
sql("CREATE TABLE t4(col STRUCT) USING parquet") sql("INSERT INTO t4 VALUES (NAMED_STRUCT('a', 'value'))") - checkAnswer(sql("SELECT COLLATION(col.a) FROM t4"), Row("UTF8_BINARY")) + checkAnswer(sql("SELECT COLLATION(col.a) FROM t4"), Row("SYSTEM.BUILTIN.UTF8_BINARY")) assertThrows[AnalysisException] { sql("ALTER TABLE t4 ALTER COLUMN col TYPE STRUCT") } - checkAnswer(sql("SELECT COLLATION(col.a) FROM t4"), Row("UTF8_BINARY")) + checkAnswer(sql("SELECT COLLATION(col.a) FROM t4"), Row("SYSTEM.BUILTIN.UTF8_BINARY")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala index 944f20bf8e924..f8174d24c9499 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableParserSuite.scala @@ -17,11 +17,14 @@ package org.apache.spark.sql.execution.command +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedTableOrView} -import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan import org.apache.spark.sql.catalyst.plans.logical.{DescribeColumn, DescribeRelation} +import org.apache.spark.sql.test.SharedSparkSession + +class DescribeTableParserSuite extends SharedSparkSession with AnalysisTest { + private def parsePlan(statement: String) = spark.sessionState.sqlParser.parsePlan(statement) -class DescribeTableParserSuite extends AnalysisTest { test("SPARK-17328: Fix NPE with EXPLAIN DESCRIBE TABLE") { comparePlans(parsePlan("describe t"), DescribeRelation( @@ -75,6 +78,12 @@ class DescribeTableParserSuite extends AnalysisTest { UnresolvedAttribute(Seq("col")), isExtended = true)) + val error = intercept[AnalysisException](parsePlan("DESCRIBE EXTENDED t col AS JSON")) + + checkError( + exception = error, + 
condition = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON") + val sql = "DESCRIBE TABLE t PARTITION (ds='1970-01-01') col" checkError( exception = parseException(parsePlan)(sql), @@ -85,4 +94,17 @@ class DescribeTableParserSuite extends AnalysisTest { start = 0, stop = 47)) } + + test("retain sql text position") { + val tbl = "unknown" + val sqlStatement = s"DESCRIBE TABLE $tbl" + val startPos = sqlStatement.indexOf(tbl) + assert(startPos != -1) + assertAnalysisErrorCondition( + parsePlan(sqlStatement), + "TABLE_OR_VIEW_NOT_FOUND", + Map("relationName" -> s"`$tbl`"), + Array(ExpectedContext(tbl, startPos, startPos + tbl.length - 1)) + ) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala index c4e9ff93ef85d..f8d2e9dd3a3cb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DescribeTableSuiteBase.scala @@ -293,4 +293,29 @@ trait DescribeTableSuiteBase extends QueryTest with DDLCommandTestUtils { Row("col1", "string", null))) } } + + Seq(true, false).foreach { hasCollations => + test(s"DESCRIBE TABLE EXTENDED with collation specified = $hasCollations") { + + withNamespaceAndTable("ns", "tbl") { tbl => + val getCollationDescription = () => sql(s"DESCRIBE TABLE EXTENDED $tbl") + .where("col_name = 'Collation'") + + val defaultCollation = if (hasCollations) "DEFAULT COLLATION uNiCoDe" else "" + + sql(s"CREATE TABLE $tbl (id string) $defaultUsing $defaultCollation") + val descriptionDf = getCollationDescription() + + if (hasCollations) { + checkAnswer(descriptionDf, Seq(Row("Collation", "UNICODE", ""))) + } else { + assert(descriptionDf.isEmpty) + } + + sql(s"ALTER TABLE $tbl DEFAULT COLLATION UniCode_cI_rTrIm") + val newDescription = getCollationDescription() + checkAnswer(newDescription, 
Seq(Row("Collation", "UNICODE_CI_RTRIM", ""))) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 92467cbcb6c05..2cc203129817b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, AnalysisTest, An import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog, TempVariableManager} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Cast, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, Literal, StringLiteral} import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke -import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{AlterColumn, AnalysisOnlyCommand, AppendData, Assignment, CreateTable, CreateTableAsSelect, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, OverwriteByExpression, OverwritePartitionsDynamic, Project, SetTableLocation, SetTableProperties, ShowTableProperties, SubqueryAlias, UnsetTableProperties, UpdateAction, UpdateTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId @@ -45,11 +45,12 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode} import 
org.apache.spark.sql.sources.SimpleScanSource +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, CharType, DoubleType, IntegerType, LongType, StringType, StructField, StructType, VarcharType} import org.apache.spark.unsafe.types.UTF8String -class PlanResolutionSuite extends AnalysisTest { - import CatalystSqlParser._ +class PlanResolutionSuite extends SharedSparkSession with AnalysisTest { + private def parsePlan(statement: String) = spark.sessionState.sqlParser.parsePlan(statement) private val v1Format = classOf[SimpleScanSource].getName private val v2Format = classOf[FakeV2Provider].getName @@ -240,7 +241,7 @@ class PlanResolutionSuite extends AnalysisTest { } // We don't check analysis here by default, as we expect the plan to be unresolved // such as `CreateTable`. - val analyzed = analyzer.execute(CatalystSqlParser.parsePlan(query)) + val analyzed = analyzer.execute(parsePlan(query)) if (checkAnalysis) { analyzer.checkAnalysis(analyzed) } @@ -2867,9 +2868,8 @@ class PlanResolutionSuite extends AnalysisTest { exception = intercept[ParseException] { parsePlan(query) }, - condition = "_LEGACY_ERROR_TEMP_0035", - parameters = Map( - "message" -> "CREATE TEMPORARY TABLE ..., use CREATE TEMPORARY VIEW instead"), + condition = "_LEGACY_ERROR_TEMP_0046", + parameters = Map(), context = ExpectedContext(fragment = query, start = 0, stop = 48)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 462b967a75900..f7d41556b4e6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} 
+import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StringType, StructType} @@ -66,10 +67,20 @@ trait ShowPartitionsSuiteBase extends QueryTest with DDLCommandTestUtils { test("non-partitioning columns") { withNamespaceAndTable("ns", "dateTable") { t => createDateTable(t) - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $t PARTITION(abcd=2015, xyz=1)") - }.getMessage - assert(errMsg.contains("abcd is not a valid partition column")) + val expectedTableName = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + s"`$SESSION_CATALOG_NAME`.`ns`.`datetable`" + } else { + "`test_catalog`.`ns`.`dateTable`" + } + checkError( + exception = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $t PARTITION(abcd=2015, xyz=1)") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`abcd`", + "tableName" -> expectedTableName) + ) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala index f6a5f6a7da26a..dbeb67c253208 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala @@ -353,7 +353,7 @@ trait ShowTablesSuiteBase extends QueryTest with DDLCommandTestUtils { |View Text: SELECT id FROM $catalog.$namespace.$table |View Schema Mode: BINDING |View Catalog and Namespace: spark_catalog.default - |View Query Output Columns: [id] + |View Query Output Columns: [`id`] |Schema: root | |-- id: integer (nullable = true)""".stripMargin assert(actualLocalResult === expectedLocalResult) @@ -380,7 +380,7 @@ trait ShowTablesSuiteBase extends QueryTest with DDLCommandTestUtils { |View Text: SELECT id FROM $catalog.$namespace.$table |View 
Schema Mode: BINDING |View Catalog and Namespace: spark_catalog.default - |View Query Output Columns: [id] + |View Query Output Columns: [`id`] |Schema: root | |-- id: integer (nullable = true)""".stripMargin assert(actualGlobalResult1 === expectedGlobalResult1) @@ -398,7 +398,7 @@ trait ShowTablesSuiteBase extends QueryTest with DDLCommandTestUtils { |View Text: SELECT id FROM $catalog.$namespace.$table |View Schema Mode: BINDING |View Catalog and Namespace: spark_catalog.default - |View Query Output Columns: [id] + |View Query Output Columns: [`id`] |Schema: root | |-- id: integer (nullable = true)""".stripMargin assert(actualLocalResult2 === expectedLocalResult2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TruncateTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TruncateTableSuiteBase.scala index 8c985ea1f0527..b61065f41c5e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TruncateTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TruncateTableSuiteBase.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.internal.SQLConf /** @@ -103,10 +104,20 @@ trait TruncateTableSuiteBase extends QueryTest with DDLCommandTestUtils { } // throw exception if the column in partition spec is not a partition column. 
- val errMsg = intercept[AnalysisException] { - sql(s"TRUNCATE TABLE $t PARTITION (unknown = 1)") - }.getMessage - assert(errMsg.contains("unknown is not a valid partition column")) + val expectedTableName = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + s"`$SESSION_CATALOG_NAME`.`ns`.`parttable`" + } else { + "`test_catalog`.`ns`.`partTable`" + } + checkError( + exception = intercept[AnalysisException] { + sql(s"TRUNCATE TABLE $t PARTITION (unknown = 1)") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`unknown`", + "tableName" -> expectedTableName) + ) } } @@ -117,10 +128,28 @@ trait TruncateTableSuiteBase extends QueryTest with DDLCommandTestUtils { sql(s"CREATE TABLE $t (c0 INT) $defaultUsing") sql(s"INSERT INTO $t SELECT 0") - val errMsg = intercept[AnalysisException] { - sql(s"TRUNCATE TABLE $t PARTITION (c0=1)") - }.getMessage - assert(errMsg.contains(invalidPartColumnError)) + val expectedTableName = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + s"`$SESSION_CATALOG_NAME`.`ns`.`tbl`" + } else { + "`test_catalog`.`ns`.`tbl`" + } + val expectedCondition = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + "_LEGACY_ERROR_TEMP_1267" + } else { + "PARTITIONS_NOT_FOUND" + } + val expectedParameters = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + Map("tableIdentWithDB" -> expectedTableName) + } else { + Map("partitionList" -> "`c0`", "tableName" -> expectedTableName) + } + checkError( + exception = intercept[AnalysisException] { + sql(s"TRUNCATE TABLE $t PARTITION (c0=1)") + }, + condition = expectedCondition, + parameters = expectedParameters + ) } } @@ -145,10 +174,20 @@ trait TruncateTableSuiteBase extends QueryTest with DDLCommandTestUtils { sql(s"INSERT INTO $t PARTITION (id=0) SELECT 'abc'") sql(s"INSERT INTO $t PARTITION (id=1) SELECT 'def'") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val errMsg = intercept[AnalysisException] { - 
sql(s"TRUNCATE TABLE $t PARTITION (ID=1)") - }.getMessage - assert(errMsg.contains("ID is not a valid partition column")) + val expectedTableName = if (commandVersion == DDLCommandTestUtils.V1_COMMAND_VERSION) { + s"`$SESSION_CATALOG_NAME`.`ns`.`tbl`" + } else { + "`test_catalog`.`ns`.`tbl`" + } + checkError( + exception = intercept[AnalysisException] { + sql(s"TRUNCATE TABLE $t PARTITION (ID=1)") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`ID`", + "tableName" -> expectedTableName) + ) } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { sql(s"TRUNCATE TABLE $t PARTITION (ID=1)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableSetLocationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableSetLocationSuite.scala index 8f5af2e1f2e76..343a591fb5585 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableSetLocationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableSetLocationSuite.scala @@ -93,8 +93,8 @@ trait AlterTableSetLocationSuiteBase extends command.AlterTableSetLocationSuiteB exception = intercept[AnalysisException] { sql(s"ALTER TABLE $t PARTITION (A='1', B='2') SET LOCATION '/path/to/part/ways3'") }, - condition = "_LEGACY_ERROR_TEMP_1231", - parameters = Map("key" -> "A", "tblName" -> "`spark_catalog`.`ns`.`tbl`") + condition = "PARTITIONS_NOT_FOUND", + parameters = Map("partitionList" -> "`A`", "tableName" -> "`spark_catalog`.`ns`.`tbl`") ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala index eaf016ac2fa9f..3602853e53aa8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeTableSuite.scala @@ -19,6 +19,10 @@ package org.apache.spark.sql.execution.command.v1 import java.util.Locale +import org.json4s._ +import org.json4s.jackson.JsonMethods.parse + +import org.apache.spark.SPARK_VERSION import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.execution.command @@ -36,9 +40,12 @@ import org.apache.spark.sql.types.StringType */ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase with command.TestsV1AndV2Commands { + implicit val formats: org.json4s.DefaultFormats.type = org.json4s.DefaultFormats def getProvider(): String = defaultUsing.stripPrefix("USING").trim.toLowerCase(Locale.ROOT) + val iso8601Regex = raw"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$$".r + test("Describing of a non-existent partition") { withNamespaceAndTable("ns", "table") { tbl => spark.sql(s"CREATE TABLE $tbl (id bigint, data string) $defaultUsing " + @@ -203,6 +210,410 @@ trait DescribeTableSuiteBase extends command.DescribeTableSuiteBase Row("histogram", "NULL"))) } } + + test("DESCRIBE AS JSON partitions, clusters, buckets") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | employee_id INT, + | employee_name STRING, + | department STRING, + | hire_date DATE + |) USING parquet + |OPTIONS ('compression' = 'snappy', 'max_records' = '1000') + |PARTITIONED BY (department, hire_date) + |CLUSTERED BY (employee_id) SORTED BY (employee_name ASC) INTO 4 BUCKETS + |COMMENT 'Employee data table for testing partitions and buckets' + |TBLPROPERTIES ('version' = '1.0') + |""".stripMargin + spark.sql(tableCreationStr) + val descriptionDf = spark.sql(s"DESCRIBE EXTENDED $t AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = 
parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("table"), + catalog_name = Some(SESSION_CATALOG_NAME), + namespace = Some(List("ns")), + schema_name = Some("ns"), + columns = Some(List( + TableColumn("employee_id", Type("int"), true), + TableColumn("employee_name", Type("string"), true), + TableColumn("department", Type("string"), true), + TableColumn("hire_date", Type("date"), true) + )), + last_access = Some("UNKNOWN"), + created_by = Some(s"Spark $SPARK_VERSION"), + `type` = Some("MANAGED"), + provider = Some("parquet"), + bucket_columns = Some(List("employee_id")), + sort_columns = Some(List("employee_name")), + comment = Some("Employee data table for testing partitions and buckets"), + table_properties = Some(Map( + "version" -> "1.0" + )), + serde_library = if (getProvider() == "hive") { + Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe") + } else { + None + }, + storage_properties = Some(Map( + "compression" -> "snappy", + "max_records" -> "1000" + )), + partition_provider = Some("Catalog"), + partition_columns = Some(List("department", "hire_date")) + ) + + assert(parsedOutput.location.isDefined) + assert(iso8601Regex.matches(parsedOutput.created_time.get)) + assert(expectedOutput == parsedOutput.copy(location = None, created_time = None)) + } + } + + test("DESCRIBE AS JSON partition spec") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | id INT, + | name STRING, + | region STRING, + | category STRING + |) USING parquet + |PARTITIONED BY (region, category) + |COMMENT 'test partition spec' + |TBLPROPERTIES ('t' = 'test') + |""".stripMargin + spark.sql(tableCreationStr) + spark.sql(s"ALTER TABLE $t ADD PARTITION (region='USA', category='tech')") + + val descriptionDf = + spark.sql(s"DESCRIBE FORMATTED $t PARTITION (region='USA', category='tech') AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val 
jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("table"), + catalog_name = Some("spark_catalog"), + namespace = Some(List("ns")), + schema_name = Some("ns"), + columns = Some(List( + TableColumn("id", Type("int"), true), + TableColumn("name", Type("string"), true), + TableColumn("region", Type("string"), true), + TableColumn("category", Type("string"), true) + )), + last_access = Some("UNKNOWN"), + created_by = Some(s"Spark $SPARK_VERSION"), + `type` = Some("MANAGED"), + provider = Some("parquet"), + bucket_columns = Some(Nil), + sort_columns = Some(Nil), + comment = Some("test partition spec"), + table_properties = Some(Map( + "t" -> "test" + )), + serde_library = if (getProvider() == "hive") { + Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe") + } else { + None + }, + partition_provider = Some("Catalog"), + partition_columns = Some(List("region", "category")), + partition_values = Some(Map("region" -> "USA", "category" -> "tech")) + ) + + assert(parsedOutput.location.isDefined) + assert(iso8601Regex.matches(parsedOutput.created_time.get)) + assert(expectedOutput == parsedOutput.copy( + location = None, created_time = None, storage_properties = None)) + } + } + + test("DESCRIBE AS JSON default values") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | id INT DEFAULT 1, + | name STRING DEFAULT 'unknown', + | created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + | is_active BOOLEAN DEFAULT true + |) + |USING parquet COMMENT 'table_comment' + |""".stripMargin + spark.sql(tableCreationStr) + + val descriptionDf = spark.sql(s"DESC EXTENDED $t AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = 
Some("table"), + catalog_name = Some("spark_catalog"), + namespace = Some(List("ns")), + schema_name = Some("ns"), + columns = Some(List( + TableColumn("id", Type("int"), default = Some("1")), + TableColumn("name", Type("string"), default = Some("'unknown'")), + TableColumn("created_at", Type("timestamp_ltz"), default = Some("CURRENT_TIMESTAMP")), + TableColumn("is_active", Type("boolean"), default = Some("true")) + )), + last_access = Some("UNKNOWN"), + created_by = Some(s"Spark $SPARK_VERSION"), + `type` = Some("MANAGED"), + storage_properties = None, + provider = Some("parquet"), + bucket_columns = Some(Nil), + sort_columns = Some(Nil), + comment = Some("table_comment"), + serde_library = if (getProvider() == "hive") { + Some("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe") + } else { + None + }, + table_properties = None + ) + assert(parsedOutput.location.isDefined) + assert(iso8601Regex.matches(parsedOutput.created_time.get)) + assert(expectedOutput == parsedOutput.copy(location = None, created_time = None)) + } + } + + test("DESCRIBE AS JSON view") { + Seq(true, false).foreach { isTemp => + withNamespaceAndTable("ns", "table") { t => + withView("view") { + val tableCreationStr = + s""" + |CREATE TABLE $t (id INT, name STRING, created_at TIMESTAMP) + | USING parquet + | OPTIONS ('compression' 'snappy') + | CLUSTERED BY (id, name) SORTED BY (created_at) INTO 4 BUCKETS + | COMMENT 'test temp view' + | TBLPROPERTIES ('parquet.encryption' = 'true') + |""".stripMargin + spark.sql(tableCreationStr) + val viewType = if (isTemp) "TEMP VIEW" else "VIEW" + spark.sql(s"CREATE $viewType view AS SELECT * FROM $t") + val descriptionDf = spark.sql(s"DESCRIBE EXTENDED view AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("view"), + catalog_name = if (isTemp) Some("system") 
else Some("spark_catalog"), + namespace = if (isTemp) Some(List("session")) else Some(List("default")), + schema_name = if (isTemp) Some("session") else Some("default"), + columns = Some(List( + TableColumn("id", Type("int")), + TableColumn("name", Type("string")), + TableColumn("created_at", Type("timestamp_ltz")) + )), + last_access = Some("UNKNOWN"), + created_by = Some(s"Spark $SPARK_VERSION"), + `type` = Some("VIEW"), + view_text = Some("SELECT * FROM spark_catalog.ns.table"), + view_original_text = if (isTemp) None else Some("SELECT * FROM spark_catalog.ns.table"), + // TODO: this is unexpected and temp view should also use COMPENSATION mode. + view_schema_mode = if (isTemp) Some("BINDING") else Some("COMPENSATION"), + view_catalog_and_namespace = Some("spark_catalog.default"), + view_query_output_columns = Some(List("id", "name", "created_at")) + ) + + assert(iso8601Regex.matches(parsedOutput.created_time.get)) + assert(expectedOutput == parsedOutput.copy( + created_time = None, + table_properties = None, + storage_properties = None, + serde_library = None)) + } + } + } + } + + test("DESCRIBE AS JSON for column throws Analysis Exception") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE ns.table( + | cust_id INT, + | state VARCHAR(20), + | name STRING COMMENT "Short name" + | ) + | USING parquet + | PARTITIONED BY (state) + |""".stripMargin + spark.sql(tableCreationStr) + spark.sql("INSERT INTO ns.table PARTITION (state = \"CA\") VALUES (100, \"Jane\")") + val error = intercept[AnalysisException] { + spark.sql("DESCRIBE FORMATTED ns.table ns.table.name AS JSON") + } + + checkError( + exception = error, + condition = "UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_JSON") + } + } + + test("DESCRIBE AS JSON complex types") { + withNamespaceAndTable("ns", "table") { t => + val tableCreationStr = + s""" + |CREATE TABLE $t ( + | id STRING, + | logs VARIANT, + | nested_struct STRUCT< + | name: STRING, + | age: INT, + | contact: 
STRUCT< + | email: STRING, + | phone_numbers: ARRAY, + | addresses: ARRAY> + | > + | >, + | preferences MAP> + |) USING parquet + | OPTIONS (option1 'value1', option2 'value2') + | PARTITIONED BY (id) + | COMMENT 'A table with nested complex types' + | TBLPROPERTIES ('property1' = 'value1', 'password' = 'password') + """.stripMargin + spark.sql(tableCreationStr) + val descriptionDf = spark.sql(s"DESCRIBE EXTENDED $t AS JSON") + val firstRow = descriptionDf.select("json_metadata").head() + val jsonValue = firstRow.getString(0) + val parsedOutput = parse(jsonValue).extract[DescribeTableJson] + + val expectedOutput = DescribeTableJson( + table_name = Some("table"), + catalog_name = Some("spark_catalog"), + namespace = Some(List("ns")), + schema_name = Some("ns"), + columns = Some(List( + TableColumn( + name = "logs", + `type` = Type("variant"), + default = None + ), + TableColumn( + name = "nested_struct", + `type` = Type( + name = "struct", + fields = Some(List( + Field( + name = "name", + `type` = Type("string") + ), + Field( + name = "age", + `type` = Type("int") + ), + Field( + name = "contact", + `type` = Type( + name = "struct", + fields = Some(List( + Field( + name = "email", + `type` = Type("string") + ), + Field( + name = "phone_numbers", + `type` = Type( + name = "array", + element_type = Some(Type("string")), + element_nullable = Some(true) + ) + ), + Field( + name = "addresses", + `type` = Type( + name = "array", + element_type = Some(Type( + name = "struct", + fields = Some(List( + Field( + name = "street", + `type` = Type("string") + ), + Field( + name = "city", + `type` = Type("string") + ), + Field( + name = "zip", + `type` = Type("int") + ) + )) + )), + element_nullable = Some(true) + ) + ) + )) + ) + ) + )) + ), + default = None + ), + TableColumn( + name = "preferences", + `type` = Type( + name = "map", + key_type = Some(Type("string")), + value_type = Some(Type( + name = "array", + element_type = Some(Type("string")), + element_nullable = 
Some(true) + )), + value_nullable = Some(true) + ), + default = None + ), + TableColumn( + name = "id", + `type` = Type("string"), + default = None + ) + )), + serde_library = if (getProvider() == "hive") { + Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") + } else { + None + }, + storage_properties = Some(Map( + "option1" -> "value1", + "option2" -> "value2" + )), + last_access = Some("UNKNOWN"), + created_by = Some(s"Spark $SPARK_VERSION"), + `type` = Some("MANAGED"), + provider = Some("parquet"), + comment = Some("A table with nested complex types"), + table_properties = Some(Map( + "password" -> "*********(redacted)", + "property1" -> "value1" + )), + partition_provider = Some("Catalog"), + partition_columns = Some(List("id")) + ) + + assert(parsedOutput.location.isDefined) + assert(iso8601Regex.matches(parsedOutput.created_time.get)) + assert(expectedOutput == parsedOutput.copy(location = None, created_time = None)) + } + } } /** @@ -218,6 +629,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase { " PARTITIONED BY (id)" + " TBLPROPERTIES ('bar'='baz')" + " COMMENT 'this is a test table'" + + " DEFAULT COLLATION unicode" + " LOCATION 'file:/tmp/testcat/table_name'") val descriptionDf = spark.sql(s"DESCRIBE TABLE EXTENDED $tbl") assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === Seq( @@ -241,6 +653,7 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase { Row("Type", "EXTERNAL", ""), Row("Provider", getProvider(), ""), Row("Comment", "this is a test table", ""), + Row("Collation", "UNICODE", ""), Row("Table Properties", "[bar=baz]", ""), Row("Location", "file:/tmp/testcat/table_name", ""), Row("Partition Provider", "Catalog", ""))) @@ -275,3 +688,63 @@ class DescribeTableSuite extends DescribeTableSuiteBase with CommandSuiteBase { } } } + +/** Represents JSON output of DESCRIBE TABLE AS JSON */ +case class DescribeTableJson( + table_name: Option[String] = None, + 
catalog_name: Option[String] = None, + namespace: Option[List[String]] = Some(Nil), + schema_name: Option[String] = None, + columns: Option[List[TableColumn]] = Some(Nil), + created_time: Option[String] = None, + last_access: Option[String] = None, + created_by: Option[String] = None, + `type`: Option[String] = None, + provider: Option[String] = None, + bucket_columns: Option[List[String]] = Some(Nil), + sort_columns: Option[List[String]] = Some(Nil), + comment: Option[String] = None, + table_properties: Option[Map[String, String]] = None, + location: Option[String] = None, + serde_library: Option[String] = None, + storage_properties: Option[Map[String, String]] = None, + partition_provider: Option[String] = None, + partition_columns: Option[List[String]] = Some(Nil), + partition_values: Option[Map[String, String]] = None, + view_text: Option[String] = None, + view_original_text: Option[String] = None, + view_schema_mode: Option[String] = None, + view_catalog_and_namespace: Option[String] = None, + view_query_output_columns: Option[List[String]] = None + ) + +/** Used for columns field of DescribeTableJson */ +case class TableColumn( + name: String, + `type`: Type, + element_nullable: Boolean = true, + comment: Option[String] = None, + default: Option[String] = None +) + +case class Type( + name: String, + fields: Option[List[Field]] = None, + `type`: Option[Type] = None, + element_type: Option[Type] = None, + key_type: Option[Type] = None, + value_type: Option[Type] = None, + comment: Option[String] = None, + default: Option[String] = None, + element_nullable: Option[Boolean] = Some(true), + value_nullable: Option[Boolean] = Some(true), + nullable: Option[Boolean] = Some(true) +) + +case class Field( + name: String, + `type`: Type, + element_nullable: Boolean = true, + comment: Option[String] = None, + default: Option[String] = None +) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 92bea4d8655c5..9d353fde898f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -207,7 +207,7 @@ class ShowTablesSuite extends ShowTablesSuiteBase with CommandSuiteBase { |View Original Text: SELECT id FROM $catalog.$namespace.$table |View Schema Mode: COMPENSATION |View Catalog and Namespace: $catalog.$namespace - |View Query Output Columns: [id] + |View Query Output Columns: [`id`] |Schema: root | |-- id: integer (nullable = true)""".stripMargin assert(actualResult === expectedResult) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala index d66dca20d77b8..5719fbee370a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -53,8 +53,8 @@ class ShowTablesSuite extends command.ShowTablesSuiteBase with CommandSuiteBase catalog: String, namespace: String, table: String): (String, Map[String, String]) = { - ("_LEGACY_ERROR_TEMP_1231", - Map("key" -> "id", "tblName" -> s"`$catalog`.`$namespace`.`$table`")) + ("PARTITIONS_NOT_FOUND", + Map("partitionList" -> "`id`", "tableName" -> s"`$catalog`.`$namespace`.`$table`")) } protected override def namespaceKey: String = "Namespace" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceResolverSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceResolverSuite.scala new file mode 100644 index 0000000000000..016c1e2f5457d --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceResolverSuite.scala @@ 
-0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.analysis.resolver.{MetadataResolver, Resolver} +import org.apache.spark.sql.catalyst.catalog.UnresolvedCatalogRelation +import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} + +class DataSourceResolverSuite extends QueryTest with SharedSparkSession { + private val keyValueTableSchema = StructType( + Seq( + StructField("key", IntegerType, true), + StructField("value", StringType, true) + ) + ) + + test("CSV relation") { + withTable("src_csv") { + spark.sql("CREATE TABLE src_csv (key INT, value STRING) USING CSV;").collect() + + checkResolveOperator( + sqlText = "SELECT * FROM src_csv", + expectedTableName = "spark_catalog.default.src_csv", + expectedTableSchema = keyValueTableSchema + ) + } + } + + test("JSON relation") { + withTable("src_json") { + spark.sql("CREATE TABLE src_json (key INT, value STRING) USING 
JSON;").collect() + + checkResolveOperator( + sqlText = "SELECT * FROM src_json", + expectedTableName = "spark_catalog.default.src_json", + expectedTableSchema = keyValueTableSchema + ) + } + } + + test("PARQUET relation") { + withTable("src_parquet") { + spark.sql("CREATE TABLE src_parquet (key INT, value STRING) USING PARQUET;").collect() + + checkResolveOperator( + sqlText = "SELECT * FROM src_parquet", + expectedTableName = "spark_catalog.default.src_parquet", + expectedTableSchema = keyValueTableSchema + ) + } + } + + test("ORC relation") { + withTable("src_orc") { + spark.sql("CREATE TABLE src_orc (key INT, value STRING) USING ORC;").collect() + + checkResolveOperator( + sqlText = "SELECT * FROM src_orc", + expectedTableName = "spark_catalog.default.src_orc", + expectedTableSchema = keyValueTableSchema + ) + } + } + + private def checkResolveOperator( + sqlText: String, + expectedTableName: String, + expectedTableSchema: StructType) = { + val metadataResolver = new MetadataResolver( + spark.sessionState.catalogManager, + Resolver.createRelationResolution(spark.sessionState.catalogManager) + ) + val dataSourceResolver = new DataSourceResolver(spark) + + val unresolvedPlan = spark.sql(sqlText).queryExecution.logical + + metadataResolver.resolve(unresolvedPlan) + + val unresolvedRelations = unresolvedPlan.collect { + case unresolvedRelation: UnresolvedRelation => unresolvedRelation + } + assert(unresolvedRelations.size == 1) + + val partiallyResolvedRelation = metadataResolver + .getRelationWithResolvedMetadata(unresolvedRelations.head) + .get + .asInstanceOf[SubqueryAlias] + .child + assert(partiallyResolvedRelation.isInstanceOf[UnresolvedCatalogRelation]) + + val result = dataSourceResolver.resolveOperator(partiallyResolvedRelation) + + val logicalRelation = result.asInstanceOf[LogicalRelation] + assert( + logicalRelation.catalogTable.get.identifier.unquotedString + == expectedTableName + ) + assert(logicalRelation.relation.schema == expectedTableSchema) + } 
+} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala index fd9d31e7a594d..d2acdcfc62053 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala @@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} import org.scalatest.PrivateMethodTester +import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.Utils @@ -207,6 +208,18 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester { Utils.deleteRecursively(baseDir) } } + + test("SPARK-50458: Proper error handling for unsupported file system") { + val loc = "https://raw.githubusercontent.com/apache/spark/refs/heads/master/examples/" + + "src/main/resources/employees.json" + checkError(exception = intercept[SparkUnsupportedOperationException]( + sql(s"CREATE TABLE HTTP USING JSON LOCATION '$loc'")), + condition = "FAILED_READ_FILE.UNSUPPORTED_FILE_SYSTEM", + parameters = Map( + "path" -> loc, + "fileSystemClass" -> "org.apache.hadoop.fs.http.HttpsFileSystem", + "method" -> "listStatus")) + } } object TestPaths { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileResolverSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileResolverSuite.scala new file mode 100644 index 0000000000000..1d1b228028bdb --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileResolverSuite.scala @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{LongType, StringType, StructType} + +class FileResolverSuite extends QueryTest with SharedSparkSession { + private val tableSchema = new StructType().add("id", LongType) + private val csvTableSchema = new StructType().add("_c0", StringType) + + test("JSON file format") { + val df = spark.range(100).toDF() + withTempPath(f => { + df.write.json(f.getCanonicalPath) + checkResolveOperator( + sqlText = s"select id from json.`${f.getCanonicalPath}`", + expectedTablePath = s"file:${f.getCanonicalPath}", + expectedTableSchema = tableSchema + ) + }) + } + + test("PARQUET file format") { + val df = spark.range(100).toDF() + withTempPath(f => { + df.write.parquet(f.getCanonicalPath) + checkResolveOperator( + sqlText = s"select id from parquet.`${f.getCanonicalPath}`", + expectedTablePath = s"file:${f.getCanonicalPath}", + expectedTableSchema = tableSchema + ) + }) + } + + test("ORC file format") { + val df = spark.range(100).toDF() + withTempPath(f => { + 
df.write.orc(f.getCanonicalPath) + checkResolveOperator( + sqlText = s"select id from ORC.`${f.getCanonicalPath}`", + expectedTablePath = s"file:${f.getCanonicalPath}", + expectedTableSchema = tableSchema + ) + }) + } + + test("CSV file format") { + val df = spark.range(100).toDF() + withTempPath(f => { + df.write.csv(f.getCanonicalPath) + checkResolveOperator( + sqlText = s"select _c0 from csv.`${f.getCanonicalPath}`", + expectedTablePath = s"file:${f.getCanonicalPath}", + expectedTableSchema = csvTableSchema + ) + }) + } + + private def checkResolveOperator( + sqlText: String, + expectedTablePath: String, + expectedTableSchema: StructType) = { + val fileResolver = new FileResolver(spark) + + val unresolvedPlan = spark.sql(sqlText).queryExecution.logical + + val result = fileResolver.resolveOperator( + unresolvedPlan.asInstanceOf[Project].child.asInstanceOf[UnresolvedRelation] + ) + + val logicalRelation = result.asInstanceOf[LogicalRelation] + assert( + logicalRelation.relation.asInstanceOf[HadoopFsRelation].location.rootPaths.mkString(",") == + expectedTablePath + ) + assert(logicalRelation.relation.schema == expectedTableSchema) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PushVariantIntoScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PushVariantIntoScanSuite.scala new file mode 100644 index 0000000000000..2a866dcd66f06 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PushVariantIntoScanSuite.scala @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.SparkConf +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.variant._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +class PushVariantIntoScanSuite extends SharedSparkSession { + override def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.PUSH_VARIANT_INTO_SCAN.key, "true") + + private def localTimeZone = spark.sessionState.conf.sessionLocalTimeZone + + // Return a `StructField` with the expected `VariantMetadata`. + private def field(ordinal: Int, dataType: DataType, path: String, + failOnError: Boolean = true, timeZone: String = localTimeZone): StructField = + StructField(ordinal.toString, dataType, + metadata = VariantMetadata(path, failOnError, timeZone).toMetadata) + + // Validate an `Alias` expression has the expected name and child. 
+ private def checkAlias(expr: Expression, expectedName: String, expected: Expression): Unit = { + expr match { + case Alias(child, name) => + assert(name == expectedName) + assert(child == expected) + case _ => fail() + } + } + + private def testOnFormats(fn: String => Unit): Unit = { + for (format <- Seq("PARQUET")) { + test("test - " + format) { + withTable("T") { + fn(format) + } + } + } + } + + testOnFormats { format => + sql("create table T (v variant, vs struct, " + + "va array, vd variant default parse_json('1')) " + + s"using $format") + + sql("select variant_get(v, '$.a', 'int') as a, v, cast(v as struct) as v from T") + .queryExecution.optimizedPlan match { + case Project(projectList, l: LogicalRelation) => + val output = l.output + val v = output(0) + checkAlias(projectList(0), "a", GetStructField(v, 0)) + checkAlias(projectList(1), "v", GetStructField(v, 1)) + checkAlias(projectList(2), "v", GetStructField(v, 2)) + assert(v.dataType == StructType(Array( + field(0, IntegerType, "$.a"), + field(1, VariantType, "$", timeZone = "UTC"), + field(2, StructType(Array(StructField("b", FloatType))), "$")))) + case _ => fail() + } + + sql("select 1 from T where isnotnull(v)") + .queryExecution.optimizedPlan match { + case Project(projectList, Filter(condition, l: LogicalRelation)) => + val output = l.output + val v = output(0) + checkAlias(projectList(0), "1", Literal(1)) + assert(condition == IsNotNull(v)) + assert(v.dataType == StructType(Array( + field(0, BooleanType, "$.__placeholder_field__", failOnError = false, timeZone = "UTC")))) + case _ => fail() + } + + sql("select variant_get(v, '$.a', 'int') + 1 as a, try_variant_get(v, '$.b', 'string') as b " + + "from T where variant_get(v, '$.a', 'int') = 1").queryExecution.optimizedPlan match { + case Project(projectList, Filter(condition, l: LogicalRelation)) => + val output = l.output + val v = output(0) + checkAlias(projectList(0), "a", Add(GetStructField(v, 0), Literal(1))) + checkAlias(projectList(1), "b", 
GetStructField(v, 1)) + assert(condition == And(IsNotNull(v), EqualTo(GetStructField(v, 0), Literal(1)))) + assert(v.dataType == StructType(Array( + field(0, IntegerType, "$.a"), + field(1, StringType, "$.b", failOnError = false)))) + case _ => fail() + } + + sql("select variant_get(vs.v1, '$.a', 'int') as a, variant_get(vs.v1, '$.b', 'int') as b, " + + "variant_get(vs.v2, '$.a', 'int') as a, vs.i from T").queryExecution.optimizedPlan match { + case Project(projectList, l: LogicalRelation) => + val output = l.output + val vs = output(1) + val v1 = GetStructField(vs, 0, Some("v1")) + val v2 = GetStructField(vs, 1, Some("v2")) + checkAlias(projectList(0), "a", GetStructField(v1, 0)) + checkAlias(projectList(1), "b", GetStructField(v1, 1)) + checkAlias(projectList(2), "a", GetStructField(v2, 0)) + checkAlias(projectList(3), "i", GetStructField(vs, 2, Some("i"))) + assert(vs.dataType == StructType(Array( + StructField("v1", StructType(Array( + field(0, IntegerType, "$.a"), field(1, IntegerType, "$.b")))), + StructField("v2", StructType(Array(field(0, IntegerType, "$.a")))), + StructField("i", IntegerType)))) + case _ => fail() + } + + def variantGet(child: Expression): Expression = VariantGet( + child, + path = Literal("$.a"), + targetType = VariantType, + failOnError = true, + timeZoneId = Some(localTimeZone)) + + // No push down if the struct containing variant is used. + sql("select vs, variant_get(vs.v1, '$.a') as a from T").queryExecution.optimizedPlan match { + case Project(projectList, l: LogicalRelation) => + val output = l.output + val vs = output(1) + assert(projectList(0) == vs) + checkAlias(projectList(1), "a", variantGet(GetStructField(vs, 0, Some("v1")))) + assert(vs.dataType == StructType(Array( + StructField("v1", VariantType), + StructField("v2", VariantType), + StructField("i", IntegerType)))) + case _ => fail() + } + + // No push down for variant in array. 
+ sql("select variant_get(va[0], '$.a') as a from T").queryExecution.optimizedPlan match { + case Project(projectList, l: LogicalRelation) => + val output = l.output + val va = output(2) + checkAlias(projectList(0), "a", variantGet(GetArrayItem(va, Literal(0)))) + assert(va.dataType == ArrayType(VariantType)) + case _ => fail() + } + + // No push down if variant has default value. + sql("select variant_get(vd, '$.a') as a from T").queryExecution.optimizedPlan match { + case Project(projectList, l: LogicalRelation) => + val output = l.output + val vd = output(3) + checkAlias(projectList(0), "a", variantGet(vd)) + assert(vd.dataType == VariantType) + case _ => fail() + } + } + + test("No push down for JSON") { + withTable("T") { + sql("create table T (v variant) using JSON") + sql("select variant_get(v, '$.a') from T").queryExecution.optimizedPlan match { + case Project(_, l: LogicalRelation) => + val output = l.output + assert(output(0).dataType == VariantType) + case _ => fail() + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 7cacd8ea2dc50..850e887ac8e75 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -3078,6 +3078,23 @@ abstract class CSVSuite } } + test("SPARK-50616: We can write with a tsv file extension") { + withTempPath { path => + val input = Seq( + "1423-11-12T23:41:00", + "1765-03-28", + "2016-01-28T20:00:00" + ).toDF().repartition(1) + input.write.option("extension", "tsv").csv(path.getAbsolutePath) + + val files = Files.list(path.toPath) + .iterator().asScala.map(x => x.getFileName.toString) + .toList.filter(x => x.takeRight(3).equals("tsv")) + + assert(files.size == 1) + } + } + test("SPARK-39904: Parse incorrect timestamp values") { withTempPath { path 
=> Seq( @@ -3308,7 +3325,7 @@ abstract class CSVSuite } test("SPARK-40667: validate CSV Options") { - assert(CSVOptions.getAllOptions.size == 39) + assert(CSVOptions.getAllOptions.size == 40) // Please add validation on any new CSV options here assert(CSVOptions.isValidOption("header")) assert(CSVOptions.isValidOption("inferSchema")) @@ -3347,6 +3364,7 @@ abstract class CSVSuite assert(CSVOptions.isValidOption("compression")) assert(CSVOptions.isValidOption("codec")) assert(CSVOptions.isValidOption("sep")) + assert(CSVOptions.isValidOption("extension")) assert(CSVOptions.isValidOption("delimiter")) assert(CSVOptions.isValidOption("columnPruning")) // Please add validation on any new parquet options with alternative here diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index 500c0647bcb2a..bf9740970a667 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -28,14 +28,13 @@ import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument, SearchA import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder import org.apache.spark.{SparkConf, SparkException, SparkRuntimeException} -import org.apache.spark.sql.{AnalysisException, DataFrame, Row} +import org.apache.spark.sql.{AnalysisException, Column, DataFrame, Row} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan import org.apache.spark.sql.functions.col -import org.apache.spark.sql.internal.ExpressionUtils import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -47,7 +46,7 @@ import org.apache.spark.util.ArrayImplicits._ */ @ExtendedSQLTest class OrcFilterSuite extends OrcTest with SharedSparkSession { - import testImplicits.toRichColumn + import testImplicits.{toRichColumn, ColumnConstructorExt} override protected def sparkConf: SparkConf = super @@ -60,8 +59,8 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { checker: (SearchArgument) => Unit): Unit = { val output = predicate.collect { case a: Attribute => a }.distinct val query = df - .select(output.map(e => ExpressionUtils.column(e)): _*) - .where(ExpressionUtils.column(predicate)) + .select(output.map(e => Column(e)): _*) + .where(Column(predicate)) query.queryExecution.optimizedPlan match { case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _, _, _)) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index b8669ee4d1ef1..9fbc872ad262b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -28,10 +28,10 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions.{Attribute, Predicate} import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.execution.datasources.FileBasedDataSourceTest import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION import org.apache.spark.util.ArrayImplicits._ @@ -118,8 +118,8 @@ trait OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfter (implicit df: DataFrame): Unit = { val output = predicate.collect { case a: Attribute => a }.distinct val query = df - .select(output.map(e => column(e)): _*) - .where(predicate) + .select(output.map(e => Column(e)): _*) + .where(Column(predicate)) query.queryExecution.optimizedPlan match { case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _, _, _)) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala index 5260ebf15e4f3..8018417f923af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1FilterSuite.scala @@ -21,12 +21,12 @@ import scala.jdk.CollectionConverters._ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentImpl import org.apache.spark.SparkConf -import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.{Column, DataFrame} import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Predicate} import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelationWithTable} import org.apache.spark.sql.execution.datasources.orc.OrcShimUtils.{Operator, SearchArgument} -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import org.apache.spark.tags.ExtendedSQLTest @@ -44,8 +44,8 @@ class OrcV1FilterSuite extends OrcFilterSuite { checker: (SearchArgument) => Unit): Unit = { val output = predicate.collect { case a: Attribute => a 
}.distinct val query = df - .select(output.map(e => column(e)): _*) - .where(predicate) + .select(output.map(e => Column(e)): _*) + .where(Column(predicate)) var maybeRelation: Option[HadoopFsRelation] = None val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { @@ -90,8 +90,8 @@ class OrcV1FilterSuite extends OrcFilterSuite { (implicit df: DataFrame): Unit = { val output = predicate.collect { case a: Attribute => a }.distinct val query = df - .select(output.map(e => column(e)): _*) - .where(predicate) + .select(output.map(e => Column(e)): _*) + .where(Column(predicate)) var maybeRelation: Option[HadoopFsRelation] = None val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 37edb9ea2315e..5f7a0c9e7e749 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -49,7 +49,7 @@ import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsR import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.{ExpressionUtils, LegacyBehaviorPolicy, SQLConf} +import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.internal.LegacyBehaviorPolicy.{CORRECTED, LEGACY} import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType.{INT96, TIMESTAMP_MICROS, TIMESTAMP_MILLIS} import org.apache.spark.sql.test.SharedSparkSession @@ -2233,6 +2233,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared 
@ExtendedSQLTest class ParquetV1FilterSuite extends ParquetFilterSuite { + import testImplicits.ColumnConstructorExt + override protected def sparkConf: SparkConf = super .sparkConf @@ -2260,8 +2262,8 @@ class ParquetV1FilterSuite extends ParquetFilterSuite { SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false", SQLConf.NESTED_PREDICATE_PUSHDOWN_FILE_SOURCE_LIST.key -> pushdownDsList) { val query = df - .select(output.map(ExpressionUtils.column): _*) - .where(ExpressionUtils.column(predicate)) + .select(output.map(Column(_)): _*) + .where(Column(predicate)) val nestedOrAttributes = predicate.collectFirst { case g: GetStructField => g @@ -2313,6 +2315,8 @@ class ParquetV1FilterSuite extends ParquetFilterSuite { @ExtendedSQLTest class ParquetV2FilterSuite extends ParquetFilterSuite { + import testImplicits.ColumnConstructorExt + // TODO: enable Parquet V2 write path after file source V2 writers are workable. override protected def sparkConf: SparkConf = super @@ -2339,8 +2343,8 @@ class ParquetV2FilterSuite extends ParquetFilterSuite { SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> InferFiltersFromConstraints.ruleName, SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { val query = df - .select(output.map(ExpressionUtils.column): _*) - .where(ExpressionUtils.column(predicate)) + .select(output.map(Column(_)): _*) + .where(Column(predicate)) query.queryExecution.optimizedPlan.collectFirst { case PhysicalOperation(_, filters, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 22a02447e720f..bba71f1c48dec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -473,6 +473,26 @@ abstract class ParquetQuerySuite extends QueryTest with 
ParquetTest with SharedS } } + test("SPARK-50463: Partition values can be read over multiple batches") { + withTempDir { dir => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_BATCH_SIZE.key -> "1") { + val path = dir.getAbsolutePath + spark.range(0, 5) + .selectExpr("concat(cast(id % 2 as string), 'a') as partCol", "id") + .write + .format("parquet") + .mode("overwrite") + .partitionBy("partCol").save(path) + val df = spark.read.format("parquet").load(path).selectExpr("partCol") + val expected = spark.range(0, 5) + .selectExpr("concat(cast(id % 2 as string), 'a') as partCol") + .collect() + + checkAnswer(df, expected) + } + } + } + test("SPARK-10301 requested schema clipping - same schema") { withTempPath { dir => val path = dir.getCanonicalPath diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala new file mode 100644 index 0000000000000..8bb5a4b1d0bc5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet + +import java.io.File + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.unsafe.types.VariantVal + +/** + * Test shredding Variant values in the Parquet reader/writer. + */ +class ParquetVariantShreddingSuite extends QueryTest with ParquetTest with SharedSparkSession { + + private def testWithTempDir(name: String)(block: File => Unit): Unit = test(name) { + withTempDir { dir => + block(dir) + } + } + + testWithTempDir("write shredded variant basic") { dir => + val schema = "a int, b string, c decimal(15, 1)" + val df = spark.sql( + """ + | select case + | when id = 0 then parse_json('{"a": 1, "b": "2", "c": 3.3, "d": 4.4}') + | when id = 1 then parse_json('{"a": [1,2,3], "b": "hello", "c": {"x": 0}}') + | when id = 2 then parse_json('{"A": 1, "c": 1.23}') + | end v from range(3) + |""".stripMargin) + val fullSchema = "v struct, b struct," + + "c struct>>" + withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString, + SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) { + df.write.mode("overwrite").parquet(dir.getAbsolutePath) + + + // Verify that we can read the full variant. The exact binary layout can change before and + // after shredding, so just check that the JSON representation matches. + checkAnswer( + spark.read.parquet(dir.getAbsolutePath).selectExpr("to_json(v)"), + df.selectExpr("to_json(v)").collect() + ) + + // Verify that it was shredded to the expected fields. + + val shreddedDf = spark.read.schema(fullSchema).parquet(dir.getAbsolutePath) + // Metadata should be unchanaged. + checkAnswer(shreddedDf.selectExpr("v.metadata"), + df.collect().map(v => Row(v.get(0).asInstanceOf[VariantVal].getMetadata)) + ) + + // Check typed values. 
+ // Second row is not an integer, and third is A, not a + checkAnswer( + shreddedDf.selectExpr("v.typed_value.a.typed_value"), + Seq(Row(1), Row(null), Row(null))) + // b is missing from third row. + checkAnswer( + shreddedDf.selectExpr("v.typed_value.b.typed_value"), + Seq(Row("2"), Row("hello"), Row(null))) + // Second row is an object, third is the wrong scale. (Note: we may eventually allow the + // latter, in which case this test should be updated.) + checkAnswer( + shreddedDf.selectExpr("v.typed_value.c.typed_value"), + Seq(Row(3.3), Row(null), Row(null))) + + // Untyped values are more awkward to check, so for now just check their nullness. We + // can do more thorough checking once the reader is ready. + checkAnswer( + shreddedDf.selectExpr("v.value is null"), + // First row has "d" and third has "A". + Seq(Row(false), Row(true), Row(false))) + checkAnswer( + shreddedDf.selectExpr("v.typed_value.a.value is null"), + // First row is fully shredded, third is missing. + Seq(Row(true), Row(false), Row(true))) + checkAnswer( + shreddedDf.selectExpr("v.typed_value.b.value is null"), + // b is always fully shredded or missing. + Seq(Row(true), Row(true), Row(true))) + checkAnswer( + shreddedDf.selectExpr("v.typed_value.c.value is null"), + Seq(Row(true), Row(false), Row(false))) + // The a/b/c levels are not null, even if the field is missing. 
+ checkAnswer( + shreddedDf.selectExpr( + "v.typed_value.a is null or v.typed_value.b is null or v.typed_value.c is null"), + Seq(Row(false), Row(false), Row(false))) + } + } + + testWithTempDir("write shredded variant array") { dir => + val schema = "array" + val df = spark.sql( + """ + | select case + | when id = 0 then parse_json('[1, "2", 3.5, null, 5]') + | when id = 1 then parse_json('{"a": [1, 2, 3]}') + | when id = 2 then parse_json('1') + | when id = 3 then parse_json('null') + | end v from range(4) + |""".stripMargin) + val fullSchema = "v struct>>" + withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString, + SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) { + df.write.mode("overwrite").parquet(dir.getAbsolutePath) + + // Verify that we can read the full variant. + checkAnswer( + spark.read.parquet(dir.getAbsolutePath).selectExpr("to_json(v)"), + df.selectExpr("to_json(v)").collect() + ) + + // Verify that it was shredded to the expected fields. + + val shreddedDf = spark.read.schema(fullSchema).parquet(dir.getAbsolutePath) + // Metadata should be unchanaged. + checkAnswer(shreddedDf.selectExpr("v.metadata"), + df.collect().map(v => Row(v.get(0).asInstanceOf[VariantVal].getMetadata)) + ) + + // Check typed values. + checkAnswer( + shreddedDf.selectExpr("v.typed_value.typed_value"), + Seq(Row(Array(1, null, null, null, 5)), Row(null), Row(null), Row(null))) + + // All the other array elements should have non-null value. + checkAnswer( + shreddedDf.selectExpr("transform(v.typed_value.value, v -> v is null)"), + Seq(Row(Array(true, false, false, false, true)), Row(null), Row(null), Row(null))) + + // The non-arrays should have non-null top-level value. 
+ checkAnswer( + shreddedDf.selectExpr("v.value is null"), + Seq(Row(true), Row(false), Row(false), Row(false))) + } + } + + testWithTempDir("write no shredding schema") { dir => + // Check that we can write and read normally when shredding is enabled if + // we don't provide a shredding schema. + withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString) { + val df = spark.sql( + """ + | select parse_json('{"a": ' || id || ', "b": 2}') as v, + | array(parse_json('{"c": 3}'), 123::variant) as a + | from range(1, 3, 1, 1) + |""".stripMargin) + df.write.mode("overwrite").parquet(dir.getAbsolutePath) + checkAnswer( + spark.read.parquet(dir.getAbsolutePath), df.collect() + ) + } + } + + testWithTempDir("arrays and maps ignore shredding schema") { dir => + // Check that we don't try to shred array or map elements, even if a shredding schema + // is specified. + val schema = "a int" + val df = spark.sql( + """ select v, array(v) as arr, map('myKey', v) as m from + | (select parse_json('{"a":' || id || '}') v from range(3)) + |""".stripMargin) + val fullSchema = "v struct>>, " + + "arr array>, " + + "m map>" + withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString, + SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) { + df.write.mode("overwrite").parquet(dir.getAbsolutePath) + + // Verify that we can read the full variant. + checkAnswer( + spark.read.parquet(dir.getAbsolutePath).selectExpr("to_json(v)"), + df.selectExpr("to_json(v)").collect() + ) + + // Verify that it was shredded to the expected fields. + + val shreddedDf = spark.read.schema(fullSchema).parquet(dir.getAbsolutePath) + // Metadata should be unchanaged. 
+ checkAnswer(shreddedDf.selectExpr("v.metadata"), + df.selectExpr("v").collect().map(v => Row(v.get(0).asInstanceOf[VariantVal].getMetadata)) + ) + checkAnswer(shreddedDf.selectExpr("arr[0].metadata"), + df.selectExpr("arr[0]").collect().map(v => + Row(v.get(0).asInstanceOf[VariantVal].getMetadata)) + ) + checkAnswer(shreddedDf.selectExpr("m['myKey'].metadata"), + df.selectExpr("m['myKey']").collect().map( + v => Row(v.get(0).asInstanceOf[VariantVal].getMetadata)) + ) + + // v should be fully shredded, but the array and map should not be. + checkAnswer( + shreddedDf.selectExpr( + "v.value is null"), + Seq(Row(true), Row(true), Row(true))) + checkAnswer( + shreddedDf.selectExpr( + "arr[0].value is null"), + Seq(Row(false), Row(false), Row(false))) + checkAnswer( + shreddedDf.selectExpr( + "m['myKey'].value is null"), + Seq(Row(false), Row(false), Row(false))) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala index 0316f09e42ce3..0d18e3bf809e0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileTableSuite.scala @@ -22,7 +22,7 @@ import org.apache.hadoop.fs.FileStatus import org.apache.spark.sql.{QueryTest, SparkSession} import org.apache.spark.sql.connector.read.ScanBuilder -import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl, WriteBuilder} import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.text.TextFileFormat @@ -96,8 +96,8 @@ class FileTableSuite extends QueryTest with SharedSparkSession { } allFileBasedDataSources.foreach { format => - 
test(s"SPARK-49519: Merge options of table and relation when constructing FileScanBuilder" + - s" - $format") { + test("SPARK-49519, SPARK-50287: Merge options of table and relation when " + + s"constructing ScanBuilder and WriteBuilder in FileFormat - $format") { withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { val userSpecifiedSchema = StructType(Seq(StructField("c1", StringType))) @@ -108,20 +108,29 @@ class FileTableSuite extends QueryTest with SharedSparkSession { val table = provider.getTable( userSpecifiedSchema, Array.empty, - dsOptions.asCaseSensitiveMap()) + dsOptions.asCaseSensitiveMap()).asInstanceOf[FileTable] val tableOptions = new CaseInsensitiveStringMap( Map("k2" -> "table_v2", "k3" -> "v3").asJava) - val mergedOptions = table.asInstanceOf[FileTable].newScanBuilder(tableOptions) match { + + val mergedReadOptions = table.newScanBuilder(tableOptions) match { case csv: CSVScanBuilder => csv.options case json: JsonScanBuilder => json.options case orc: OrcScanBuilder => orc.options case parquet: ParquetScanBuilder => parquet.options case text: TextScanBuilder => text.options } - assert(mergedOptions.size() == 3) - assert("v1".equals(mergedOptions.get("k1"))) - assert("table_v2".equals(mergedOptions.get("k2"))) - assert("v3".equals(mergedOptions.get("k3"))) + assert(mergedReadOptions.size === 3) + assert(mergedReadOptions.get("k1") === "v1") + assert(mergedReadOptions.get("k2") === "table_v2") + assert(mergedReadOptions.get("k3") === "v3") + + val writeInfo = LogicalWriteInfoImpl("query-id", userSpecifiedSchema, tableOptions) + val mergedWriteOptions = table.newWriteBuilder(writeInfo).build() + .asInstanceOf[FileWrite].options + assert(mergedWriteOptions.size === 3) + assert(mergedWriteOptions.get("k1") === "v1") + assert(mergedWriteOptions.get("k2") === "table_v2") + assert(mergedWriteOptions.get("k3") === "v3") case _ => throw new IllegalArgumentException(s"Failed to get table provider for $format") } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceTransformWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceTransformWithStateSuite.scala index baab6327b35c1..fe224608fffd8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceTransformWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceTransformWithStateSuite.scala @@ -24,11 +24,12 @@ import org.apache.hadoop.conf.Configuration import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.{Encoders, Row} import org.apache.spark.sql.execution.streaming.MemoryStream -import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithChangelogCheckpointingEnabled, RocksDBFileManager, RocksDBStateStoreProvider, TestClass} +import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithEncodingTypes, AlsoTestWithRocksDBFeatures, RocksDBFileManager, RocksDBStateStoreProvider, TestClass} import org.apache.spark.sql.functions.{col, explode, timestamp_seconds} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.{InputMapRow, ListState, MapInputEvent, MapOutputEvent, MapStateTTLProcessor, MaxEventTimeStatefulProcessor, OutputMode, RunningCountStatefulProcessor, RunningCountStatefulProcessorWithProcTimeTimerUpdates, StatefulProcessor, StateStoreMetricsTest, TestMapStateProcessor, TimeMode, TimerValues, TransformWithStateSuiteUtils, Trigger, TTLConfig, ValueState} import org.apache.spark.sql.streaming.util.StreamManualClock +import org.apache.spark.tags.SlowSQLTest import org.apache.spark.util.Utils /** Stateful processor of single value state var with non-primitive type */ @@ -125,8 +126,9 @@ class SessionGroupsStatefulProcessorWithTTL extends /** * Test suite to verify integration of state data source reader with the transformWithState operator */ 
+@SlowSQLTest class StateDataSourceTransformWithStateSuite extends StateStoreMetricsTest - with AlsoTestWithChangelogCheckpointingEnabled { + with AlsoTestWithRocksDBFeatures with AlsoTestWithEncodingTypes { import testImplicits._ @@ -1075,7 +1077,7 @@ class StateDataSourceTransformWithStateSuite extends StateStoreMetricsTest // Read the changelog for one of the partitions at version 3 and // ensure that we have two entries // For this test - keys 9 and 12 are written at version 3 for partition 4 - val changelogReader = fileManager.getChangelogReader(3, true) + val changelogReader = fileManager.getChangelogReader(3) val entries = changelogReader.toSeq assert(entries.size == 2) val retainEntry = entries.head @@ -1085,13 +1087,13 @@ class StateDataSourceTransformWithStateSuite extends StateStoreMetricsTest Utils.deleteRecursively(new File(changelogFilePath)) // Write the retained entry back to the changelog - val changelogWriter = fileManager.getChangeLogWriter(3, true) + val changelogWriter = fileManager.getChangeLogWriter(3) changelogWriter.put(retainEntry._2, retainEntry._3) changelogWriter.commit() // Ensure that we have only one entry in the changelog for version 3 // For this test - key 9 is retained and key 12 is deleted - val changelogReader1 = fileManager.getChangelogReader(3, true) + val changelogReader1 = fileManager.getChangelogReader(3) val entries1 = changelogReader1.toSeq assert(entries1.size == 1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala index fe910c21cb0c6..560292b263ba3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala @@ -1930,7 +1930,7 @@ class XmlSuite } checkXmlOptionErrorMessage(Map.empty, - "[XML_ROW_TAG_MISSING] `rowTag` option is required for reading files in 
XML format.", + "[XML_ROW_TAG_MISSING] `rowTag` option is required for reading/writing files in XML format.", QueryCompilationErrors.xmlRowTagRequiredError(XmlOptions.ROW_TAG).getCause) checkXmlOptionErrorMessage(Map("rowTag" -> ""), "'rowTag' option should not be an empty string.") @@ -1951,6 +1951,20 @@ class XmlSuite .xml(spark.createDataset(Seq(xmlString))) } + test("SPARK-50688: rowTag requirement for write") { + withTempDir { dir => + dir.delete() + val e = intercept[AnalysisException] { + spark.range(1).write.xml(dir.getCanonicalPath) + } + checkError( + exception = e, + condition = "XML_ROW_TAG_MISSING", + parameters = Map("rowTag" -> "`rowTag`") + ) + } + } + test("Primitive field casting") { val ts = Seq("2002-05-30 21:46:54", "2002-05-30T21:46:54", "2002-05-30T21:46:54.1234", "2002-05-30T21:46:54Z", "2002-05-30T21:46:54.1234Z", "2002-05-30T21:46:54-06:00", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonDataSourceSuite.scala index 1f2be12058eb7..73c05ff0e0b58 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonDataSourceSuite.scala @@ -94,6 +94,27 @@ abstract class PythonDataSourceSuiteBase extends QueryTest with SharedSparkSessi class PythonDataSourceSuite extends PythonDataSourceSuiteBase { import IntegratedUDFTestUtils._ + test("SPARK-50426: should not trigger static Python data source lookup") { + assume(shouldTestPandasUDFs) + val testAppender = new LogAppender("Python data source lookup") + // Using builtin and Java data sources should not trigger a static + // Python data source lookup + withLogAppender(testAppender) { + spark.read.format("org.apache.spark.sql.test").load() + spark.range(3).write.mode("overwrite").format("noop").save() + } + assert(!testAppender.loggingEvents + .exists(msg => 
msg.getMessage.getFormattedMessage.contains( + "Loading static Python Data Sources."))) + // Now trigger a Python data source lookup + withLogAppender(testAppender) { + spark.read.format(staticSourceName).load() + } + assert(testAppender.loggingEvents + .exists(msg => msg.getMessage.getFormattedMessage.contains( + "Loading static Python Data Sources."))) + } + test("SPARK-45917: automatic registration of Python Data Source") { assume(shouldTestPandasUDFs) val df = spark.read.format(staticSourceName).load() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala index 4b46331be107a..2f44994c301b3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala @@ -91,7 +91,10 @@ class PythonUDFSuite extends QueryTest with SharedSparkSession { val pythonSQLMetrics = List( "data sent to Python workers", "data returned from Python workers", - "number of output rows") + "number of output rows", + "total time to initialize Python workers", + "total time to start Python workers", + "total time to run Python workers") val df = base.groupBy(pythonTestUDF(base("a") + 1)) .agg(pythonTestUDF(pythonTestUDF(base("a") + 1))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasStateServerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasStateServerSuite.scala index e05264825f773..c3d4541bac29c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasStateServerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/TransformWithStateInPandasStateServerSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import 
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.execution.streaming.{StatefulProcessorHandleImpl, StatefulProcessorHandleState} import org.apache.spark.sql.execution.streaming.state.StateMessage -import org.apache.spark.sql.execution.streaming.state.StateMessage.{AppendList, AppendValue, Clear, ContainsKey, DeleteTimer, Exists, ExpiryTimerRequest, Get, GetProcessingTime, GetValue, GetWatermark, HandleState, Keys, ListStateCall, ListStateGet, ListStatePut, ListTimers, MapStateCall, RegisterTimer, RemoveKey, SetHandleState, StateCallCommand, StatefulProcessorCall, TimerRequest, TimerStateCallCommand, TimerValueRequest, UpdateValue, Values, ValueStateCall, ValueStateUpdate} +import org.apache.spark.sql.execution.streaming.state.StateMessage.{AppendList, AppendValue, Clear, ContainsKey, DeleteTimer, Exists, ExpiryTimerRequest, Get, GetProcessingTime, GetValue, GetWatermark, HandleState, Keys, ListStateCall, ListStateGet, ListStatePut, ListTimers, MapStateCall, ParseStringSchema, RegisterTimer, RemoveKey, SetHandleState, StateCallCommand, StatefulProcessorCall, TimerRequest, TimerStateCallCommand, TimerValueRequest, UpdateValue, UtilsRequest, Values, ValueStateCall, ValueStateUpdate} import org.apache.spark.sql.streaming.{ListState, MapState, TTLConfig, ValueState} import org.apache.spark.sql.types.{IntegerType, StructField, StructType} @@ -574,6 +574,16 @@ class TransformWithStateInPandasStateServerSuite extends SparkFunSuite with Befo verify(arrowStreamWriter).finalizeCurrentArrowBatch() } + test("utils request - parse string schema") { + val message = UtilsRequest.newBuilder().setParseStringSchema( + ParseStringSchema.newBuilder().setSchema( + "value int" + ).build() + ).build() + stateServer.handleUtilsRequest(message) + verify(outputStream).writeInt(argThat((x: Int) => x > 0)) + } + private def getIntegerRow(value: Int): Row = { new GenericRowWithSchema(Array(value), stateSchema) } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala index e5dfa33164903..5f7205eaf4bbe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/OffsetSeqLogSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.streaming import java.io.File +import org.apache.commons.io.FileUtils + import org.apache.spark.sql.catalyst.util.stringToFile import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -129,4 +131,69 @@ class OffsetSeqLogSuite extends SharedSparkSession { val log = new OffsetSeqLog(spark, input.toString) log.getLatest().get } + + // SPARK-50526 - sanity tests to ensure that values are set correctly for state store + // encoding format within OffsetSeqMetadata + test("offset log records defaults to unsafeRow for store encoding format") { + val offsetSeqMetadata = OffsetSeqMetadata.apply(batchWatermarkMs = 0, batchTimestampMs = 0, + spark.conf) + assert(offsetSeqMetadata.conf.get(SQLConf.STREAMING_STATE_STORE_ENCODING_FORMAT.key) === + Some("unsaferow")) + } + + test("offset log uses the store encoding format set in the conf") { + val offsetSeqMetadata = OffsetSeqMetadata.apply(batchWatermarkMs = 0, batchTimestampMs = 0, + Map(SQLConf.STREAMING_STATE_STORE_ENCODING_FORMAT.key -> "avro")) + assert(offsetSeqMetadata.conf.get(SQLConf.STREAMING_STATE_STORE_ENCODING_FORMAT.key) === + Some("avro")) + } + + // Verify whether entry exists within the offset log and has the right value or that we pick up + // the correct default values when populating the session conf. 
+ private def verifyOffsetLogEntry( + checkpointDir: String, + entryExists: Boolean, + encodingFormat: String): Unit = { + val log = new OffsetSeqLog(spark, s"$checkpointDir/offsets") + val latestBatchId = log.getLatestBatchId() + assert(latestBatchId.isDefined, "No offset log entries found in the checkpoint location") + + // Read the latest offset log + val offsetSeq = log.get(latestBatchId.get).get + val offsetSeqMetadata = offsetSeq.metadata.get + + if (entryExists) { + val encodingFormatOpt = offsetSeqMetadata.conf.get( + SQLConf.STREAMING_STATE_STORE_ENCODING_FORMAT.key) + assert(encodingFormatOpt.isDefined, "No store encoding format found in the offset log entry") + assert(encodingFormatOpt.get == encodingFormat) + } + + val clonedSqlConf = spark.sessionState.conf.clone() + OffsetSeqMetadata.setSessionConf(offsetSeqMetadata, clonedSqlConf) + assert(clonedSqlConf.stateStoreEncodingFormat == encodingFormat) + } + + // verify that checkpoint created with different store encoding formats are read correctly + Seq("unsaferow", "avro").foreach { storeEncodingFormat => + test(s"verify format values from checkpoint loc - $storeEncodingFormat") { + withTempDir { checkpointDir => + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-4.0.0-tws-" + storeEncodingFormat + "/").toURI + FileUtils.copyDirectory(new File(resourceUri), checkpointDir.getCanonicalFile) + verifyOffsetLogEntry(checkpointDir.getAbsolutePath, entryExists = true, + storeEncodingFormat) + } + } + } + + test("verify format values from old checkpoint with Spark version 3.5.1") { + withTempDir { checkpointDir => + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/").toURI + FileUtils.copyDirectory(new File(resourceUri), checkpointDir.getCanonicalFile) + verifyOffsetLogEntry(checkpointDir.getAbsolutePath, entryExists = false, + "unsaferow") + } + } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/ListStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/ListStateSuite.scala index 22876831c00d1..bb4343bf32159 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/ListStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/ListStateSuite.scala @@ -190,8 +190,8 @@ class ListStateSuite extends StateVariableSuiteBase { var ttlValues = testState.getTTLValues() assert(ttlValues.nonEmpty) assert(ttlValues.forall(_._2 === ttlExpirationMs)) - var ttlStateValueIterator = testState.getValuesInTTLState() - assert(ttlStateValueIterator.hasNext) + var ttlStateValue = testState.getValueInTTLState() + assert(ttlStateValue.isDefined) // increment batchProcessingTime, or watermark and ensure expired value is not returned val nextBatchHandle = new StatefulProcessorHandleImpl(store, UUID.randomUUID(), @@ -212,10 +212,9 @@ class ListStateSuite extends StateVariableSuiteBase { ttlValues = nextBatchTestState.getTTLValues() assert(ttlValues.nonEmpty) assert(ttlValues.forall(_._2 === ttlExpirationMs)) - ttlStateValueIterator = nextBatchTestState.getValuesInTTLState() - assert(ttlStateValueIterator.hasNext) - assert(ttlStateValueIterator.next() === ttlExpirationMs) - assert(ttlStateValueIterator.isEmpty) + ttlStateValue = nextBatchTestState.getValueInTTLState() + assert(ttlStateValue.isDefined) + assert(ttlStateValue.get === ttlExpirationMs) // getWithoutTTL should still return the expired value assert(nextBatchTestState.getWithoutEnforcingTTL().toSeq === Seq("v1", "v2", "v3")) @@ -276,8 +275,8 @@ class ListStateSuite extends StateVariableSuiteBase { val ttlValues = testState.getTTLValues() assert(ttlValues.nonEmpty) assert(ttlValues.forall(_._2 === ttlExpirationMs)) - val ttlStateValueIterator = testState.getValuesInTTLState() - assert(ttlStateValueIterator.hasNext) + val ttlStateValue = 
testState.getValueInTTLState() + assert(ttlStateValue.isDefined) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreCheckpointFormatV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreCheckpointFormatV2Suite.scala index 9ac74eb5b9e8f..5725ebaf727bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreCheckpointFormatV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreCheckpointFormatV2Suite.scala @@ -182,7 +182,7 @@ class CkptIdCollectingStateStoreProviderWrapper extends StateStoreProvider { // return their own state store checkpointID. This can happen because of task retry or // speculative execution. class RocksDBStateStoreCheckpointFormatV2Suite extends StreamTest - with AlsoTestWithChangelogCheckpointingEnabled { + with AlsoTestWithRocksDBFeatures { import testImplicits._ val providerClassName = classOf[CkptIdCollectingStateStoreProviderWrapper].getCanonicalName @@ -445,11 +445,12 @@ class RocksDBStateStoreCheckpointFormatV2Suite extends StreamTest val numBatches = checkpointInfoList.size / 8 // We don't pass batch versions that would need base checkpoint IDs because we don't know - // batchIDs for that. We only know that there are 3 batches without it. + // batchIDs for that. We only know that there are 1 batches without it. + // The two checkpoint IDs in between are stored in the commit log. 
validateCheckpointInfo(numBatches, 4, Set()) assert(CkptIdCollectingStateStoreWrapper .getStateStoreCheckpointInfos - .count(_.baseStateStoreCkptId.isDefined) == (numBatches - 3) * 8) + .count(_.baseStateStoreCkptId.isDefined) == (numBatches - 1) * 8) } testWithCheckpointInfoTracked(s"checkpointFormatVersion2 validate DropDuplicates") { @@ -541,4 +542,35 @@ class RocksDBStateStoreCheckpointFormatV2Suite extends StreamTest } validateCheckpointInfo(6, 1, Set(2, 4, 6)) } + + test("checkpointFormatVersion2 validate transformWithState") { + withTempDir { checkpointDir => + val inputData = MemoryStream[String] + val result = inputData.toDS() + .groupByKey(x => x) + .transformWithState(new RunningCountStatefulProcessor(), + TimeMode.None(), + OutputMode.Update()) + + testStream(result, Update())( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + AddData(inputData, "a"), + CheckNewAnswer(("a", "1")), + Execute { q => + assert(q.lastProgress.stateOperators(0).customMetrics.get("numValueStateVars") > 0) + assert(q.lastProgress.stateOperators(0).customMetrics.get("numRegisteredTimers") == 0) + }, + AddData(inputData, "a", "b"), + CheckNewAnswer(("a", "2"), ("b", "1")), + StopStream, + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + AddData(inputData, "a", "b"), // should remove state for "a" and not return anything for a + CheckNewAnswer(("b", "2")), + StopStream, + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + AddData(inputData, "a", "c"), // should recreate state for "a" and return count as 1 and + CheckNewAnswer(("a", "1"), ("c", "1")) + ) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala index d20cfb04f8e81..f170de66ee9df 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreIntegrationSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.streaming.OutputMode.Update import org.apache.spark.util.Utils class RocksDBStateStoreIntegrationSuite extends StreamTest - with AlsoTestWithChangelogCheckpointingEnabled { + with AlsoTestWithRocksDBFeatures { import testImplicits._ testWithColumnFamilies("RocksDBStateStore", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreSuite.scala index e1bd9dd38066b..72035ee268cbb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreSuite.scala @@ -42,7 +42,8 @@ import org.apache.spark.util.Utils @ExtendedSQLTest class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvider] - with AlsoTestWithChangelogCheckpointingEnabled + with AlsoTestWithRocksDBFeatures + with AlsoTestWithEncodingTypes with SharedSparkSession with BeforeAndAfter { @@ -58,7 +59,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid import StateStoreTestsHelper._ - testWithColumnFamilies(s"version encoding", + testWithColumnFamiliesAndEncodingTypes(s"version encoding", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => import RocksDBStateStoreProvider._ @@ -127,7 +128,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb file manager metrics exposed", + testWithColumnFamiliesAndEncodingTypes("rocksdb file manager metrics exposed", TestWithBothChangelogCheckpointingEnabledAndDisabled) { 
colFamiliesEnabled => import RocksDBStateStoreProvider._ def getCustomMetric(metrics: StateStoreMetrics, @@ -162,7 +163,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan validation - invalid num columns", + testWithColumnFamiliesAndEncodingTypes("rocksdb range scan validation - invalid num columns", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => // zero ordering cols val ex1 = intercept[SparkUnsupportedOperationException] { @@ -201,7 +202,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid ) } - testWithColumnFamilies("rocksdb range scan validation - variable sized columns", + testWithColumnFamiliesAndEncodingTypes("rocksdb range scan validation - variable sized columns", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => val keySchemaWithVariableSizeCols: StructType = StructType( Seq(StructField("key1", StringType, false), StructField("key2", StringType, false))) @@ -224,7 +225,8 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid ) } - testWithColumnFamilies("rocksdb range scan validation - variable size data types unsupported", + testWithColumnFamiliesAndEncodingTypes( + "rocksdb range scan validation - variable size data types unsupported", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => val keySchemaWithSomeUnsupportedTypeCols: StructType = StructType(Seq( StructField("key1", StringType, false), @@ -264,7 +266,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan validation - null type columns", + testWithColumnFamiliesAndEncodingTypes("rocksdb range scan validation - null type columns", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => val keySchemaWithNullTypeCols: StructType = StructType( Seq(StructField("key1", 
NullType, false), StructField("key2", StringType, false))) @@ -287,7 +289,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid ) } - testWithColumnFamilies("rocksdb range scan - fixed size non-ordering columns", + testWithColumnFamiliesAndEncodingTypes("rocksdb range scan - fixed size non-ordering columns", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => tryWithProviderResource(newStoreProvider(keySchemaWithRangeScan, @@ -339,7 +341,8 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan - variable size non-ordering columns with " + + testWithColumnFamiliesAndEncodingTypes( + "rocksdb range scan - variable size non-ordering columns with " + "double type values are supported", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => @@ -395,7 +398,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan - variable size non-ordering columns", + testWithColumnFamiliesAndEncodingTypes("rocksdb range scan - variable size non-ordering columns", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => tryWithProviderResource(newStoreProvider(keySchemaWithRangeScan, @@ -448,7 +451,8 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan multiple ordering columns - variable size " + + testWithColumnFamiliesAndEncodingTypes( + "rocksdb range scan multiple ordering columns - variable size " + s"non-ordering columns", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => @@ -492,15 +496,16 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan multiple non-contiguous ordering columns", + testWithColumnFamiliesAndEncodingTypes( + "rocksdb range 
scan multiple non-contiguous ordering columns", TestWithBothChangelogCheckpointingEnabledAndDisabled ) { colFamiliesEnabled => val testSchema: StructType = StructType( Seq( - StructField("ordering-1", LongType, false), + StructField("ordering1", LongType, false), StructField("key2", StringType, false), - StructField("ordering-2", IntegerType, false), - StructField("string-2", StringType, false), - StructField("ordering-3", DoubleType, false) + StructField("ordering2", IntegerType, false), + StructField("string2", StringType, false), + StructField("ordering3", DoubleType, false) ) ) @@ -582,7 +587,8 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } - testWithColumnFamilies("rocksdb range scan multiple ordering columns - variable size " + + testWithColumnFamiliesAndEncodingTypes( + "rocksdb range scan multiple ordering columns - variable size " + s"non-ordering columns with null values in first ordering column", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => @@ -682,7 +688,8 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan multiple ordering columns - variable size " + + testWithColumnFamiliesAndEncodingTypes( + "rocksdb range scan multiple ordering columns - variable size " + s"non-ordering columns with null values in second ordering column", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => @@ -735,7 +742,8 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan byte ordering column - variable size " + + testWithColumnFamiliesAndEncodingTypes( + "rocksdb range scan byte ordering column - variable size " + s"non-ordering columns", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => @@ -779,7 +787,8 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - 
testWithColumnFamilies("rocksdb range scan - ordering cols and key schema cols are same", + testWithColumnFamiliesAndEncodingTypes( + "rocksdb range scan - ordering cols and key schema cols are same", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => // use the same schema as value schema for single col key schema @@ -821,7 +830,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb range scan - with prefix scan", + testWithColumnFamiliesAndEncodingTypes("rocksdb range scan - with prefix scan", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => tryWithProviderResource(newStoreProvider(keySchemaWithRangeScan, @@ -858,7 +867,8 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies("rocksdb key and value schema encoders for column families", + testWithColumnFamiliesAndEncodingTypes( + "rocksdb key and value schema encoders for column families", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => val testColFamily = "testState" @@ -919,7 +929,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } /* Column family related tests */ - testWithColumnFamilies("column family creation with invalid names", + testWithColumnFamiliesAndEncodingTypes("column family creation with invalid names", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => tryWithProviderResource( newStoreProvider(useColumnFamilies = colFamiliesEnabled)) { provider => @@ -956,7 +966,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies(s"column family creation with reserved chars", + testWithColumnFamiliesAndEncodingTypes(s"column family creation with reserved chars", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => tryWithProviderResource( 
newStoreProvider(useColumnFamilies = colFamiliesEnabled)) { provider => @@ -992,7 +1002,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid } } - testWithColumnFamilies(s"operations on absent column family", + testWithColumnFamiliesAndEncodingTypes(s"operations on absent column family", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => tryWithProviderResource( newStoreProvider(useColumnFamilies = colFamiliesEnabled)) { provider => @@ -1145,7 +1155,7 @@ class RocksDBStateStoreSuite extends StateStoreSuiteBase[RocksDBStateStoreProvid Seq( NoPrefixKeyStateEncoderSpec(keySchema), PrefixKeyScanStateEncoderSpec(keySchema, 1) ).foreach { keyEncoder => - testWithColumnFamilies(s"validate rocksdb " + + testWithColumnFamiliesAndEncodingTypes(s"validate rocksdb " + s"${keyEncoder.getClass.toString.split('.').last} correctness", TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => tryWithProviderResource(newStoreProvider(keySchema, keyEncoder, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala index 637eb49130305..634a3c9de9011 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.streaming.state import java.io._ import java.nio.charset.Charset +import java.util.UUID import java.util.concurrent.Executors import scala.collection.mutable @@ -29,19 +30,27 @@ import scala.util.Random import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.{FSDataInputStream, Path} import org.rocksdb.CompressionType import org.scalactic.source.Position +import 
org.scalatest.PrivateMethodTester import org.scalatest.Tag -import org.apache.spark.{SparkConf, SparkException, TaskContext} +import org.apache.spark.{SparkConf, SparkException, SparkFunSuite, TaskContext} +import org.apache.spark.internal.Logging +import org.apache.spark.io.CompressionCodec +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.UnsafeProjection import org.apache.spark.sql.catalyst.util.quietly -import org.apache.spark.sql.execution.streaming.{CreateAtomicTestManager, FileSystemBasedCheckpointFileManager} +import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, CreateAtomicTestManager, FileContextBasedCheckpointFileManager, FileSystemBasedCheckpointFileManager} import org.apache.spark.sql.execution.streaming.CheckpointFileManager.{CancellableFSDataOutputStream, RenameBasedFSDataOutputStream} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.STREAMING_CHECKPOINT_FILE_MANAGER_CLASS import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ import org.apache.spark.tags.SlowSQLTest +import org.apache.spark.unsafe.Platform +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.{ThreadUtils, Utils} import org.apache.spark.util.ArrayImplicits._ @@ -62,6 +71,17 @@ class NoOverwriteFileSystemBasedCheckpointFileManager(path: Path, hadoopConf: Co } } +class TestStateStoreChangelogWriterV101( + fm: CheckpointFileManager, + file: Path, + compressionCodec: CompressionCodec) + extends StateStoreChangelogWriterV1(fm, file, compressionCodec) { + + override def version: Short = 101 + + writeVersion() +} + trait RocksDBStateStoreChangelogCheckpointingTestUtil { val rocksdbChangelogCheckpointingConfKey: String = RocksDBConf.ROCKSDB_SQL_CONF_NAME_PREFIX + ".changelogCheckpointing.enabled" @@ -71,22 +91,41 @@ trait RocksDBStateStoreChangelogCheckpointingTestUtil { def snapshotVersionsPresent(dir: File): 
Seq[Long] = { dir.listFiles.filter(_.getName.endsWith(".zip")) - .map(_.getName.stripSuffix(".zip")) - .map(_.toLong) + .map(_.getName.stripSuffix(".zip").split("_")) + .map { + case Array(version, _) => version.toLong + case Array(version) => version.toLong + } .sorted .toImmutableArraySeq } def changelogVersionsPresent(dir: File): Seq[Long] = { dir.listFiles.filter(_.getName.endsWith(".changelog")) - .map(_.getName.stripSuffix(".changelog")) - .map(_.toLong) + .map(_.getName.stripSuffix(".changelog").split("_")) + .map { + case Array(version, _) => version.toLong + case Array(version) => version.toLong + } .sorted .toImmutableArraySeq } } -trait AlsoTestWithChangelogCheckpointingEnabled +trait AlsoTestWithEncodingTypes extends SQLTestUtils { + override protected def test(testName: String, testTags: Tag*)(testBody: => Any) + (implicit pos: Position): Unit = { + Seq("unsaferow", "avro").foreach { encoding => + super.test(s"$testName (encoding = $encoding)", testTags: _*) { + withSQLConf(SQLConf.STREAMING_STATE_STORE_ENCODING_FORMAT.key -> encoding) { + testBody + } + } + } + } +} + +trait AlsoTestWithRocksDBFeatures extends SQLTestUtils with RocksDBStateStoreChangelogCheckpointingTestUtil { sealed trait TestMode @@ -128,6 +167,35 @@ trait AlsoTestWithChangelogCheckpointingEnabled } } + def testWithRocksDBStateStore( + testName: String, testTags: Tag*)(testBody: => Any): Unit = { + super.test(testName, testTags: _*) { + // in case tests have any code that needs to execute before every test + super.beforeEach() + withSQLConf( + SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName) { + testBody + } + // in case tests have any code that needs to execute after every test + super.afterEach() + } + } + + def testWithColumnFamiliesAndEncodingTypes( + testName: String, + testMode: TestMode = TestWithBothChangelogCheckpointingEnabledAndDisabled) + (testBody: Boolean => Any): Unit = { + // For each encoding type + Seq("unsaferow", 
"avro").foreach { encoding => + // Call testWithColumnFamilies for each encoding + testWithColumnFamilies(s"$testName (encoding = $encoding)", testMode) { colFamiliesEnabled => + withSQLConf(SQLConf.STREAMING_STATE_STORE_ENCODING_FORMAT.key -> encoding) { + testBody(colFamiliesEnabled) + } + } + } + } + def testWithColumnFamilies( testName: String, testMode: TestMode, @@ -162,258 +230,585 @@ trait AlsoTestWithChangelogCheckpointingEnabled } } } -} -@SlowSQLTest -class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with SharedSparkSession { + def testWithStateStoreCheckpointIdsAndColumnFamilies( + testName: String, + testMode: TestMode, + testTags: Tag*) + (testBody: (Boolean, Boolean) => Any): Unit = { + Seq(true, false).foreach { enableStateStoreCheckpointIds => + val newTestName = s"$testName - with enableStateStoreCheckpointIds = " + + s"$enableStateStoreCheckpointIds" + testWithColumnFamilies(newTestName, testMode, testTags: _*) { colFamiliesEnabled => + testBody(enableStateStoreCheckpointIds, colFamiliesEnabled) + } + } + } - override protected def sparkConf: SparkConf = { - super.sparkConf - .set(SQLConf.STATE_STORE_PROVIDER_CLASS, classOf[RocksDBStateStoreProvider].getName) + def testWithStateStoreCheckpointIds( + testName: String, + testTags: Tag*) + (testBody: Boolean => Any): Unit = { + Seq(true, false).foreach { enableStateStoreCheckpointIds => + val newTestName = s"$testName - with enableStateStoreCheckpointIds = " + + s"$enableStateStoreCheckpointIds" + test(newTestName, testTags: _*) { enableStateStoreCheckpointIds => + testBody(enableStateStoreCheckpointIds) + } + } } - testWithColumnFamilies( - "RocksDB: check changelog and snapshot version", - TestWithChangelogCheckpointingEnabled) { colFamiliesEnabled => - val remoteDir = Utils.createTempDir().toString - val conf = dbConf.copy(minDeltasForSnapshot = 1) - new File(remoteDir).delete() // to make sure that the directory gets created - for (version <- 0 to 49) { - withDB(remoteDir, 
version = version, conf = conf, - useColumnFamilies = colFamiliesEnabled) { db => - db.put(version.toString, version.toString) - db.commit() - if ((version + 1) % 5 == 0) db.doMaintenance() + def testWithStateStoreCheckpointIdsAndChangelogEnabled( + testName: String, + testTags: Tag*) + (testBody: Boolean => Any): Unit = { + Seq(true, false).foreach { enableStateStoreCheckpointIds => + val newTestName = s"$testName - with enableStateStoreCheckpointIds = " + + s"$enableStateStoreCheckpointIds" + testWithChangelogCheckpointingDisabled(newTestName, testTags: _*) { + enableStateStoreCheckpointIds => testBody(enableStateStoreCheckpointIds) } } + } +} - if (isChangelogCheckpointingEnabled) { - assert(changelogVersionsPresent(remoteDir) === (1 to 50)) - assert(snapshotVersionsPresent(remoteDir) === Range.inclusive(5, 50, 5)) - } else { - assert(changelogVersionsPresent(remoteDir) === Seq.empty) - assert(snapshotVersionsPresent(remoteDir) === (1 to 50)) +class OpenNumCountedTestInputStream(in: InputStream) extends FSDataInputStream(in) { + import OpenNumCountedTestInputStream._ + + addOpenStreams(this) + + override def close(): Unit = { + removeOpenStream(this) + super.close() + } +} + +class OpenStreamCountedTestFileManager(path: Path, hadoopConf: Configuration) + extends FileContextBasedCheckpointFileManager(path, hadoopConf) { + + override def open(path: Path): FSDataInputStream = { + val stream = new OpenNumCountedTestInputStream(super.open(path)) + stream + } +} + +object OpenNumCountedTestInputStream extends Logging { + private val openStreams = mutable.Map.empty[FSDataInputStream, Throwable] + + def addOpenStreams(stream: FSDataInputStream): Unit = openStreams.synchronized { + openStreams.put(stream, new Throwable()) + } + + def removeOpenStream(stream: FSDataInputStream): Unit = openStreams.synchronized { + openStreams.remove(stream) + } + + def clearOpenStreams(): Unit = openStreams.synchronized { + openStreams.clear() + } + + def assertNoOpenStreams(): Unit = 
openStreams.synchronized { + val numOpen = openStreams.values.size + if (numOpen > 0) { + for (exc <- openStreams.values) { + logWarning("Leaked filesystem connection created at:") + exc.printStackTrace() + } + throw new IllegalStateException(s"There are $numOpen possibly leaked file streams.", + openStreams.values.head) } } +} - testWithColumnFamilies(s"RocksDB: load version that doesn't exist", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { - colFamiliesEnabled => - val provider = new RocksDBStateStoreProvider() - var ex = intercept[SparkException] { - provider.getStore(-1) +class RocksDBStateEncoderSuite extends SparkFunSuite { + + // Helper method to create test schemas + private def createTestSchemas() = { + val keySchema = StructType(Seq( + StructField("k1", IntegerType), + StructField("k2", LongType), + StructField("k3", DoubleType) + )) + val valueSchema = StructType(Seq( + StructField("v1", StringType), + StructField("v2", BooleanType) + )) + (keySchema, valueSchema) + } + + // Create encoders for different key encoding strategies + private def createTestEncoder(keyStateEncoderSpec: KeyStateEncoderSpec): RocksDBDataEncoder = { + val (keySchema, valueSchema) = createTestSchemas() + val stateSchemaInfo = Some(StateSchemaInfo( + keySchemaId = 0, + valueSchemaId = 0 + )) + new AvroStateEncoder(keyStateEncoderSpec, valueSchema, stateSchemaInfo) + } + + private def createNoPrefixKeyEncoder(): RocksDBDataEncoder = { + val (keySchema, _) = createTestSchemas() + createTestEncoder(NoPrefixKeyStateEncoderSpec(keySchema)) + } + + private def createPrefixKeyScanEncoder(): RocksDBDataEncoder = { + val (keySchema, _) = createTestSchemas() + createTestEncoder(PrefixKeyScanStateEncoderSpec(keySchema, numColsPrefixKey = 2)) + } + + private def createRangeKeyScanEncoder(): RocksDBDataEncoder = { + val (keySchema, _) = createTestSchemas() + createTestEncoder(RangeKeyScanStateEncoderSpec(keySchema, orderingOrdinals = Seq(0, 1))) + } + + test("verify schema ID 
handling in prefix and range scan key encoding") { + val keySchema = StructType(Seq( + StructField("k1", IntegerType), + StructField("k2", LongType), + StructField("k3", DoubleType) + )) + val valueSchema = StructType(Seq( + StructField("v1", StringType) + )) + + // Create test row with some data + val keyProj = UnsafeProjection.create(keySchema) + val fullKeyRow = keyProj.apply(InternalRow(42, 123L, 3.14)) + + // Test prefix scan encoding with schema evolution + withClue("Testing prefix scan encoding: ") { + val prefixKeySpec = PrefixKeyScanStateEncoderSpec(keySchema, numColsPrefixKey = 2) + val stateSchemaInfo = Some(StateSchemaInfo(keySchemaId = 42, valueSchemaId = 0)) + val encoder = new AvroStateEncoder(prefixKeySpec, valueSchema, stateSchemaInfo) + + // Then encode just the remaining key portion (which should include schema ID) + val remainingKeyRow = keyProj.apply(InternalRow(null, null, 3.14)) + val encodedRemainingKey = encoder.encodeRemainingKey(remainingKeyRow) + + // Verify schema ID in remaining key bytes + val decodedSchemaIdRow = encoder.decodeStateSchemaIdRow(encodedRemainingKey) + assert(decodedSchemaIdRow.schemaId === 42, + "Schema ID not preserved in prefix scan remaining key encoding") + } + + // Test range scan encoding with schema evolution + withClue("Testing range scan encoding: ") { + val rangeScanSpec = RangeKeyScanStateEncoderSpec(keySchema, orderingOrdinals = Seq(0, 1)) + val stateSchemaInfo = Some(StateSchemaInfo(keySchemaId = 24, valueSchemaId = 0)) + val encoder = new AvroStateEncoder(rangeScanSpec, valueSchema, stateSchemaInfo) + + // Encode remaining key (non-ordering columns) + // For range scan, the remaining key schema only contains columns NOT in orderingOrdinals + val remainingKeySchema = StructType(Seq( + StructField("k3", DoubleType) // Only the non-ordering column + )) + val remainingKeyProj = UnsafeProjection.create(remainingKeySchema) + val remainingKeyRow = remainingKeyProj.apply(InternalRow(3.14)) + val 
encodedRemainingKey = encoder.encodeRemainingKey(remainingKeyRow) + + // Verify schema ID in remaining key bytes + val decodedSchemaIdRow = encoder.decodeStateSchemaIdRow(encodedRemainingKey) + assert(decodedSchemaIdRow.schemaId === 24, + "Schema ID not preserved in range scan remaining key encoding") + + // Verify we can decode the remaining key correctly + // The decoded row should only have the non-ordering column (k3) + val decodedRemainingKey = encoder.decodeRemainingKey(encodedRemainingKey) + assert(decodedRemainingKey.getDouble(0) === 3.14, + "Data not preserved in range scan remaining key encoding") + + // Test the range scan key portion (ordering columns) + val rangeScanKeySchema = StructType(Seq( + StructField("k1", IntegerType), + StructField("k2", LongType) + )) + val rangeScanProj = UnsafeProjection.create(rangeScanKeySchema) + val rangeScanRow = rangeScanProj.apply(InternalRow(42, 123L)) + val encodedRangeScan = encoder.encodePrefixKeyForRangeScan(rangeScanRow) + + // Range scan portion should not have schema ID since it uses special encoding + val decodedRangeScan = encoder.decodePrefixKeyForRangeScan(encodedRangeScan) + assert(decodedRangeScan.getInt(0) === 42) + assert(decodedRangeScan.getLong(1) === 123L) } - checkError( - ex, - condition = "CANNOT_LOAD_STATE_STORE.UNEXPECTED_VERSION", - parameters = Map("version" -> "-1") + } + + test("verify schema ID preservation through encode/decode cycle") { + val encoders = Seq( + ("NoPrefixKey", createNoPrefixKeyEncoder()), + ("PrefixKeyScan", createPrefixKeyScanEncoder()), + ("RangeKeyScan", createRangeKeyScanEncoder()) ) - ex = intercept[SparkException] { - provider.getReadStore(-1) - } - checkError( - ex, - condition = "CANNOT_LOAD_STATE_STORE.UNEXPECTED_VERSION", - parameters = Map("version" -> "-1") + + // Test a range of schema IDs including edge cases + val testSchemaIds = Seq[Short]( + 0, // Min value + 1, // Common case + 42, // Arbitrary value + -1, // Negative value + Short.MaxValue, // Max 
positive + Short.MinValue // Max negative ) - val remoteDir = Utils.createTempDir().toString - new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, useColumnFamilies = colFamiliesEnabled) { db => - ex = intercept[SparkException] { - db.load(1) + encoders.foreach { case (encoderType, encoder) => + testSchemaIds.foreach { schemaId => + withClue(s"Testing $encoderType encoder with schema ID $schemaId: ") { + val testData = Array[Byte](1, 2, 3, 4) + val schemaIdRow = StateSchemaIdRow(schemaId, testData) + + // Encode the row + val encoded = encoder.encodeWithStateSchemaId(schemaIdRow) + + // Verify schema ID directly in encoded bytes + val encodedSchemaId = Platform.getShort(encoded, Platform.BYTE_ARRAY_OFFSET) + assert(encodedSchemaId === schemaId, + s"Schema ID mismatch in encoded bytes: expected $schemaId but got $encodedSchemaId") + + // Decode and verify + val decoded = encoder.decodeStateSchemaIdRow(encoded) + assert(decoded.schemaId === schemaId, + s"Schema ID mismatch after decode: expected $schemaId but got ${decoded.schemaId}") + + // Also verify data wasn't corrupted + assert(decoded.bytes === testData, + "Data corruption detected in encode/decode cycle") + } } - checkError( - ex, - condition = "CANNOT_LOAD_STATE_STORE.CANNOT_READ_STREAMING_STATE_FILE", - parameters = Map( - "fileToRead" -> s"$remoteDir/1.changelog" - ) - ) } } - testWithColumnFamilies( - "RocksDB: purge changelog and snapshots with minVersionsToDelete = 0", - TestWithChangelogCheckpointingEnabled) { colFamiliesEnabled => - val remoteDir = Utils.createTempDir().toString - new File(remoteDir).delete() // to make sure that the directory gets created - val conf = dbConf.copy(enableChangelogCheckpointing = true, - minVersionsToRetain = 3, minDeltasForSnapshot = 1, minVersionsToDelete = 0) - withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled) { db => - db.load(0) - db.commit() - for (version <- 1 to 2) { - db.load(version) - 
db.commit() - db.doMaintenance() - } - assert(snapshotVersionsPresent(remoteDir) === Seq(2, 3)) - assert(changelogVersionsPresent(remoteDir) == Seq(1, 2, 3)) + test("verify schema ID handling in single value encoder") { + val keySchema = StructType(Seq( + StructField("k1", IntegerType) + )) + val valueSchema = StructType(Seq( + StructField("v1", StringType), + StructField("v2", IntegerType), + StructField("v3", BooleanType) + )) - for (version <- 3 to 4) { - db.load(version) - db.commit() - } - assert(snapshotVersionsPresent(remoteDir) === Seq(2, 3)) - assert(changelogVersionsPresent(remoteDir) == (1 to 5)) - db.doMaintenance() - // 3 is the latest snapshot <= maxSnapshotVersionPresent - minVersionsToRetain + 1 - assert(snapshotVersionsPresent(remoteDir) === Seq(3, 5)) - assert(changelogVersionsPresent(remoteDir) == (3 to 5)) + val valueProj = UnsafeProjection.create(valueSchema) + val value = valueProj.apply(InternalRow(UTF8String.fromString("hello"), 42, true)) - for (version <- 5 to 7) { - db.load(version) - db.commit() - } - assert(snapshotVersionsPresent(remoteDir) === Seq(3, 5)) - assert(changelogVersionsPresent(remoteDir) == (3 to 8)) - db.doMaintenance() - // 5 is the latest snapshot <= maxSnapshotVersionPresent - minVersionsToRetain + 1 - assert(snapshotVersionsPresent(remoteDir) === Seq(5, 8)) - assert(changelogVersionsPresent(remoteDir) == (5 to 8)) + withClue("Testing single value encoder: ") { + val keySpec = NoPrefixKeyStateEncoderSpec(keySchema) + val stateSchemaInfo = Some(StateSchemaInfo(keySchemaId = 0, valueSchemaId = 42)) + val avroEncoder = new AvroStateEncoder(keySpec, valueSchema, stateSchemaInfo) + val valueEncoder = new SingleValueStateEncoder(avroEncoder, valueSchema) + + // Encode value + val encodedValue = valueEncoder.encodeValue(value) + + // Verify schema ID was included and preserved + val decodedSchemaIdRow = avroEncoder.decodeStateSchemaIdRow(encodedValue) + assert(decodedSchemaIdRow.schemaId === 42, + "Schema ID not preserved in 
single value encoding") + + // Verify value was preserved + val decodedValue = valueEncoder.decodeValue(encodedValue) + assert(decodedValue.getString(0) === "hello") + assert(decodedValue.getInt(1) === 42) + assert(decodedValue.getBoolean(2) === true) } } +} - testWithColumnFamilies( - "RocksDB: purge version files with minVersionsToDelete > 0", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => - val remoteDir = Utils.createTempDir().toString - new File(remoteDir).delete() // to make sure that the directory gets created - val conf = dbConf.copy( - minVersionsToRetain = 3, minDeltasForSnapshot = 1, minVersionsToDelete = 3) - withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled) { db => - // Commit 5 versions - // stale versions: (1, 2) - // keep versions: (3, 4, 5) - for (version <- 0 to 4) { - // Should upload latest snapshot but not delete any files - // since number of stale versions < minVersionsToDelete - db.load(version) - db.commit() - db.doMaintenance() - } +@SlowSQLTest +class RocksDBSuite extends AlsoTestWithRocksDBFeatures with SharedSparkSession + with PrivateMethodTester { - // Commit 1 more version - // stale versions: (1, 2, 3) - // keep versions: (4, 5, 6) - db.load(5) - db.commit() + override protected def sparkConf: SparkConf = { + super.sparkConf + .set(SQLConf.STATE_STORE_PROVIDER_CLASS, classOf[RocksDBStateStoreProvider].getName) + } + + // In each test we verify opened streams are all closed + private def hadoopConf: Configuration = { + val fmClass = "org.apache.spark.sql.execution.streaming.state." 
+ + "OpenStreamCountedTestFileManager" + val hadoopConf = new Configuration() + hadoopConf.set(STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, fmClass) + hadoopConf + } + + override def beforeEach(): Unit = { + OpenNumCountedTestInputStream.clearOpenStreams() + } + + override def afterEach(): Unit = { + eventually(timeout(10.seconds), interval(2.seconds)) { + OpenNumCountedTestInputStream.assertNoOpenStreams() + } + } + + testWithStateStoreCheckpointIdsAndColumnFamilies("RocksDB: check changelog and snapshot version", + TestWithChangelogCheckpointingEnabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val remoteDir = Utils.createTempDir().toString + val conf = dbConf.copy(minDeltasForSnapshot = 1) + new File(remoteDir).delete() // to make sure that the directory gets created + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, + useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + for (version <- 0 to 49) { + db.load(version, versionToUniqueId.get(version)) + db.put(version.toString, version.toString) + db.commit() + if ((version + 1) % 5 == 0) db.doMaintenance() + } + } - // Checkpoint directory before maintenance if (isChangelogCheckpointingEnabled) { - assert(snapshotVersionsPresent(remoteDir) == (1 to 5)) - assert(changelogVersionsPresent(remoteDir) == (1 to 6)) + assert(changelogVersionsPresent(remoteDir) === (1 to 50)) + assert(snapshotVersionsPresent(remoteDir) === Range.inclusive(5, 50, 5)) } else { - assert(snapshotVersionsPresent(remoteDir) == (1 to 6)) + assert(changelogVersionsPresent(remoteDir) === Seq.empty) + assert(snapshotVersionsPresent(remoteDir) === (1 to 50)) } + } - // Should delete stale versions for zip files and change log files - // since number of stale versions >= minVersionsToDelete - db.doMaintenance() + testWithStateStoreCheckpointIdsAndColumnFamilies(s"RocksDB: load 
version that doesn't exist", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val provider = new RocksDBStateStoreProvider() + var ex = intercept[SparkException] { + provider.getStore(-1) + } + checkError( + ex, + condition = "CANNOT_LOAD_STATE_STORE.UNEXPECTED_VERSION", + parameters = Map("version" -> "-1") + ) + ex = intercept[SparkException] { + provider.getReadStore(-1) + } + checkError( + ex, + condition = "CANNOT_LOAD_STATE_STORE.UNEXPECTED_VERSION", + parameters = Map("version" -> "-1") + ) - // Checkpoint directory after maintenance - assert(snapshotVersionsPresent(remoteDir) == Seq(4, 5, 6)) - if (isChangelogCheckpointingEnabled) { - assert(changelogVersionsPresent(remoteDir) == Seq(4, 5, 6)) + val remoteDir = Utils.createTempDir().toString + new File(remoteDir).delete() // to make sure that the directory gets created + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + ex = intercept[SparkException] { + db.load(1, versionToUniqueId.get(1)) + } + checkError( + ex, + condition = "CANNOT_LOAD_STATE_STORE.CANNOT_READ_STREAMING_STATE_FILE", + parameters = Map( + "fileToRead" -> s"$remoteDir/1.changelog" + ) + ) } - } } - testWithColumnFamilies( - "RocksDB: minDeltasForSnapshot", - TestWithChangelogCheckpointingEnabled) { colFamiliesEnabled => - val remoteDir = Utils.createTempDir().toString - new File(remoteDir).delete() // to make sure that the directory gets created - val conf = dbConf.copy(enableChangelogCheckpointing = true, minDeltasForSnapshot = 3) - withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled) { db => - for (version <- 0 to 1) { - db.load(version) + testWithStateStoreCheckpointIdsAndColumnFamilies( + "RocksDB: purge changelog and snapshots with minVersionsToDelete = 0", + 
TestWithChangelogCheckpointingEnabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val remoteDir = Utils.createTempDir().toString + new File(remoteDir).delete() // to make sure that the directory gets created + val conf = dbConf.copy(enableChangelogCheckpointing = true, + minVersionsToRetain = 3, minDeltasForSnapshot = 1, minVersionsToDelete = 0) + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds) { db => + db.load(0) db.commit() + for (version <- 1 to 2) { + db.load(version) + db.commit() + db.doMaintenance() + } + assert(snapshotVersionsPresent(remoteDir) === Seq(2, 3)) + assert(changelogVersionsPresent(remoteDir) == Seq(1, 2, 3)) + + for (version <- 3 to 4) { + db.load(version) + db.commit() + } + assert(snapshotVersionsPresent(remoteDir) === Seq(2, 3)) + assert(changelogVersionsPresent(remoteDir) == (1 to 5)) db.doMaintenance() + // 3 is the latest snapshot <= maxSnapshotVersionPresent - minVersionsToRetain + 1 + assert(snapshotVersionsPresent(remoteDir) === Seq(3, 5)) + assert(changelogVersionsPresent(remoteDir) == (3 to 5)) + + for (version <- 5 to 7) { + db.load(version) + db.commit() + } + assert(snapshotVersionsPresent(remoteDir) === Seq(3, 5)) + assert(changelogVersionsPresent(remoteDir) == (3 to 8)) + db.doMaintenance() + // 5 is the latest snapshot <= maxSnapshotVersionPresent - minVersionsToRetain + 1 + assert(snapshotVersionsPresent(remoteDir) === Seq(5, 8)) + assert(changelogVersionsPresent(remoteDir) == (5 to 8)) } - // Snapshot should not be created because minDeltasForSnapshot = 3 - assert(snapshotVersionsPresent(remoteDir) === Seq.empty) - assert(changelogVersionsPresent(remoteDir) == Seq(1, 2)) - db.load(2) - db.commit() - db.doMaintenance() - assert(snapshotVersionsPresent(remoteDir) === Seq(3)) - db.load(3) + } + + testWithStateStoreCheckpointIdsAndColumnFamilies( + "RocksDB: purge version files with minVersionsToDelete > 0", + 
TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val remoteDir = Utils.createTempDir().toString + new File(remoteDir).delete() // to make sure that the directory gets created + val conf = dbConf.copy( + minVersionsToRetain = 3, minDeltasForSnapshot = 1, minVersionsToDelete = 3) + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds) { db => + // Commit 5 versions + // stale versions: (1, 2) + // keep versions: (3, 4, 5) + for (version <- 0 to 4) { + // Should upload latest snapshot but not delete any files + // since number of stale versions < minVersionsToDelete + db.load(version) + db.commit() + db.doMaintenance() + } - for (version <- 3 to 7) { - db.load(version) + // Commit 1 more version + // stale versions: (1, 2, 3) + // keep versions: (4, 5, 6) + db.load(5) db.commit() + + // Checkpoint directory before maintenance + if (isChangelogCheckpointingEnabled) { + assert(snapshotVersionsPresent(remoteDir) == (1 to 5)) + assert(changelogVersionsPresent(remoteDir) == (1 to 6)) + } else { + assert(snapshotVersionsPresent(remoteDir) == (1 to 6)) + } + + // Should delete stale versions for zip files and change log files + // since number of stale versions >= minVersionsToDelete db.doMaintenance() + + // Checkpoint directory after maintenance + assert(snapshotVersionsPresent(remoteDir) == Seq(4, 5, 6)) + if (isChangelogCheckpointingEnabled) { + assert(changelogVersionsPresent(remoteDir) == Seq(4, 5, 6)) + } } - assert(snapshotVersionsPresent(remoteDir) === Seq(3, 6)) - for (version <- 8 to 17) { - db.load(version) + } + + testWithStateStoreCheckpointIdsAndColumnFamilies( + "RocksDB: minDeltasForSnapshot", + TestWithChangelogCheckpointingEnabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val remoteDir = Utils.createTempDir().toString + new File(remoteDir).delete() // to make sure that the 
directory gets created + val conf = dbConf.copy(enableChangelogCheckpointing = true, minDeltasForSnapshot = 3) + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + for (version <- 0 to 1) { + db.load(version, versionToUniqueId.get(version)) + db.commit() + db.doMaintenance() + } + // Snapshot should not be created because minDeltasForSnapshot = 3 + assert(snapshotVersionsPresent(remoteDir) === Seq.empty) + assert(changelogVersionsPresent(remoteDir) == Seq(1, 2)) + db.load(2, versionToUniqueId.get(2)) db.commit() - } - db.doMaintenance() - assert(snapshotVersionsPresent(remoteDir) === Seq(3, 6, 18)) - } + db.doMaintenance() + assert(snapshotVersionsPresent(remoteDir) === Seq(3)) + db.load(3, versionToUniqueId.get(3)) - // pick up from the last snapshot and the next upload will be for version 21 - withDB(remoteDir, conf = conf) { db => - db.load(18) - db.commit() - db.doMaintenance() - assert(snapshotVersionsPresent(remoteDir) === Seq(3, 6, 18)) + for (version <- 3 to 7) { + db.load(version, versionToUniqueId.get(version)) + db.commit() + db.doMaintenance() + } + assert(snapshotVersionsPresent(remoteDir) === Seq(3, 6)) + for (version <- 8 to 17) { + db.load(version, versionToUniqueId.get(version)) + db.commit() + } + db.doMaintenance() + assert(snapshotVersionsPresent(remoteDir) === Seq(3, 6, 18)) + } - for (version <- 19 to 20) { - db.load(version) + // pick up from the last snapshot and the next upload will be for version 21 + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + db.load(18, versionToUniqueId.get(18)) db.commit() + db.doMaintenance() + assert(snapshotVersionsPresent(remoteDir) === Seq(3, 6, 18)) + + for (version <- 19 to 
20) { + db.load(version, versionToUniqueId.get(version)) + db.commit() + } + db.doMaintenance() + assert(snapshotVersionsPresent(remoteDir) === Seq(3, 6, 18, 21)) } - db.doMaintenance() - assert(snapshotVersionsPresent(remoteDir) === Seq(3, 6, 18, 21)) - } } - testWithColumnFamilies("SPARK-45419: Do not reuse SST files" + + testWithStateStoreCheckpointIdsAndColumnFamilies("SPARK-45419: Do not reuse SST files" + " in different RocksDB instances", - TestWithChangelogCheckpointingEnabled) { colFamiliesEnabled => - val remoteDir = Utils.createTempDir().toString - val conf = dbConf.copy(minDeltasForSnapshot = 0, compactOnCommit = false) - new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled) { db => - for (version <- 0 to 2) { - db.load(version) - db.put(version.toString, version.toString) - db.commit() - } - // upload snapshot 3.zip - db.doMaintenance() - // Roll back to version 1 and start to process data. - for (version <- 1 to 3) { - db.load(version) - db.put(version.toString, version.toString) - db.commit() + TestWithChangelogCheckpointingEnabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val remoteDir = Utils.createTempDir().toString + val conf = dbConf.copy(minDeltasForSnapshot = 0, compactOnCommit = false) + new File(remoteDir).delete() // to make sure that the directory gets created + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + for (version <- 0 to 2) { + db.load(version, versionToUniqueId.get(version)) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 3.zip + db.doMaintenance() + // Roll back to version 1 and start to process data. 
+ for (version <- 1 to 3) { + db.load(version, versionToUniqueId.get(version)) + db.put(version.toString, version.toString) + db.commit() + } + // Upload snapshot 4.zip, should not reuse the SST files in 3.zip + db.doMaintenance() } - withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled) { db => - // Open the db to verify that the state in 4.zip is no corrupted. - db.load(4) - } + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + // Open the db to verify that the state in 4.zip is not corrupted. + db.load(4, versionToUniqueId.get(4)) + } } // A rocksdb instance with changelog checkpointing enabled should be able to load // an existing checkpoint without changelog. - testWithColumnFamilies( + testWithStateStoreCheckpointIdsAndColumnFamilies( "RocksDB: changelog checkpointing backward compatibility", - TestWithChangelogCheckpointingEnabled) { colFamiliesEnabled => + TestWithChangelogCheckpointingEnabled) { (enableStateStoreCheckpointIds, colFamiliesEnabled) => val remoteDir = Utils.createTempDir().toString new File(remoteDir).delete() // to make sure that the directory gets created val disableChangelogCheckpointingConf = dbConf.copy(enableChangelogCheckpointing = false, minVersionsToRetain = 30) + val versionToUniqueId = new mutable.HashMap[Long, String]() withDB(remoteDir, conf = disableChangelogCheckpointingConf, - useColumnFamilies = colFamiliesEnabled) { db => + useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => for (version <- 1 to 30) { - db.load(version - 1) + db.load(version - 1, versionToUniqueId.get(version - 1)) db.put(version.toString, version.toString) db.remove((version - 1).toString) db.commit() @@ -427,13 
+822,15 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared dbConf.copy(enableChangelogCheckpointing = true, minVersionsToRetain = 30, minDeltasForSnapshot = 1) withDB(remoteDir, conf = enableChangelogCheckpointingConf, - useColumnFamilies = colFamiliesEnabled) { db => + useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => for (version <- 1 to 30) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) assert(db.iterator().map(toStr).toSet === Set((version.toString, version.toString))) } for (version <- 30 to 60) { - db.load(version - 1) + db.load(version - 1, versionToUniqueId.get(version - 1)) db.put(version.toString, version.toString) db.remove((version - 1).toString) db.commit() @@ -441,13 +838,13 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared assert(snapshotVersionsPresent(remoteDir) === (1 to 30)) assert(changelogVersionsPresent(remoteDir) === (30 to 60)) for (version <- 1 to 60) { - db.load(version, readOnly = true) + db.load(version, versionToUniqueId.get(version), readOnly = true) assert(db.iterator().map(toStr).toSet === Set((version.toString, version.toString))) } // recommit 60 to ensure that acquireLock is released for maintenance for (version <- 60 to 60) { - db.load(version - 1) + db.load(version - 1, versionToUniqueId.get(version - 1)) db.put(version.toString, version.toString) db.remove((version - 1).toString) db.commit() @@ -455,29 +852,142 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared // Check that snapshots and changelogs get purged correctly. 
db.doMaintenance() assert(snapshotVersionsPresent(remoteDir) === Seq(30, 60)) - assert(changelogVersionsPresent(remoteDir) === (30 to 60)) + if (enableStateStoreCheckpointIds) { + // recommit version 60 creates another changelog file with different unique id + assert(changelogVersionsPresent(remoteDir) === (30 to 60) :+ 60) + } else { + assert(changelogVersionsPresent(remoteDir) === (30 to 60)) + } + // Verify the content of retained versions. for (version <- 30 to 60) { - db.load(version, readOnly = true) + db.load(version, versionToUniqueId.get(version), readOnly = true) assert(db.iterator().map(toStr).toSet === Set((version.toString, version.toString))) } } } + testWithChangelogCheckpointingEnabled("RocksDB Fault Tolerance: correctly handle when there " + + "are multiple snapshot files for the same version") { + val enableStateStoreCheckpointIds = true + val useColumnFamily = true + val remoteDir = Utils.createTempDir().toString + new File(remoteDir).delete() // to make sure that the directory gets created + val enableChangelogCheckpointingConf = + dbConf.copy(enableChangelogCheckpointing = true, minVersionsToRetain = 20, + minDeltasForSnapshot = 3) + + // Simulate when there are multiple snapshot files for the same version + // The first DB writes to version 0 with uniqueId + val versionToUniqueId1 = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = enableChangelogCheckpointingConf, + useColumnFamilies = useColumnFamily, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId1) { db => + db.load(0, versionToUniqueId1.get(0)) + db.put("a", "1") // write key a here + db.commit() + + // Add some change log files after the snapshot + for (version <- 2 to 5) { + db.load(version - 1, versionToUniqueId1.get(version - 1)) + db.put(version.toString, version.toString) // update "1" -> "1", "2" -> "2", ... 
+ db.commit() + } + + // doMaintenance uploads the snapshot + db.doMaintenance() + + for (version <- 6 to 10) { + db.load(version - 1, versionToUniqueId1.get(version - 1)) + db.put(version.toString, version.toString) + db.commit() + } + } + + // versionToUniqueId1 should be non-empty, meaning the id is updated from rocksDB to the map + assert(versionToUniqueId1.nonEmpty) + + // The second DB writes to version 0 with another uniqueId + val versionToUniqueId2 = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = enableChangelogCheckpointingConf, + useColumnFamilies = useColumnFamily, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId2) { db => + db.load(0, versionToUniqueId2.get(0)) + db.put("b", "2") // write key b here + db.commit() + // Add some change log files after the snapshot + for (version <- 2 to 5) { + db.load(version - 1, versionToUniqueId2.get(version - 1)) + db.put(version.toString, (version + 1).toString) // update "1" -> "2", "2" -> "3", ... 
+ db.commit() + } + + // doMaintenance uploads the snapshot + db.doMaintenance() + + for (version <- 6 to 10) { + db.load(version - 1, versionToUniqueId2.get(version - 1)) + db.put(version.toString, (version + 1).toString) + db.commit() + } + } + + // versionToUniqueId2 should be non-empty, meaning the id is updated from rocksDB to the map + assert(versionToUniqueId2.nonEmpty) + + // During a load() with linage from the first rocksDB, + // the DB should load with data in the first db + withDB(remoteDir, conf = enableChangelogCheckpointingConf, + useColumnFamilies = useColumnFamily, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId1) { db => + db.load(10, versionToUniqueId1.get(10)) + assert(toStr(db.get("a")) === "1") + for (version <- 2 to 10) { + // first time we write version -> version + // second time we write version -> version + 1 + // here since we are loading from the first db lineage, we should see version -> version + assert(toStr(db.get(version.toString)) === version.toString) + } + } + + // During a load() with linage from the second rocksDB, + // the DB should load with data in the second db + withDB(remoteDir, conf = enableChangelogCheckpointingConf, + useColumnFamilies = useColumnFamily, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId2) { db => + db.load(10, versionToUniqueId2.get(10)) + assert(toStr(db.get("b")) === "2") + for (version <- 2 to 10) { + // first time we write version -> version + // second time we write version -> version + 1 + // here since we are loading from the second db lineage, + // we should see version -> version + 1 + assert(toStr(db.get(version.toString)) === (version + 1).toString) + } + } + } + // A rocksdb instance with changelog checkpointing disabled should be able to load // an existing checkpoint with changelog. 
- testWithColumnFamilies( + testWithStateStoreCheckpointIdsAndColumnFamilies( "RocksDB: changelog checkpointing forward compatibility", - TestWithChangelogCheckpointingEnabled) { colFamiliesEnabled => + TestWithChangelogCheckpointingEnabled) { (enableStateStoreCheckpointIds, colFamiliesEnabled) => val remoteDir = Utils.createTempDir().toString new File(remoteDir).delete() // to make sure that the directory gets created val enableChangelogCheckpointingConf = dbConf.copy(enableChangelogCheckpointing = true, minVersionsToRetain = 20, minDeltasForSnapshot = 3) + val versionToUniqueId = new mutable.HashMap[Long, String]() withDB(remoteDir, conf = enableChangelogCheckpointingConf, - useColumnFamilies = colFamiliesEnabled) { db => + useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => for (version <- 1 to 30) { - db.load(version - 1) + db.load(version - 1, versionToUniqueId.get(version - 1)) db.put(version.toString, version.toString) db.remove((version - 1).toString) db.commit() @@ -487,16 +997,18 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared // Now disable changelog checkpointing in a checkpoint created by a state store // that enable changelog checkpointing. 
val disableChangelogCheckpointingConf = - dbConf.copy(enableChangelogCheckpointing = false, minVersionsToRetain = 20, - minDeltasForSnapshot = 1) + dbConf.copy(enableChangelogCheckpointing = false, minVersionsToRetain = 20, + minDeltasForSnapshot = 1) withDB(remoteDir, conf = disableChangelogCheckpointingConf, - useColumnFamilies = colFamiliesEnabled) { db => + useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => for (version <- 1 to 30) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) assert(db.iterator().map(toStr).toSet === Set((version.toString, version.toString))) } for (version <- 31 to 60) { - db.load(version - 1) + db.load(version - 1, versionToUniqueId.get(version - 1)) db.put(version.toString, version.toString) db.remove((version - 1).toString) db.commit() @@ -504,7 +1016,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared assert(changelogVersionsPresent(remoteDir) === (1 to 30)) assert(snapshotVersionsPresent(remoteDir) === (31 to 60)) for (version <- 1 to 60) { - db.load(version, readOnly = true) + db.load(version, versionToUniqueId.get(version), readOnly = true) assert(db.iterator().map(toStr).toSet === Set((version.toString, version.toString))) } // Check that snapshots and changelogs get purged correctly. @@ -513,7 +1025,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared assert(changelogVersionsPresent(remoteDir) === Seq.empty) // Verify the content of retained versions. 
for (version <- 41 to 60) { - db.load(version, readOnly = true) + db.load(version, versionToUniqueId.get(version), readOnly = true) assert(db.iterator().map(toStr).toSet === Set((version.toString, version.toString))) } } @@ -535,152 +1047,172 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - testWithColumnFamilies(s"RocksDB: get, put, iterator, commit, load", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => - def testOps(compactOnCommit: Boolean): Unit = { - val remoteDir = Utils.createTempDir().toString - new File(remoteDir).delete() // to make sure that the directory gets created + testWithStateStoreCheckpointIdsAndColumnFamilies(s"RocksDB: get, put, iterator, commit, load", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + def testOps(compactOnCommit: Boolean): Unit = { + val remoteDir = Utils.createTempDir().toString + new File(remoteDir).delete() // to make sure that the directory gets created - val conf = RocksDBConf().copy(compactOnCommit = compactOnCommit) - withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled) { db => - assert(db.get("a") === null) - assert(iterator(db).isEmpty) + val conf = RocksDBConf().copy(compactOnCommit = compactOnCommit) + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + assert(db.get("a") === null) + assert(iterator(db).isEmpty) - db.put("a", "1") - assert(toStr(db.get("a")) === "1") - db.commit() - } + db.put("a", "1") + assert(toStr(db.get("a")) === "1") + db.commit() + } - withDB(remoteDir, conf = conf, version = 0, useColumnFamilies = colFamiliesEnabled) { db => - // version 0 can be loaded again - assert(toStr(db.get("a")) === null) - assert(iterator(db).isEmpty) - } + 
withDB(remoteDir, conf = conf, version = 0, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + // version 0 can be loaded again + assert(toStr(db.get("a")) === null) + assert(iterator(db).isEmpty) + } - withDB(remoteDir, conf = conf, version = 1, useColumnFamilies = colFamiliesEnabled) { db => - // version 1 data recovered correctly - assert(toStr(db.get("a")) === "1") - assert(db.iterator().map(toStr).toSet === Set(("a", "1"))) + withDB(remoteDir, conf = conf, version = 1, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + // version 1 data recovered correctly + assert(toStr(db.get("a")) === "1") + assert(db.iterator().map(toStr).toSet === Set(("a", "1"))) - // make changes but do not commit version 2 - db.put("b", "2") - assert(toStr(db.get("b")) === "2") - assert(db.iterator().map(toStr).toSet === Set(("a", "1"), ("b", "2"))) - } + // make changes but do not commit version 2 + db.put("b", "2") + assert(toStr(db.get("b")) === "2") + assert(db.iterator().map(toStr).toSet === Set(("a", "1"), ("b", "2"))) + } - withDB(remoteDir, conf = conf, version = 1, useColumnFamilies = colFamiliesEnabled) { db => - // version 1 data not changed - assert(toStr(db.get("a")) === "1") - assert(db.get("b") === null) - assert(db.iterator().map(toStr).toSet === Set(("a", "1"))) + withDB(remoteDir, conf = conf, version = 1, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + // version 1 data not changed + assert(toStr(db.get("a")) === "1") + assert(db.get("b") === null) + assert(db.iterator().map(toStr).toSet === Set(("a", "1"))) - // commit version 2 - db.put("b", "2") - assert(toStr(db.get("b")) === "2") - db.commit() - assert(db.iterator().map(toStr).toSet === Set(("a", 
"1"), ("b", "2"))) - } + // commit version 2 + db.put("b", "2") + assert(toStr(db.get("b")) === "2") + db.commit() + assert(db.iterator().map(toStr).toSet === Set(("a", "1"), ("b", "2"))) + } - withDB(remoteDir, conf = conf, version = 1, useColumnFamilies = colFamiliesEnabled) { db => - // version 1 data not changed - assert(toStr(db.get("a")) === "1") - assert(db.get("b") === null) - } + withDB(remoteDir, conf = conf, version = 1, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + // version 1 data not changed + assert(toStr(db.get("a")) === "1") + assert(db.get("b") === null) + } - withDB(remoteDir, conf = conf, version = 2, useColumnFamilies = colFamiliesEnabled) { db => - // version 2 can be loaded again - assert(toStr(db.get("b")) === "2") - assert(db.iterator().map(toStr).toSet === Set(("a", "1"), ("b", "2"))) + withDB(remoteDir, conf = conf, version = 2, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + // version 2 can be loaded again + assert(toStr(db.get("b")) === "2") + assert(db.iterator().map(toStr).toSet === Set(("a", "1"), ("b", "2"))) - db.load(1) - assert(toStr(db.get("b")) === null) - assert(db.iterator().map(toStr).toSet === Set(("a", "1"))) + db.load(1, versionToUniqueId.get(1)) + assert(toStr(db.get("b")) === null) + assert(db.iterator().map(toStr).toSet === Set(("a", "1"))) + } } - } - for (compactOnCommit <- Seq(false, true)) { - withClue(s"compactOnCommit = $compactOnCommit") { - testOps(compactOnCommit) + for (compactOnCommit <- Seq(false, true)) { + withClue(s"compactOnCommit = $compactOnCommit") { + testOps(compactOnCommit) + } } - } } - testWithColumnFamilies(s"RocksDB: handle commit failures and aborts", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => - val hadoopConf = new Configuration() - 
hadoopConf.set( - SQLConf.STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, - classOf[CreateAtomicTestManager].getName) - val remoteDir = Utils.createTempDir().getAbsolutePath - withDB(remoteDir, hadoopConf = hadoopConf, useColumnFamilies = colFamiliesEnabled) { db => - // Disable failure of output stream and generate versions - CreateAtomicTestManager.shouldFailInCreateAtomic = false - for (version <- 1 to 10) { - db.load(version - 1) - db.put(version.toString, version.toString) // update "1" -> "1", "2" -> "2", ... - db.commit() - } - val version10Data = (1L to 10).map(_.toString).map(x => x -> x).toSet - - // Fail commit for next version and verify that reloading resets the files - CreateAtomicTestManager.shouldFailInCreateAtomic = true - db.load(10) - db.put("11", "11") - intercept[IOException] { - quietly { + testWithStateStoreCheckpointIdsAndColumnFamilies(s"RocksDB: handle commit failures and aborts", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val hadoopConf = new Configuration() + hadoopConf.set( + SQLConf.STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, + classOf[CreateAtomicTestManager].getName) + val remoteDir = Utils.createTempDir().getAbsolutePath + withDB(remoteDir, hadoopConf = hadoopConf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds) { db => + // Disable failure of output stream and generate versions + CreateAtomicTestManager.shouldFailInCreateAtomic = false + for (version <- 1 to 10) { + db.load(version - 1) + db.put(version.toString, version.toString) // update "1" -> "1", "2" -> "2", ... 
db.commit() } - } - assert(db.load(10, readOnly = true).iterator().map(toStr).toSet === version10Data) - CreateAtomicTestManager.shouldFailInCreateAtomic = false + val version10Data = (1L to 10).map(_.toString).map(x => x -> x).toSet + + // Fail commit for next version and verify that reloading resets the files + CreateAtomicTestManager.shouldFailInCreateAtomic = true + db.load(10) + db.put("11", "11") + intercept[IOException] { + quietly { + db.commit() + } + } + assert(db.load(10, readOnly = true).iterator().map(toStr).toSet === version10Data) + CreateAtomicTestManager.shouldFailInCreateAtomic = false - // Abort commit for next version and verify that reloading resets the files - db.load(10) - db.put("11", "11") - db.rollback() - assert(db.load(10, readOnly = true).iterator().map(toStr).toSet === version10Data) - } + // Abort commit for next version and verify that reloading resets the files + db.load(10) + db.put("11", "11") + db.rollback() + assert(db.load(10, readOnly = true).iterator().map(toStr).toSet === version10Data) + } } - testWithColumnFamilies("RocksDB close tests - close before doMaintenance", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => - val remoteDir = Utils.createTempDir().toString - val conf = dbConf.copy(minDeltasForSnapshot = 1, compactOnCommit = false) - new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf, - useColumnFamilies = colFamiliesEnabled) { db => - db.load(0) - db.put("foo", "bar") - db.commit() - // call close first and maintenance can be still be invoked in the context of the - // maintenance task's thread pool - db.close() - db.doMaintenance() - } + testWithStateStoreCheckpointIdsAndColumnFamilies("RocksDB close tests - " + + "close before doMaintenance", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val remoteDir = Utils.createTempDir().toString + val conf = 
dbConf.copy(minDeltasForSnapshot = 1, compactOnCommit = false) + new File(remoteDir).delete() // to make sure that the directory gets created + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds) { db => + db.load(0) + db.put("foo", "bar") + db.commit() + // call close first and maintenance can be still be invoked in the context of the + // maintenance task's thread pool + db.close() + db.doMaintenance() + } } - testWithColumnFamilies("RocksDB close tests - close after doMaintenance", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => - val remoteDir = Utils.createTempDir().toString - val conf = dbConf.copy(minDeltasForSnapshot = 1, compactOnCommit = false) - new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf, - useColumnFamilies = colFamiliesEnabled) { db => - db.load(0) - db.put("foo", "bar") - db.commit() - // maintenance can be invoked in the context of the maintenance task's thread pool - // and close is invoked after that - db.doMaintenance() - db.close() - } + testWithStateStoreCheckpointIdsAndColumnFamilies("RocksDB close tests - " + + "close after doMaintenance", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + val remoteDir = Utils.createTempDir().toString + val conf = dbConf.copy(minDeltasForSnapshot = 1, compactOnCommit = false) + new File(remoteDir).delete() // to make sure that the directory gets created + withDB(remoteDir, conf = conf, useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds) { db => + db.load(0) + db.put("foo", "bar") + db.commit() + // maintenance can be invoked in the context of the maintenance task's thread pool + // and close is invoked after that + db.doMaintenance() + db.close() + } } testWithChangelogCheckpointingEnabled("RocksDB: 
Unsupported Operations" + " with Changelog Checkpointing") { val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") val fileManager = new RocksDBFileManager( - dfsRootDir.getAbsolutePath, Utils.createTempDir(), new Configuration) + dfsRootDir.getAbsolutePath, Utils.createTempDir(), hadoopConf) val changelogWriter = fileManager.getChangeLogWriter(1) assert(changelogWriter.version === 1) @@ -729,7 +1261,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared testWithChangelogCheckpointingEnabled("RocksDBFileManager: read and write changelog") { val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") val fileManager = new RocksDBFileManager( - dfsRootDir.getAbsolutePath, Utils.createTempDir(), new Configuration) + dfsRootDir.getAbsolutePath, Utils.createTempDir(), hadoopConf) val changelogWriter = fileManager.getChangeLogWriter(1) assert(changelogWriter.version === 1) @@ -752,14 +1284,162 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared entries.zip(expectedEntries).map{ case (e1, e2) => assert(e1._1 === e2._1 && e1._2 === e2._2 && e1._3 === e2._3) } + + changelogReader.closeIfNeeded() + } + + testWithChangelogCheckpointingEnabled("RocksDBFileManager: StateStoreChangelogReaderFactory " + + "edge case") { + val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") + val fileManager = new RocksDBFileManager( + dfsRootDir.getAbsolutePath, Utils.createTempDir(), hadoopConf) + + val checkpointUniqueId = Some(java.util.UUID.randomUUID.toString) + val lineage: Array[LineageItem] = Array( + LineageItem(1, java.util.UUID.randomUUID.toString), + LineageItem(2, java.util.UUID.randomUUID.toString), + LineageItem(3, java.util.UUID.randomUUID.toString) + ) + + // Create a v1 writer + val changelogWriterV1 = fileManager.getChangeLogWriter(101) + assert(changelogWriterV1.version === 1) + changelogWriterV1.commit() // v1 with empty content + + val 
changelogReaderV1 = fileManager.getChangelogReader(101) + assert(changelogReaderV1.version === 1) // getChangelogReader should return a v1 reader + changelogReaderV1.closeIfNeeded() + + // Create a v2 writer + val changelogWriterV2 = fileManager.getChangeLogWriter(102, useColumnFamilies = true) + assert(changelogWriterV2.version === 2) + changelogWriterV2.commit() // v2 with empty content + + val changelogReaderV2 = fileManager.getChangelogReader(102) + assert(changelogReaderV2.version === 2) // getChangelogReader should return a v2 reader + changelogReaderV2.closeIfNeeded() + + // Create a v3 writer + val changelogWriterV3 = fileManager.getChangeLogWriter( + 103, useColumnFamilies = false, checkpointUniqueId, Some(lineage)) + assert(changelogWriterV3.version === 3) + changelogWriterV3.commit() // v1 with empty content + + val changelogReaderV3 = fileManager.getChangelogReader( + 103, checkpointUniqueId = checkpointUniqueId) + assert(changelogReaderV3.version === 3) // getChangelogReader should return a v3 reader + assert(changelogReaderV3.lineage sameElements lineage) + changelogReaderV3.closeIfNeeded() + + // Create a v4 writer + val changelogWriterV4 = fileManager.getChangeLogWriter( + 104, useColumnFamilies = true, checkpointUniqueId, Some(lineage)) + assert(changelogWriterV4.version === 4) + changelogWriterV4.commit() // v1 with empty content + + val changelogReaderV4 = fileManager.getChangelogReader( + 104, checkpointUniqueId = checkpointUniqueId) + assert(changelogReaderV4.version === 4) // getChangelogReader should return a v4 reader + assert(changelogReaderV4.lineage sameElements lineage) + changelogReaderV4.closeIfNeeded() + } + + testWithChangelogCheckpointingEnabled("RocksDBFileManager: changelog reader / writer " + + "failure cases") { + val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") + val fileManager = new RocksDBFileManager( + dfsRootDir.getAbsolutePath, Utils.createTempDir(), hadoopConf) + // Failure case 1: reader 
writer version mismatch + // Create a v1 writer + val changelogWriterV1 = fileManager.getChangeLogWriter(101) + assert(changelogWriterV1.version === 1) + + (1 to 5).foreach(i => changelogWriterV1.put(i.toString, i.toString)) + (2 to 4).foreach(j => changelogWriterV1.delete(j.toString)) + + changelogWriterV1.commit() + // Success case, when reading from the same file, a V1 reader should be constructed. + val changelogReaderV1 = fileManager.getChangelogReader(101) + assert(changelogReaderV1.version === 1) + changelogReaderV1.closeIfNeeded() + + // Failure case, force creating a V3 reader. + val dfsChangelogFile = PrivateMethod[Path](Symbol("dfsChangelogFile")) + val codec = PrivateMethod[CompressionCodec](Symbol("codec")) + var changelogFile = fileManager invokePrivate dfsChangelogFile(101L, None) + val compressionCodec = fileManager invokePrivate codec() + val fm = CheckpointFileManager.create(new Path(dfsRootDir.getAbsolutePath), new Configuration) + val e = intercept[AssertionError] { + new StateStoreChangelogReaderV3(fm, changelogFile, compressionCodec) + } + assert(e.getMessage.contains("Changelog version mismatch")) + + changelogFile = fileManager invokePrivate dfsChangelogFile(1L, None) + // Failure case 2: readerFactory throw when reading from ckpt built in future Spark version + // Create a v101 writer + val changelogWriter = new TestStateStoreChangelogWriterV101( + fm, changelogFile, compressionCodec) + assert(changelogWriter.version === 101) + + changelogWriter.commit() + + // Failure case, force creating a V3 reader. 
+ val ex = intercept[SparkException] { + fileManager.getChangelogReader(1) + } + checkError( + ex, + condition = "CANNOT_LOAD_STATE_STORE.INVALID_CHANGE_LOG_READER_VERSION", + parameters = Map("version" -> 101.toString) + ) + assert(ex.getMessage.contains("please upgrade your Spark")) + } + + testWithChangelogCheckpointingEnabled("RocksDBFileManager: read and write changelog " + + "with state checkpoint id enabled") { + val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") + val fileManager = new RocksDBFileManager( + dfsRootDir.getAbsolutePath, Utils.createTempDir(), hadoopConf) + val checkpointUniqueId = Some(java.util.UUID.randomUUID.toString) + val lineage: Array[LineageItem] = Array( + LineageItem(1, java.util.UUID.randomUUID.toString), + LineageItem(2, java.util.UUID.randomUUID.toString), + LineageItem(3, java.util.UUID.randomUUID.toString) + ) + val changelogWriter = fileManager.getChangeLogWriter( + 3, useColumnFamilies = false, checkpointUniqueId, Some(lineage)) + assert(changelogWriter.version === 3) + + (1 to 5).foreach(i => changelogWriter.put(i.toString, i.toString)) + (2 to 4).foreach(j => changelogWriter.delete(j.toString)) + + changelogWriter.commit() + val changelogReader = fileManager.getChangelogReader(3, checkpointUniqueId) + assert(changelogReader.version === 3) + assert(changelogReader.lineage sameElements lineage) + val entries = changelogReader.toSeq + val expectedEntries = (1 to 5).map { i => + (RecordType.PUT_RECORD, i.toString.getBytes, + i.toString.getBytes, StateStore.DEFAULT_COL_FAMILY_NAME) + } ++ (2 to 4).map { j => + (RecordType.DELETE_RECORD, j.toString.getBytes, + null, StateStore.DEFAULT_COL_FAMILY_NAME) + } + + assert(entries.size == expectedEntries.size) + entries.zip(expectedEntries).map{ + case (e1, e2) => assert(e1._1 === e2._1 && e1._2 === e2._2 && e1._3 === e2._3) + } + + changelogReader.closeIfNeeded() } testWithChangelogCheckpointingEnabled( "RocksDBFileManager: read and write v2 changelog with 
default col family") { val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") val fileManager = new RocksDBFileManager( - dfsRootDir.getAbsolutePath, Utils.createTempDir(), new Configuration) - val changelogWriter = fileManager.getChangeLogWriter(1, true) + dfsRootDir.getAbsolutePath, Utils.createTempDir(), hadoopConf) + val changelogWriter = fileManager.getChangeLogWriter(1, useColumnFamilies = true) assert(changelogWriter.version === 2) (1 to 5).foreach { i => changelogWriter.put(i.toString, i.toString) @@ -773,7 +1453,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } changelogWriter.commit() - val changelogReader = fileManager.getChangelogReader(1, true) + val changelogReader = fileManager.getChangelogReader(1) assert(changelogReader.version === 2) val entries = changelogReader.toSeq val expectedEntries = (1 to 5).map { i => @@ -788,6 +1468,54 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared entries.zip(expectedEntries).map{ case (e1, e2) => assert(e1._1 === e2._1 && e1._2 === e2._2 && e1._3 === e2._3) } + + changelogReader.closeIfNeeded() + } + + testWithChangelogCheckpointingEnabled("RocksDBFileManager: read and write v2 changelog with " + + "default col family and state checkpoint id enabled") { + val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") + val fileManager = new RocksDBFileManager( + dfsRootDir.getAbsolutePath, Utils.createTempDir(), hadoopConf) + val checkpointUniqueId = Some(java.util.UUID.randomUUID.toString) + val lineage: Array[LineageItem] = Array( + LineageItem(1, java.util.UUID.randomUUID.toString), + LineageItem(2, java.util.UUID.randomUUID.toString), + LineageItem(3, java.util.UUID.randomUUID.toString) + ) + val changelogWriter = fileManager.getChangeLogWriter( + 1, useColumnFamilies = true, checkpointUniqueId, Some(lineage)) + assert(changelogWriter.version === 4) + (1 to 5).foreach { i => + 
changelogWriter.put(i.toString, i.toString) + } + (1 to 5).foreach { i => + changelogWriter.merge(i.toString, i.toString) + } + + (2 to 4).foreach { j => + changelogWriter.delete(j.toString) + } + + changelogWriter.commit() + val changelogReader = fileManager.getChangelogReader(1, checkpointUniqueId) + assert(changelogReader.version === 4) + assert(changelogReader.lineage sameElements lineage) + val entries = changelogReader.toSeq + val expectedEntries = (1 to 5).map { i => + (RecordType.PUT_RECORD, i.toString.getBytes, i.toString.getBytes) + } ++ (1 to 5).map { i => + (RecordType.MERGE_RECORD, i.toString.getBytes, i.toString.getBytes) + } ++ (2 to 4).map { j => + (RecordType.DELETE_RECORD, j.toString.getBytes, null) + } + + assert(entries.size == expectedEntries.size) + entries.zip(expectedEntries).map{ + case (e1, e2) => assert(e1._1 === e2._1 && e1._2 === e2._2 && e1._3 === e2._3) + } + + changelogReader.closeIfNeeded() } testWithColumnFamilies("RocksDBFileManager: create init dfs directory with " + @@ -797,7 +1525,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared try { val verificationDir = Utils.createTempDir().getAbsolutePath val fileManager = new RocksDBFileManager( - dfsRootDir.getAbsolutePath, Utils.createTempDir(), new Configuration) + dfsRootDir.getAbsolutePath, Utils.createTempDir(), hadoopConf) // Save a version of empty checkpoint files val cpFiles = Seq() generateFiles(verificationDir, cpFiles) @@ -890,17 +1618,18 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - testWithColumnFamilies("RocksDBFileManager: delete orphan files", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => + testWithStateStoreCheckpointIdsAndColumnFamilies("RocksDBFileManager: delete orphan files", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => withTempDir { dir => val dfsRootDir = dir.getAbsolutePath // 
Use 2 file managers here to emulate concurrent execution // that checkpoint the same version of state val fileManager = new RocksDBFileManager( - dfsRootDir, Utils.createTempDir(), new Configuration) + dfsRootDir, Utils.createTempDir(), hadoopConf) val rocksDBFileMapping = new RocksDBFileMapping() val fileManager_ = new RocksDBFileManager( - dfsRootDir, Utils.createTempDir(), new Configuration) + dfsRootDir, Utils.createTempDir(), hadoopConf) val sstDir = s"$dfsRootDir/SSTs" def numRemoteSSTFiles: Int = listFiles(sstDir).length val logDir = s"$dfsRootDir/logs" @@ -915,9 +1644,12 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared "archive/00001.log" -> 1000, "archive/00002.log" -> 2000 ) - + val uuid = enableStateStoreCheckpointIds match { + case false => None + case true => Some(UUID.randomUUID().toString) + } saveCheckpointFiles(fileManager, cpFiles1, version = 1, - numKeys = 101, rocksDBFileMapping) + numKeys = 101, rocksDBFileMapping, uuid) assert(fileManager.getLatestVersion() === 1) assert(numRemoteSSTFiles == 2) // 2 sst files copied assert(numRemoteLogFiles == 2) @@ -932,7 +1664,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared "archive/00003.log" -> 2000 ) saveCheckpointFiles(fileManager_, cpFiles1_, version = 1, - numKeys = 101, new RocksDBFileMapping()) + numKeys = 101, new RocksDBFileMapping(), uuid) assert(fileManager_.getLatestVersion() === 1) assert(numRemoteSSTFiles == 4) assert(numRemoteLogFiles == 4) @@ -952,7 +1684,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared "archive/00005.log" -> 2000 ) saveCheckpointFiles(fileManager_, cpFiles2, - version = 2, numKeys = 121, new RocksDBFileMapping()) + version = 2, numKeys = 121, new RocksDBFileMapping(), uuid) fileManager_.deleteOldVersions(1) assert(numRemoteSSTFiles <= 4) // delete files recorded in 1.zip assert(numRemoteLogFiles <= 5) // delete files recorded in 1.zip and orphan 00001.log @@ -967,7 
+1699,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared "archive/00007.log" -> 2000 ) saveCheckpointFiles(fileManager_, cpFiles3, - version = 3, numKeys = 131, new RocksDBFileMapping()) + version = 3, numKeys = 131, new RocksDBFileMapping(), uuid) assert(fileManager_.getLatestVersion() === 3) fileManager_.deleteOldVersions(1) assert(numRemoteSSTFiles == 1) @@ -975,13 +1707,14 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - testWithColumnFamilies("RocksDBFileManager: don't delete orphan files " + - s"when there is only 1 version", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => + testWithStateStoreCheckpointIdsAndColumnFamilies("RocksDBFileManager: don't delete " + + s"orphan files when there is only 1 version", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => withTempDir { dir => val dfsRootDir = dir.getAbsolutePath val fileManager = new RocksDBFileManager( - dfsRootDir, Utils.createTempDir(), new Configuration) + dfsRootDir, Utils.createTempDir(), hadoopConf) (new File(dfsRootDir, "SSTs")).mkdir() (new File(dfsRootDir, "logs")).mkdir() @@ -1005,8 +1738,14 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared "archive/00002.log" -> 2000 ) val rocksDBFileMapping = new RocksDBFileMapping() - saveCheckpointFiles(fileManager, cpFiles1, - version = 1, numKeys = 101, rocksDBFileMapping) + val uuid = if (enableStateStoreCheckpointIds) { + Some(UUID.randomUUID().toString) + } else { + None + } + + saveCheckpointFiles( + fileManager, cpFiles1, version = 1, numKeys = 101, rocksDBFileMapping, uuid) fileManager.deleteOldVersions(1) // Should not delete orphan files even when they are older than all existing files // when there is only 1 version. 
@@ -1023,8 +1762,8 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared "archive/00003.log" -> 1000, "archive/00004.log" -> 2000 ) - saveCheckpointFiles(fileManager, cpFiles2, - version = 2, numKeys = 101, rocksDBFileMapping) + saveCheckpointFiles( + fileManager, cpFiles2, version = 2, numKeys = 101, rocksDBFileMapping, uuid) assert(numRemoteSSTFiles == 5) assert(numRemoteLogFiles == 5) fileManager.deleteOldVersions(1) @@ -1034,122 +1773,131 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - testWithColumnFamilies("RocksDBFileManager: upload only new immutable files", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => - withTempDir { dir => - val dfsRootDir = dir.getAbsolutePath - val verificationDir = Utils.createTempDir().getAbsolutePath // local dir to load checkpoints - val fileManager = new RocksDBFileManager( - dfsRootDir, Utils.createTempDir(), new Configuration) - val sstDir = s"$dfsRootDir/SSTs" - def numRemoteSSTFiles: Int = listFiles(sstDir).length - val logDir = s"$dfsRootDir/logs" - def numRemoteLogFiles: Int = listFiles(logDir).length - val fileMapping = new RocksDBFileMapping - - // Verify behavior before any saved checkpoints - assert(fileManager.getLatestVersion() === 0) - - // Try to load incorrect versions - intercept[FileNotFoundException] { - fileManager.loadCheckpointFromDfs(1, Utils.createTempDir(), fileMapping) - } - - // Save a version of checkpoint files - val cpFiles1 = Seq( - "sst-file1.sst" -> 10, - "sst-file2.sst" -> 20, - "other-file1" -> 100, - "other-file2" -> 200, - "archive/00001.log" -> 1000, - "archive/00002.log" -> 2000 - ) - saveCheckpointFiles(fileManager, cpFiles1, - version = 1, numKeys = 101, fileMapping) - assert(fileManager.getLatestVersion() === 1) - assert(numRemoteSSTFiles == 2) // 2 sst files copied - assert(numRemoteLogFiles == 2) // 2 log files copied - - // Load back the checkpoint files into another local dir with 
existing files and verify - generateFiles(verificationDir, Seq( - "sst-file1.sst" -> 11, // files with same name but different sizes, should get overwritten - "other-file1" -> 101, - "archive/00001.log" -> 1001, - "random-sst-file.sst" -> 100, // unnecessary files, should get deleted - "random-other-file" -> 9, - "00005.log" -> 101, - "archive/00007.log" -> 101 - )) + testWithStateStoreCheckpointIdsAndColumnFamilies("RocksDBFileManager: upload only " + + "new immutable files", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => + withTempDir { dir => + val dfsRootDir = dir.getAbsolutePath + val verificationDir = Utils.createTempDir().getAbsolutePath // local dir to load checkpoints + val fileManager = new RocksDBFileManager( + dfsRootDir, Utils.createTempDir(), hadoopConf) + val sstDir = s"$dfsRootDir/SSTs" + def numRemoteSSTFiles: Int = listFiles(sstDir).length + val logDir = s"$dfsRootDir/logs" + def numRemoteLogFiles: Int = listFiles(logDir).length + val fileMapping = new RocksDBFileMapping + + // Verify behavior before any saved checkpoints + assert(fileManager.getLatestVersion() === 0) + + // Try to load incorrect versions + intercept[FileNotFoundException] { + fileManager.loadCheckpointFromDfs(1, Utils.createTempDir(), fileMapping) + } - // as we are loading version 1 again, the previously committed 1,zip and - // SST files would not be reused. - loadAndVerifyCheckpointFiles(fileManager, verificationDir, - version = 1, cpFiles1, 101, fileMapping) + // Save a version of checkpoint files + val cpFiles1 = Seq( + "sst-file1.sst" -> 10, + "sst-file2.sst" -> 20, + "other-file1" -> 100, + "other-file2" -> 200, + "archive/00001.log" -> 1000, + "archive/00002.log" -> 2000 + ) - // Save SAME version again with different checkpoint files and load back again to verify - // whether files were overwritten. 
- val cpFiles1_ = Seq( - "sst-file1.sst" -> 10, // same SST file as before, but will be uploaded again - "sst-file2.sst" -> 25, // new SST file with same name as before, but different length - "sst-file3.sst" -> 30, // new SST file - "other-file1" -> 100, // same non-SST file as before, should not get copied - "other-file2" -> 210, // new non-SST file with same name as before, but different length - "other-file3" -> 300, // new non-SST file - "archive/00001.log" -> 1000, // same log file as before, this should get reused - "archive/00002.log" -> 2500, // new log file with same name as before, but different length - "archive/00003.log" -> 3000 // new log file - ) + val uuid = if (enableStateStoreCheckpointIds) { + Some(UUID.randomUUID().toString) + } else { + None + } - // upload version 1 again, new checkpoint will be created and SST files from - // previously committed version 1 will not be reused. - saveCheckpointFiles(fileManager, cpFiles1_, - version = 1, numKeys = 1001, fileMapping) - assert(numRemoteSSTFiles === 5, "shouldn't reuse old version 1 SST files" + - " while uploading version 1 again") // 2 old + 3 new SST files - assert(numRemoteLogFiles === 5, "shouldn't reuse old version 1 log files" + - " while uploading version 1 again") // 2 old + 3 new log files + saveCheckpointFiles( + fileManager, cpFiles1, version = 1, numKeys = 101, fileMapping, uuid) + assert(fileManager.getLatestVersion() === 1) + assert(numRemoteSSTFiles == 2) // 2 sst files copied + assert(numRemoteLogFiles == 2) // 2 log files copied + + // Load back the checkpoint files into another local dir with existing files and verify + generateFiles(verificationDir, Seq( + "sst-file1.sst" -> 11, // files with same name but different sizes, should get overwritten + "other-file1" -> 101, + "archive/00001.log" -> 1001, + "random-sst-file.sst" -> 100, // unnecessary files, should get deleted + "random-other-file" -> 9, + "00005.log" -> 101, + "archive/00007.log" -> 101 + )) + + // as we are 
loading version 1 again, the previously committed 1.zip and + // SST files would not be reused. + loadAndVerifyCheckpointFiles( + fileManager, verificationDir, version = 1, cpFiles1, 101, fileMapping, uuid) + + // Save SAME version again with different checkpoint files and load back again to verify + // whether files were overwritten. + val cpFiles1_ = Seq( + "sst-file1.sst" -> 10, // same SST file as before, but will be uploaded again + "sst-file2.sst" -> 25, // new SST file with same name as before, but different length + "sst-file3.sst" -> 30, // new SST file + "other-file1" -> 100, // same non-SST file as before, should not get copied + "other-file2" -> 210, // new non-SST file with same name as before, but different length + "other-file3" -> 300, // new non-SST file + "archive/00001.log" -> 1000, // same log file as before, this should get reused + "archive/00002.log" -> 2500, // new log file with same name but different length + "archive/00003.log" -> 3000 // new log file + ) - // verify checkpoint state is correct - loadAndVerifyCheckpointFiles(fileManager, verificationDir, - version = 1, cpFiles1_, 1001, fileMapping) + // upload version 1 again, new checkpoint will be created and SST files from + // previously committed version 1 will not be reused. 
+ saveCheckpointFiles(fileManager, cpFiles1_, + version = 1, numKeys = 1001, fileMapping, uuid) + assert(numRemoteSSTFiles === 5, "shouldn't reuse old version 1 SST files" + + " while uploading version 1 again") // 2 old + 3 new SST files + assert(numRemoteLogFiles === 5, "shouldn't reuse old version 1 log files" + + " while uploading version 1 again") // 2 old + 3 new log files - // Save another version and verify - val cpFiles2 = Seq( - "sst-file1.sst" -> 10, // same SST file as version 1, should be reused - "sst-file2.sst" -> 25, // same SST file as version 1, should be reused - "sst-file3.sst" -> 30, // same SST file as version 1, should be reused - "sst-file4.sst" -> 40, // new sst file, should be uploaded - "other-file4" -> 400, - "archive/00004.log" -> 4000 - ) + // verify checkpoint state is correct + loadAndVerifyCheckpointFiles(fileManager, verificationDir, + version = 1, cpFiles1_, 1001, fileMapping, uuid) + + // Save another version and verify + val cpFiles2 = Seq( + "sst-file1.sst" -> 10, // same SST file as version 1, should be reused + "sst-file2.sst" -> 25, // same SST file as version 1, should be reused + "sst-file3.sst" -> 30, // same SST file as version 1, should be reused + "sst-file4.sst" -> 40, // new sst file, should be uploaded + "other-file4" -> 400, + "archive/00004.log" -> 4000 + ) + saveCheckpointFiles(fileManager, cpFiles2, + version = 2, numKeys = 1501, fileMapping, uuid) + assert(numRemoteSSTFiles === 6) // 1 new file over earlier 5 files + assert(numRemoteLogFiles === 6) // 1 new file over earlier 6 files + loadAndVerifyCheckpointFiles(fileManager, verificationDir, + version = 2, cpFiles2, 1501, fileMapping, uuid) - saveCheckpointFiles(fileManager, cpFiles2, - version = 2, numKeys = 1501, fileMapping) - assert(numRemoteSSTFiles === 6) // 1 new file over earlier 5 files - assert(numRemoteLogFiles === 6) // 1 new file over earlier 6 files - loadAndVerifyCheckpointFiles(fileManager, verificationDir, - version = 2, cpFiles2, 1501, 
fileMapping) + // Loading an older version should work + loadAndVerifyCheckpointFiles( + fileManager, verificationDir, version = 1, cpFiles1_, 1001, fileMapping, uuid) - // Loading an older version should work - loadAndVerifyCheckpointFiles(fileManager, verificationDir, - version = 1, cpFiles1_, 1001, fileMapping) + // Loading incorrect version should fail + intercept[FileNotFoundException] { + loadAndVerifyCheckpointFiles( + fileManager, verificationDir, version = 3, Nil, 1001, fileMapping, uuid) + } - // Loading incorrect version should fail - intercept[FileNotFoundException] { - loadAndVerifyCheckpointFiles(fileManager, verificationDir, - version = 3, Nil, 1001, fileMapping) + // Loading 0 should delete all files + require(verificationDir.list().length > 0) + loadAndVerifyCheckpointFiles( + fileManager, verificationDir, version = 0, Nil, 0, fileMapping, uuid) } - - // Loading 0 should delete all files - require(verificationDir.list().length > 0) - loadAndVerifyCheckpointFiles(fileManager, verificationDir, - version = 0, Nil, 0, fileMapping) - } } - testWithColumnFamilies("RocksDBFileManager: error writing [version].zip " + - s"cancels the output stream", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => + testWithStateStoreCheckpointIdsAndColumnFamilies("RocksDBFileManager: error writing " + + s"[version].zip cancels the output stream", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => quietly { val hadoopConf = new Configuration() hadoopConf.set( @@ -1159,30 +1907,40 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared val fileManager = new RocksDBFileManager(dfsRootDir, Utils.createTempDir(), hadoopConf) val cpFiles = Seq("sst-file1.sst" -> 10, "sst-file2.sst" -> 20, "other-file1" -> 100) CreateAtomicTestManager.shouldFailInCreateAtomic = true + val uuid = if (enableStateStoreCheckpointIds) { + Some(UUID.randomUUID().toString) + } 
else { + None + } intercept[IOException] { - saveCheckpointFiles(fileManager, cpFiles, - version = 1, numKeys = 101, new RocksDBFileMapping()) + saveCheckpointFiles( + fileManager, cpFiles, version = 1, numKeys = 101, new RocksDBFileMapping(), uuid) } assert(CreateAtomicTestManager.cancelCalledInCreateAtomic) } } - testWithColumnFamilies("disallow concurrent updates to the same RocksDB instance", - TestWithBothChangelogCheckpointingEnabledAndDisabled) { colFamiliesEnabled => + testWithStateStoreCheckpointIdsAndColumnFamilies("disallow concurrent updates to the same " + + "RocksDB instance", + TestWithBothChangelogCheckpointingEnabledAndDisabled) { + case (enableStateStoreCheckpointIds, colFamiliesEnabled) => quietly { + val versionToUniqueId = new mutable.HashMap[Long, String]() withDB( Utils.createTempDir().toString, conf = dbConf.copy(lockAcquireTimeoutMs = 20), - useColumnFamilies = colFamiliesEnabled) { db => + useColumnFamilies = colFamiliesEnabled, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => // DB has been loaded so current thread has already // acquired the lock on the RocksDB instance - db.load(0) // Current thread should be able to load again + db.load(0, versionToUniqueId.get(0)) // Current thread should be able to load again // Another thread should not be able to load while current thread is using it var ex = intercept[SparkException] { ThreadUtils.runInNewThread("concurrent-test-thread-1") { - db.load(0) + db.load(0, versionToUniqueId.get(0)) } } checkError( @@ -1202,15 +1960,15 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared // Commit should release the instance allowing other threads to load new version db.commit() ThreadUtils.runInNewThread("concurrent-test-thread-2") { - db.load(1) + db.load(1, versionToUniqueId.get(1)) db.commit() } // Another thread should not be able to load while current thread is using it - db.load(2) + db.load(2, 
versionToUniqueId.get(2)) ex = intercept[SparkException] { ThreadUtils.runInNewThread("concurrent-test-thread-2") { - db.load(2) + db.load(2, versionToUniqueId.get(2)) } } checkError( @@ -1230,7 +1988,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared // Rollback should release the instance allowing other threads to load new version db.rollback() ThreadUtils.runInNewThread("concurrent-test-thread-3") { - db.load(1) + db.load(1, versionToUniqueId.get(1)) db.commit() } } @@ -1675,27 +2433,33 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - test("time travel - validate successful RocksDB load") { + testWithStateStoreCheckpointIds("time travel - " + + "validate successful RocksDB load") { enableStateStoreCheckpointIds => val remoteDir = Utils.createTempDir().toString val conf = dbConf.copy(minDeltasForSnapshot = 1, compactOnCommit = false) new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf) { db => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => for (version <- 0 to 1) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } // upload snapshot 2.zip db.doMaintenance() for (version <- Seq(2)) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } // upload snapshot 3.zip db.doMaintenance() // simulate db in another executor that override the zip file - withDB(remoteDir, conf = conf) { db1 => + // In checkpoint V2, reusing the same versionToUniqueId to simulate when two executors + // are scheduled with the same uniqueId in the same microbatch + withDB(remoteDir, conf = conf, enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + 
versionToUniqueId = versionToUniqueId) { db1 => for (version <- 0 to 1) { db1.load(version) db1.put(version.toString, version.toString) @@ -1703,41 +2467,48 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } db1.doMaintenance() } - db.load(2) + db.load(2, versionToUniqueId.get(2)) for (version <- Seq(2)) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } // upload snapshot 3.zip db.doMaintenance() // rollback to version 2 - db.load(2) + db.load(2, versionToUniqueId.get(2)) } } - test("time travel 2 - validate successful RocksDB load") { + testWithStateStoreCheckpointIds("time travel 2 - " + + "validate successful RocksDB load") { enableStateStoreCheckpointIds => Seq(1, 2).map(minDeltasForSnapshot => { val remoteDir = Utils.createTempDir().toString val conf = dbConf.copy(minDeltasForSnapshot = minDeltasForSnapshot, compactOnCommit = false) new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf) { db => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => for (version <- 0 to 1) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } // upload snapshot 2.zip db.doMaintenance() for (version <- 2 to 3) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } - db.load(0) + db.load(0, versionToUniqueId.get(0)) // simulate db in another executor that override the zip file - withDB(remoteDir, conf = conf) { db1 => + // In checkpoint V2, reusing the same versionToUniqueId to simulate when two executors + // are scheduled with the same uniqueId in the same microbatch + withDB(remoteDir, conf = conf, + 
enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db1 => for (version <- 0 to 1) { db1.load(version) db1.put(version.toString, version.toString) @@ -1746,7 +2517,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared db1.doMaintenance() } for (version <- 2 to 3) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } @@ -1758,20 +2529,23 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared }) } - test("time travel 3 - validate successful RocksDB load") { + testWithStateStoreCheckpointIds("time travel 3 - validate" + + " successful RocksDB load") { enableStateStoreCheckpointIds => val remoteDir = Utils.createTempDir().toString val conf = dbConf.copy(minDeltasForSnapshot = 0, compactOnCommit = false) new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf) { db => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => for (version <- 0 to 2) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } // upload snapshot 2.zip db.doMaintenance() for (version <- 1 to 3) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } @@ -1783,20 +2557,22 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - testWithChangelogCheckpointingEnabled("time travel 4 -" + - " validate successful RocksDB load when metadata file is overwritten") { + testWithStateStoreCheckpointIdsAndChangelogEnabled("time travel 4 - validate successful" + + " RocksDB load when metadata file is overwritten") { enableStateStoreCheckpointIds => 
val remoteDir = Utils.createTempDir().toString val conf = dbConf.copy(minDeltasForSnapshot = 2, compactOnCommit = false) new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf) { db => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => for (version <- 0 to 1) { - db.load(version) + db.load(version, versionToUniqueId.get(version)) db.put(version.toString, version.toString) db.commit() } // load previous version, and recreate the snapshot - db.load(1) + db.load(1, versionToUniqueId.get(1)) db.put("3", "3") // upload any latest snapshots so far @@ -1811,8 +2587,8 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - testWithChangelogCheckpointingEnabled("time travel 5 -" + - "validate successful RocksDB load when metadata file is not overwritten") { + testWithStateStoreCheckpointIdsAndChangelogEnabled("time travel 5 - validate successful " + + "RocksDB load when metadata file is not overwritten") { enableStateStoreCheckpointIds => val fmClass = "org.apache.spark.sql.execution.streaming.state." 
+ "NoOverwriteFileSystemBasedCheckpointFileManager" Seq(Some(fmClass), None).foreach { fm => @@ -1822,13 +2598,16 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared fm.foreach(value => hadoopConf.set(STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, value)) val remoteDir = dir.getCanonicalPath - withDB(remoteDir, conf = conf, hadoopConf = hadoopConf) { db => - db.load(0) + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => + db.load(0, versionToUniqueId.get(0)) db.put("a", "1") db.commit() // load previous version, will recreate snapshot on commit - db.load(0) + db.load(0, versionToUniqueId.get(0)) db.put("a", "1") // upload version 1 snapshot created previously @@ -1853,14 +2632,17 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - testWithChangelogCheckpointingEnabled("reloading the same version") { + testWithStateStoreCheckpointIdsAndChangelogEnabled("reloading the " + + "same version") { enableStateStoreCheckpointIds => // Keep executing the same batch for two or more times. Some queries with ForEachBatch // will cause this behavior. 
// The test was accidentally fixed by SPARK-48586 (https://github.com/apache/spark/pull/47130) val remoteDir = Utils.createTempDir().toString val conf = dbConf.copy(minDeltasForSnapshot = 2, compactOnCommit = false) new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf) { db => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = conf, enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => // load the same version of pending snapshot uploading // This is possible because after committing version x, we can continue to x+1, and replay // x+1. The replay will load a checkpoint by version x. At this moment, the snapshot @@ -1871,13 +2653,13 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared // This test was accidentally fixed by // SPARK-48931 (https://github.com/apache/spark/pull/47393) - db.load(0) + db.load(0, versionToUniqueId.get(0)) db.put("foo", "bar") // Snapshot checkpoint not needed db.commit() // Continue using local DB - db.load(1) + db.load(1, versionToUniqueId.get(1)) db.put("foo", "bar") // Should create a local RocksDB snapshot db.commit() @@ -1885,19 +2667,19 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared db.doMaintenance() // This will reload Db from the cloud. 
- db.load(1) + db.load(1, versionToUniqueId.get(1)) db.put("foo", "bar") // Should create another local snapshot db.commit() // Continue using local DB - db.load(2) + db.load(2, versionToUniqueId.get(2)) db.put("foo", "bar") // Snapshot checkpoint not needed db.commit() // Reload DB from the cloud, loading from 2.zip - db.load(2) + db.load(2, versionToUniqueId.get(2)) db.put("foo", "bar") // Snapshot checkpoint not needed db.commit() @@ -1906,14 +2688,14 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared db.doMaintenance() // Reload new 2.zip just uploaded to validate it is not corrupted. - db.load(2) + db.load(2, versionToUniqueId.get(2)) db.put("foo", "bar") db.commit() // Test the maintenance thread is delayed even after the next snapshot is created. // There will be two outstanding snapshots. for (batchVersion <- 3 to 6) { - db.load(batchVersion) + db.load(batchVersion, versionToUniqueId.get(batchVersion)) db.put("foo", "bar") // In batchVersion 3 and 5, it will generate a local snapshot but won't be uploaded. db.commit() @@ -1924,7 +2706,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared // maintenance tasks finish quickly. 
for (batchVersion <- 7 to 10) { for (j <- 0 to 1) { - db.load(batchVersion) + db.load(batchVersion, versionToUniqueId.get(batchVersion)) db.put("foo", "bar") db.commit() db.doMaintenance() @@ -1935,22 +2717,27 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared for (randomSeed <- 1 to 8) { for (ifTestSkipBatch <- 0 to 1) { - testWithChangelogCheckpointingEnabled( - s"randomized snapshotting $randomSeed ifTestSkipBatch $ifTestSkipBatch") { - // The unit test simulates the case where batches can be reloaded and maintenance tasks + testWithStateStoreCheckpointIdsAndChangelogEnabled("randomized snapshotting " + + s"$randomSeed ifTestSkipBatch $ifTestSkipBatch") { enableStateStoreCheckpointIds => + // The unit test simulates the case where batches can be reloaded and maintenance tasks // can be delayed. After each batch, we randomly decide whether we would move onto the - // next batch, and whetehr maintenance task is executed. + // next batch, and whether maintenance task is executed. val remoteDir = Utils.createTempDir().toString val conf = dbConf.copy(minDeltasForSnapshot = 3, compactOnCommit = false) new File(remoteDir).delete() // to make sure that the directory gets created - withDB(remoteDir, conf = conf) { db => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = dbConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db => // A second DB is opened to simulate another executor that runs some batches that // skipped in the current DB. 
- withDB(remoteDir, conf = conf) { db2 => + withDB(remoteDir, conf = dbConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db2 => val random = new Random(randomSeed) var curVer: Int = 0 for (i <- 1 to 100) { - db.load(curVer) + db.load(curVer, versionToUniqueId.get(curVer)) db.put("foo", "bar") db.commit() // For a one in five chance, maintenance task is executed. The chance is created to @@ -1985,8 +2772,8 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } - test("validate Rocks DB SST files do not have a VersionIdMismatch" + - " when metadata file is not overwritten - scenario 1") { + testWithStateStoreCheckpointIds("validate Rocks DB SST files do not have a VersionIdMismatch" + + " when metadata file is not overwritten - scenario 1") { enableStateStoreCheckpointIds => val fmClass = "org.apache.spark.sql.execution.streaming.state." + "NoOverwriteFileSystemBasedCheckpointFileManager" withTempDir { dir => @@ -1995,84 +2782,94 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared hadoopConf.set(STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, fmClass) val remoteDir = dir.getCanonicalPath - withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db1 => - withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db2 => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db1 => + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db2 => // commit version 1 via db1 - db1.load(0) + db1.load(0, versionToUniqueId.get(0)) db1.put("a", "1") db1.put("b", "1") db1.commit() // commit version 1 via db2 - db2.load(0) + db2.load(0, versionToUniqueId.get(0)) 
db2.put("a", "1") db2.put("b", "1") db2.commit() // commit version 2 via db2 - db2.load(1) + db2.load(1, versionToUniqueId.get(1)) db2.put("a", "2") db2.put("b", "2") db2.commit() // reload version 1, this should succeed - db2.load(1) - db1.load(1) + db2.load(1, versionToUniqueId.get(1)) + db1.load(1, versionToUniqueId.get(1)) // reload version 2, this should succeed - db2.load(2) - db1.load(2) + db2.load(2, versionToUniqueId.get(2)) + db1.load(2, versionToUniqueId.get(2)) } } } } - test("validate Rocks DB SST files do not have a VersionIdMismatch" + - " when metadata file is overwritten - scenario 1") { + testWithStateStoreCheckpointIds("validate Rocks DB SST files do not have a VersionIdMismatch" + + " when metadata file is overwritten - scenario 1") { enableStateStoreCheckpointIds => withTempDir { dir => val dbConf = RocksDBConf(StateStoreConf(new SQLConf())) val hadoopConf = new Configuration() val remoteDir = dir.getCanonicalPath - withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db1 => - withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db2 => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db1 => + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db2 => // commit version 1 via db1 - db1.load(0) + db1.load(0, versionToUniqueId.get(0)) db1.put("a", "1") db1.put("b", "1") db1.commit() // commit version 1 via db2 - db2.load(0) + db2.load(0, versionToUniqueId.get(0)) db2.put("a", "1") db2.put("b", "1") db2.commit() // commit version 2 via db2 - db2.load(1) + db2.load(1, versionToUniqueId.get(1)) db2.put("a", "2") db2.put("b", "2") db2.commit() // reload version 1, this should succeed - db2.load(1) - db1.load(1) + db2.load(1, 
versionToUniqueId.get(1)) + db1.load(1, versionToUniqueId.get(1)) // reload version 2, this should succeed - db2.load(2) - db1.load(2) + db2.load(2, versionToUniqueId.get(2)) + db1.load(2, versionToUniqueId.get(2)) } } } } - test("validate Rocks DB SST files do not have a VersionIdMismatch" + - " when metadata file is not overwritten - scenario 2") { + testWithStateStoreCheckpointIds("validate Rocks DB SST files do not have a VersionIdMismatch" + + " when metadata file is not overwritten - scenario 2") { enableStateStoreCheckpointIds => val fmClass = "org.apache.spark.sql.execution.streaming.state." + "NoOverwriteFileSystemBasedCheckpointFileManager" withTempDir { dir => @@ -2081,77 +2878,87 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared hadoopConf.set(STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, fmClass) val remoteDir = dir.getCanonicalPath - withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db1 => - withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db2 => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db1 => + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db2 => // commit version 1 via db2 - db2.load(0) + db2.load(0, versionToUniqueId.get(0)) db2.put("a", "1") db2.put("b", "1") db2.commit() // commit version 1 via db1 - db1.load(0) + db1.load(0, versionToUniqueId.get(0)) db1.put("a", "1") db1.put("b", "1") db1.commit() // commit version 2 via db2 - db2.load(1) + db2.load(1, versionToUniqueId.get(1)) db2.put("a", "2") db2.put("b", "2") db2.commit() // reload version 1, this should succeed - db2.load(1) - db1.load(1) + db2.load(1, versionToUniqueId.get(1)) + db1.load(1, versionToUniqueId.get(1)) // reload 
version 2, this should succeed - db2.load(2) - db1.load(2) + db2.load(2, versionToUniqueId.get(2)) + db1.load(2, versionToUniqueId.get(2)) } } } } - test("validate Rocks DB SST files do not have a VersionIdMismatch" + - " when metadata file is overwritten - scenario 2") { + testWithStateStoreCheckpointIds("validate Rocks DB SST files do not have a VersionIdMismatch" + + " when metadata file is overwritten - scenario 2") { enableStateStoreCheckpointIds => withTempDir { dir => val dbConf = RocksDBConf(StateStoreConf(new SQLConf())) val hadoopConf = new Configuration() val remoteDir = dir.getCanonicalPath - withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db1 => - withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db2 => + val versionToUniqueId = new mutable.HashMap[Long, String]() + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db1 => + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf, + enableStateStoreCheckpointIds = enableStateStoreCheckpointIds, + versionToUniqueId = versionToUniqueId) { db2 => // commit version 1 via db2 - db2.load(0) + db2.load(0, versionToUniqueId.get(0)) db2.put("a", "1") db2.put("b", "1") db2.commit() // commit version 1 via db1 - db1.load(0) + db1.load(0, versionToUniqueId.get(0)) db1.put("a", "1") db1.put("b", "1") db1.commit() // commit version 2 via db2 - db2.load(1) + db2.load(1, versionToUniqueId.get(1)) db2.put("a", "2") db2.put("b", "2") db2.commit() // reload version 1, this should succeed - db2.load(1) - db1.load(1) + db2.load(1, versionToUniqueId.get(1)) + db1.load(1, versionToUniqueId.get(1)) // reload version 2, this should succeed - db2.load(2) - db1.load(2) + db2.load(2, versionToUniqueId.get(2)) + db1.load(2, versionToUniqueId.get(2)) } } } @@ -2429,25 +3236,77 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared private def dbConf = 
RocksDBConf(StateStoreConf(SQLConf.get.clone())) + class RocksDBCheckpointFormatV2( + dfsRootDir: String, + conf: RocksDBConf, + localRootDir: File = Utils.createTempDir(), + hadoopConf: Configuration = new Configuration, + loggingId: String = "", + useColumnFamilies: Boolean = false, + val versionToUniqueId : mutable.Map[Long, String] = mutable.Map[Long, String]()) + extends RocksDB(dfsRootDir, conf, localRootDir, hadoopConf, loggingId, + useColumnFamilies, enableStateStoreCheckpointIds = true) { + + override def load( + version: Long, + ckptId: Option[String] = None, + readOnly: Boolean = false): RocksDB = { + // When a ckptId is defined, it means the test is explicitly using v2 semantic + // When it is not, it is possible that implicitly uses it. + // So still do a versionToUniqueId.get + ckptId match { + case Some(_) => super.load(version, ckptId, readOnly) + case None => super.load(version, versionToUniqueId.get(version), readOnly) + } + } + + override def commit(): Long = { + val ret = super.commit() + // update versionToUniqueId from lineageManager + lineageManager.getLineageForCurrVersion().foreach { + case LineageItem(version, id) => versionToUniqueId.getOrElseUpdate(version, id) + } + ret + } + } + + // withDB override with checkpoint format v2 def withDB[T]( remoteDir: String, version: Int = 0, conf: RocksDBConf = dbConf, - hadoopConf: Configuration = new Configuration(), + hadoopConf: Configuration = hadoopConf, useColumnFamilies: Boolean = false, + enableStateStoreCheckpointIds: Boolean = false, + // versionToUniqueId is used in checkpoint format v2, it simulates the lineage + // stored in the commit log. The lineage will be automatically updated in db.commit() + // When testing V2, please create a versionToUniqueId map + // and call versionToUniqueId.get(version) in the db.load() function. + // In V1, versionToUniqueId is not used and versionToUniqueId.get(version) returns None. 
+ versionToUniqueId : mutable.Map[Long, String] = mutable.Map[Long, String](), localDir: File = Utils.createTempDir())( func: RocksDB => T): T = { var db: RocksDB = null try { - db = new RocksDB( - remoteDir, - conf = conf, - localRootDir = localDir, - hadoopConf = hadoopConf, - loggingId = s"[Thread-${Thread.currentThread.getId}]", - useColumnFamilies = useColumnFamilies - ) - db.load(version) + db = if (enableStateStoreCheckpointIds) { + new RocksDBCheckpointFormatV2( + remoteDir, + conf = conf, + localRootDir = localDir, + hadoopConf = hadoopConf, + loggingId = s"[Thread-${Thread.currentThread.getId}]", + useColumnFamilies = useColumnFamilies, + versionToUniqueId = versionToUniqueId) + } else { + new RocksDB( + remoteDir, + conf = conf, + localRootDir = localDir, + hadoopConf = hadoopConf, + loggingId = s"[Thread-${Thread.currentThread.getId}]", + useColumnFamilies = useColumnFamilies) + } + db.load(version, versionToUniqueId.get(version)) func(db) } finally { if (db != null) { @@ -2468,7 +3327,8 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared fileToLengths: Seq[(String, Int)], version: Int, numKeys: Int, - fileMapping: RocksDBFileMapping): Unit = { + fileMapping: RocksDBFileMapping, + checkpointUniqueId: Option[String] = None): Unit = { val checkpointDir = Utils.createTempDir().getAbsolutePath // local dir to create checkpoints generateFiles(checkpointDir, fileToLengths) val (dfsFileSuffix, immutableFileMapping) = fileMapping.createSnapshotFileMapping( @@ -2477,7 +3337,9 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared checkpointDir, version, numKeys, - immutableFileMapping) + immutableFileMapping, + checkpointUniqueId = checkpointUniqueId) + val snapshotInfo = RocksDBVersionSnapshotInfo(version, dfsFileSuffix) fileMapping.snapshotsPendingUpload.remove(snapshotInfo) } @@ -2488,9 +3350,10 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared version: Int, 
expectedFiles: Seq[(String, Int)], expectedNumKeys: Int, - fileMapping: RocksDBFileMapping): Unit = { - val metadata = fileManager.loadCheckpointFromDfs(version, - verificationDir, fileMapping) + fileMapping: RocksDBFileMapping, + checkpointUniqueId: Option[String] = None): Unit = { + val metadata = fileManager.loadCheckpointFromDfs( + version, verificationDir, fileMapping, checkpointUniqueId) val filesAndLengths = listFiles(verificationDir).map(f => f.getName -> f.length).toSet ++ listFiles(verificationDir + "/archive").map(f => s"archive/${f.getName}" -> f.length()).toSet diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/TimerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/TimerSuite.scala index 24a120be9d9af..428845d5ebcbb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/TimerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/TimerSuite.scala @@ -72,8 +72,9 @@ class TimerSuite extends StateVariableSuiteBase { assert(timerState1.listTimers().toSet === Set(15000L, 1000L)) assert(timerState1.getExpiredTimers(Long.MaxValue).toSeq === Seq(("test_key", 1000L), ("test_key", 15000L))) - // if timestamp equals to expiryTimestampsMs, will not considered expired - assert(timerState1.getExpiredTimers(15000L).toSeq === Seq(("test_key", 1000L))) + // if timestamp equals to expiryTimestampsMs, it will be considered expired + assert(timerState1.getExpiredTimers(15000L).toSeq === + Seq(("test_key", 1000L), ("test_key", 15000L))) assert(timerState1.listTimers().toSet === Set(15000L, 1000L)) timerState1.registerTimer(20L * 1000) @@ -128,7 +129,7 @@ class TimerSuite extends StateVariableSuiteBase { timerTimerstamps.foreach(timerState.registerTimer) assert(timerState.getExpiredTimers(Long.MaxValue).toSeq.map(_._2) === timerTimerstamps.sorted) assert(timerState.getExpiredTimers(4200L).toSeq.map(_._2) === - timerTimerstamps.sorted.takeWhile(_ 
< 4200L)) + timerTimerstamps.sorted.takeWhile(_ <= 4200L)) assert(timerState.getExpiredTimers(Long.MinValue).toSeq === Seq.empty) ImplicitGroupingKeyTracker.removeImplicitKey() } @@ -162,7 +163,7 @@ class TimerSuite extends StateVariableSuiteBase { (timerTimestamps1 ++ timerTimestamps2 ++ timerTimerStamps3).sorted) assert(timerState1.getExpiredTimers(Long.MinValue).toSeq === Seq.empty) assert(timerState1.getExpiredTimers(8000L).toSeq.map(_._2) === - (timerTimestamps1 ++ timerTimestamps2 ++ timerTimerStamps3).sorted.takeWhile(_ < 8000L)) + (timerTimestamps1 ++ timerTimestamps2 ++ timerTimerStamps3).sorted.takeWhile(_ <= 8000L)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/ValueStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/ValueStateSuite.scala index 55d08cd8f12a7..037fed045e8ca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/ValueStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/ValueStateSuite.scala @@ -327,8 +327,8 @@ class ValueStateSuite extends StateVariableSuiteBase { var ttlValue = testState.getTTLValue() assert(ttlValue.isDefined) assert(ttlValue.get._2 === ttlExpirationMs) - var ttlStateValueIterator = testState.getValuesInTTLState() - assert(ttlStateValueIterator.hasNext) + var ttlStateValueIterator = testState.getValueInTTLState() + assert(ttlStateValueIterator.isDefined) // increment batchProcessingTime, or watermark and ensure expired value is not returned val nextBatchHandle = new StatefulProcessorHandleImpl(store, UUID.randomUUID(), @@ -349,10 +349,9 @@ class ValueStateSuite extends StateVariableSuiteBase { ttlValue = nextBatchTestState.getTTLValue() assert(ttlValue.isDefined) assert(ttlValue.get._2 === ttlExpirationMs) - ttlStateValueIterator = nextBatchTestState.getValuesInTTLState() - assert(ttlStateValueIterator.hasNext) - assert(ttlStateValueIterator.next() === ttlExpirationMs) - 
assert(ttlStateValueIterator.isEmpty) + ttlStateValueIterator = nextBatchTestState.getValueInTTLState() + assert(ttlStateValueIterator.isDefined) + assert(ttlStateValueIterator.get === ttlExpirationMs) // getWithoutTTL should still return the expired value assert(nextBatchTestState.getWithoutEnforcingTTL().get === "v1") @@ -412,8 +411,8 @@ class ValueStateSuite extends StateVariableSuiteBase { val ttlValue = testState.getTTLValue() assert(ttlValue.isDefined) assert(ttlValue.get._2 === ttlExpirationMs) - val ttlStateValueIterator = testState.getValuesInTTLState() - assert(ttlStateValueIterator.hasNext) + val ttlStateValueIterator = testState.getValueInTTLState() + assert(ttlStateValueIterator.isDefined) } } } @@ -423,7 +422,7 @@ class ValueStateSuite extends StateVariableSuiteBase { * types (ValueState, ListState, MapState) used in arbitrary stateful operators. */ abstract class StateVariableSuiteBase extends SharedSparkSession - with BeforeAndAfter { + with BeforeAndAfter with AlsoTestWithEncodingTypes { before { StateStore.stop() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index 0cc4f7bf2548e..0edbfd10d8cde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -274,6 +274,19 @@ class ColumnVectorSuite extends SparkFunSuite with SQLHelper { } } + testVectors("mutable ColumnarRow with TimestampNTZType", 10, TimestampNTZType) { testVector => + val mutableRow = new MutableColumnarRow(Array(testVector)) + (0 until 10).foreach { i => + mutableRow.rowId = i + mutableRow.setLong(0, 10 - i) + } + (0 until 10).foreach { i => + mutableRow.rowId = i + assert(mutableRow.get(0, TimestampNTZType) === (10 - i)) + assert(mutableRow.copy().get(0, TimestampNTZType) === (10 - i)) + } + } + 
val arrayType: ArrayType = ArrayType(IntegerType, containsNull = true) testVectors("array", 10, arrayType) { testVector => @@ -384,18 +397,24 @@ class ColumnVectorSuite extends SparkFunSuite with SQLHelper { } val structType: StructType = new StructType().add("int", IntegerType).add("double", DoubleType) + .add("ts", TimestampNTZType) testVectors("struct", 10, structType) { testVector => val c1 = testVector.getChild(0) val c2 = testVector.getChild(1) + val c3 = testVector.getChild(2) c1.putInt(0, 123) c2.putDouble(0, 3.45) + c3.putLong(0, 1000L) c1.putInt(1, 456) c2.putDouble(1, 5.67) + c3.putLong(1, 2000L) assert(testVector.getStruct(0).get(0, IntegerType) === 123) assert(testVector.getStruct(0).get(1, DoubleType) === 3.45) + assert(testVector.getStruct(0).get(2, TimestampNTZType) === 1000L) assert(testVector.getStruct(1).get(0, IntegerType) === 456) assert(testVector.getStruct(1).get(1, DoubleType) === 5.67) + assert(testVector.getStruct(1).get(2, TimestampNTZType) === 2000L) } testVectors("SPARK-44805: getInts with dictionary", 3, IntegerType) { testVector => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index a6fc43aa087da..a7af22a0554e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -79,7 +79,8 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { assert(info.getSource === "built-in") val validSources = Seq( - "built-in", "hive", "python_udf", "scala_udf", "java_udf", "python_udtf", "internal") + "built-in", "hive", "python_udf", "scala_udf", "java_udf", "python_udtf", "internal", + "sql_udf") validSources.foreach { source => val info = new ExpressionInfo( "testClass", null, "testName", null, "", "", "", "", "", "", source) @@ -229,6 +230,7 @@ class 
ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { // Requires dynamic class loading not available in this test suite. "org.apache.spark.sql.catalyst.expressions.FromAvro", "org.apache.spark.sql.catalyst.expressions.ToAvro", + "org.apache.spark.sql.catalyst.expressions.SchemaOfAvro", "org.apache.spark.sql.catalyst.expressions.FromProtobuf", "org.apache.spark.sql.catalyst.expressions.ToProtobuf", classOf[CurrentUser].getName, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ColumnNodeToExpressionConverterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ColumnNodeToExpressionConverterSuite.scala index 76fcdfc380950..d72e86450de22 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ColumnNodeToExpressionConverterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ColumnNodeToExpressionConverterSuite.scala @@ -405,4 +405,5 @@ private[internal] case class Nope(override val origin: Origin = CurrentOrigin.ge extends ColumnNode { override private[internal] def normalize(): Nope = this override def sql: String = "nope" + override private[internal] def children: Seq[ColumnNodeLike] = Seq.empty } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala new file mode 100644 index 0000000000000..afcdfd343e33b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingE2eSuite.scala @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.scripting + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.CompoundBody +import org.apache.spark.sql.catalyst.util.QuotingUtils.toSQLConf +import org.apache.spark.sql.exceptions.SqlScriptingException +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + + +/** + * End-to-end tests for SQL Scripting. + * This suite is not intended to heavily test the SQL scripting (parser & interpreter) logic. + * It is rather focused on testing the sql() API - whether it can handle SQL scripts correctly, + * results are returned in expected manner, config flags are applied properly, etc. + * For full functionality tests, see SqlScriptingParserSuite and SqlScriptingInterpreterSuite. 
+ */ +class SqlScriptingE2eSuite extends QueryTest with SharedSparkSession { + // Helpers + private def verifySqlScriptResult(sqlText: String, expected: Seq[Row]): Unit = { + val df = spark.sql(sqlText) + checkAnswer(df, expected) + } + + private def verifySqlScriptResultWithNamedParams( + sqlText: String, + expected: Seq[Row], + args: Map[String, Any]): Unit = { + val df = spark.sql(sqlText, args) + checkAnswer(df, expected) + } + + // Tests setup + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.SQL_SCRIPTING_ENABLED.key, "true") + } + + // Tests + test("SQL Scripting not enabled") { + withSQLConf(SQLConf.SQL_SCRIPTING_ENABLED.key -> "false") { + val sqlScriptText = + """ + |BEGIN + | SELECT 1; + |END""".stripMargin + checkError( + exception = intercept[SqlScriptingException] { + spark.sql(sqlScriptText).asInstanceOf[CompoundBody] + }, + condition = "UNSUPPORTED_FEATURE.SQL_SCRIPTING", + parameters = Map("sqlScriptingEnabled" -> toSQLConf(SQLConf.SQL_SCRIPTING_ENABLED.key))) + } + } + + test("single select") { + val sqlText = "SELECT 1;" + verifySqlScriptResult(sqlText, Seq(Row(1))) + } + + test("multiple selects") { + val sqlText = + """ + |BEGIN + | SELECT 1; + | SELECT 2; + |END""".stripMargin + verifySqlScriptResult(sqlText, Seq(Row(2))) + } + + test("multi statement - simple") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + | INSERT INTO t VALUES (1, 'a', 1.0); + | SELECT a FROM t; + |END + |""".stripMargin + verifySqlScriptResult(sqlScript, Seq(Row(1))) + } + } + + test("script without result statement") { + val sqlScript = + """ + |BEGIN + | DECLARE x INT; + | SET x = 1; + | DROP TEMPORARY VARIABLE x; + |END + |""".stripMargin + verifySqlScriptResult(sqlScript, Seq.empty) + } + + test("empty script") { + val sqlScript = + """ + |BEGIN + |END + |""".stripMargin + verifySqlScriptResult(sqlScript, Seq.empty) + } + + test("named params") { + val sqlScriptText = 
+ """ + |BEGIN + | SELECT 1; + | IF :param_1 > 10 THEN + | SELECT :param_2; + | ELSE + | SELECT :param_3; + | END IF; + |END""".stripMargin + // Define a map with SQL parameters + val args: Map[String, Any] = Map( + "param_1" -> 5, + "param_2" -> "greater", + "param_3" -> "smaller" + ) + verifySqlScriptResultWithNamedParams(sqlScriptText, Seq(Row("smaller")), args) + } + + test("positional params") { + val sqlScriptText = + """ + |BEGIN + | SELECT 1; + | IF ? > 10 THEN + | SELECT ?; + | ELSE + | SELECT ?; + | END IF; + |END""".stripMargin + // Define an array with SQL parameters in the correct order. + val args: Array[Any] = Array(5, "greater", "smaller") + checkError( + exception = intercept[SqlScriptingException] { + spark.sql(sqlScriptText, args).asInstanceOf[CompoundBody] + }, + condition = "UNSUPPORTED_FEATURE.SQL_SCRIPTING_WITH_POSITIONAL_PARAMETERS", + parameters = Map.empty) + } + + test("named params with positional params - should fail") { + val sqlScriptText = + """ + |BEGIN + | SELECT ?; + | IF :param > 10 THEN + | SELECT 1; + | ELSE + | SELECT 2; + | END IF; + |END""".stripMargin + // Define a map with SQL parameters. 
+ val args: Map[String, Any] = Map("param" -> 5) + checkError( + exception = intercept[AnalysisException] { + spark.sql(sqlScriptText, args).asInstanceOf[CompoundBody] + }, + condition = "UNBOUND_SQL_PARAMETER", + parameters = Map("name" -> "_16"), + context = ExpectedContext( + fragment = "?", + start = 16, + stop = 16)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionNodeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionNodeSuite.scala index baad5702f4f22..325c8ce380c63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionNodeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionNodeSuite.scala @@ -18,11 +18,12 @@ package org.apache.spark.sql.scripting import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, OneRowRelation, Project} +import org.apache.spark.sql.catalyst.plans.logical.{DropVariable, LeafNode, OneRowRelation, Project} import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} /** * Unit tests for execution nodes from SqlScriptingExecutionNode.scala. @@ -31,6 +32,35 @@ import org.apache.spark.sql.test.SharedSparkSession */ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSession { // Helpers + case class TestCompoundBody( + statements: Seq[CompoundStatementExec], + label: Option[String] = None, + isScope: Boolean = false, + context: SqlScriptingExecutionContext = null) + extends CompoundBodyExec(statements, label, isScope, context) { + + // No-op to remove unnecessary logic for these tests. 
+ override def enterScope(): Unit = () + + // No-op to remove unnecessary logic for these tests. + override def exitScope(): Unit = () + } + + case class TestForStatement( + query: SingleStatementExec, + variableName: Option[String], + body: CompoundBodyExec, + override val label: Option[String], + session: SparkSession, + context: SqlScriptingExecutionContext = null) + extends ForStatementExec( + query, + variableName, + body, + label, + session, + context) + case class TestLeafStatement(testVal: String) extends LeafStatementExec { override def reset(): Unit = () } @@ -39,7 +69,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi extends SingleStatementExec( parsedPlan = Project(Seq(Alias(Literal(condVal), description)()), OneRowRelation()), Origin(startIndex = Some(0), stopIndex = Some(description.length)), - isInternal = false) + Map.empty, + isInternal = false, + null + ) case class DummyLogicalPlan() extends LeafNode { override def output: Seq[Attribute] = Seq.empty @@ -50,7 +83,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi extends SingleStatementExec( parsedPlan = DummyLogicalPlan(), Origin(startIndex = Some(0), stopIndex = Some(description.length)), - isInternal = false) + Map.empty, + isInternal = false, + null + ) class LoopBooleanConditionEvaluator(condition: TestLoopCondition) { private var callCount: Int = 0 @@ -68,7 +104,7 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi case class TestWhile( condition: TestLoopCondition, - body: CompoundBodyExec, + body: TestCompoundBody, label: Option[String] = None) extends WhileStatementExec(condition, body, label, spark) { @@ -80,9 +116,9 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } case class TestRepeat( - condition: TestLoopCondition, - body: CompoundBodyExec, - label: Option[String] = None) + condition: TestLoopCondition, + body: TestCompoundBody, + label: 
Option[String] = None) extends RepeatStatementExec(condition, body, label, spark) { private val evaluator = new LoopBooleanConditionEvaluator(condition) @@ -92,6 +128,24 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi statement: LeafStatementExec): Boolean = evaluator.evaluateLoopBooleanCondition() } + case class MockQuery(numberOfRows: Int, columnName: String, description: String) + extends SingleStatementExec( + DummyLogicalPlan(), + Origin(startIndex = Some(0), stopIndex = Some(description.length)), + Map.empty, + isInternal = false, + null) { + override def buildDataFrame(session: SparkSession): DataFrame = { + val data = Seq.range(0, numberOfRows).map(Row(_)) + val schema = List(StructField(columnName, IntegerType)) + + spark.createDataFrame( + spark.sparkContext.parallelize(data), + StructType(schema) + ) + } + } + private def extractStatementValue(statement: CompoundStatementExec): String = statement match { case TestLeafStatement(testVal) => testVal @@ -100,18 +154,21 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi case loopStmt: LoopStatementExec => loopStmt.label.get case leaveStmt: LeaveStatementExec => leaveStmt.label case iterateStmt: IterateStatementExec => iterateStmt.label + case forStmt: TestForStatement => forStmt.label.get + case dropStmt: SingleStatementExec if dropStmt.parsedPlan.isInstanceOf[DropVariable] + => "DropVariable" case _ => fail("Unexpected statement type") } // Tests test("test body - single statement") { - val iter = new CompoundBodyExec(Seq(TestLeafStatement("one"))).getTreeIterator + val iter = TestCompoundBody(Seq(TestLeafStatement("one"))).getTreeIterator val statements = iter.map(extractStatementValue).toSeq assert(statements === Seq("one")) } test("test body - no nesting") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( Seq( TestLeafStatement("one"), TestLeafStatement("two"), @@ -122,26 +179,26 @@ class SqlScriptingExecutionNodeSuite 
extends SparkFunSuite with SharedSparkSessi } test("test body - nesting") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( Seq( - new CompoundBodyExec(Seq(TestLeafStatement("one"), TestLeafStatement("two"))), + TestCompoundBody(Seq(TestLeafStatement("one"), TestLeafStatement("two"))), TestLeafStatement("three"), - new CompoundBodyExec(Seq(TestLeafStatement("four"), TestLeafStatement("five"))))) + TestCompoundBody(Seq(TestLeafStatement("four"), TestLeafStatement("five"))))) .getTreeIterator val statements = iter.map(extractStatementValue).toSeq assert(statements === Seq("one", "two", "three", "four", "five")) } test("if else - enter body of the IF clause") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new IfElseStatementExec( conditions = Seq( TestIfElseCondition(condVal = true, description = "con1") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body2")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body2")))), session = spark ) )).getTreeIterator @@ -150,15 +207,15 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("if else - enter body of the ELSE clause") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new IfElseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body2")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body2")))), session = spark ) )).getTreeIterator @@ -167,17 +224,17 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("if else if - enter body of the IF clause") { - val iter = 
new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new IfElseStatementExec( conditions = Seq( TestIfElseCondition(condVal = true, description = "con1"), TestIfElseCondition(condVal = false, description = "con2") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body3")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body3")))), session = spark ) )).getTreeIterator @@ -186,17 +243,17 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("if else if - enter body of the ELSE IF clause") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new IfElseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1"), TestIfElseCondition(condVal = true, description = "con2") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body3")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body3")))), session = spark ) )).getTreeIterator @@ -205,7 +262,7 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("if else if - enter body of the second ELSE IF clause") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new IfElseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1"), @@ -213,11 +270,11 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi TestIfElseCondition(condVal = true, description = "con3") ), 
conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))), - new CompoundBodyExec(Seq(TestLeafStatement("body3"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))), + TestCompoundBody(Seq(TestLeafStatement("body3"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body4")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body4")))), session = spark ) )).getTreeIterator @@ -226,17 +283,17 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("if else if - enter body of the ELSE clause") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new IfElseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1"), TestIfElseCondition(condVal = false, description = "con2") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body3")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body3")))), session = spark ) )).getTreeIterator @@ -245,15 +302,15 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("if else if - without else (successful check)") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new IfElseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1"), TestIfElseCondition(condVal = true, description = "con2") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + 
TestCompoundBody(Seq(TestLeafStatement("body2"))) ), elseBody = None, session = spark @@ -264,15 +321,15 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("if else if - without else (unsuccessful checks)") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new IfElseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1"), TestIfElseCondition(condVal = false, description = "con2") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))) ), elseBody = None, session = spark @@ -283,10 +340,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("while - doesn't enter body") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 0, description = "con1"), - body = new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + body = TestCompoundBody(Seq(TestLeafStatement("body1"))) ) )).getTreeIterator val statements = iter.map(extractStatementValue).toSeq @@ -294,10 +351,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("while - enters body once") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 1, description = "con1"), - body = new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + body = TestCompoundBody(Seq(TestLeafStatement("body1"))) ) )).getTreeIterator val statements = iter.map(extractStatementValue).toSeq @@ -305,10 +362,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("while - enters body with multiple statements multiple times") { - val iter = new CompoundBodyExec(Seq( + val iter = 
TestCompoundBody(Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("statement1"), TestLeafStatement("statement2"))) ) @@ -319,13 +376,13 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("nested while - 2 times outer 2 times inner") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con2"), - body = new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + body = TestCompoundBody(Seq(TestLeafStatement("body1"))) )) ) ) @@ -338,10 +395,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("repeat - true condition") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 0, description = "con1"), - body = new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + body = TestCompoundBody(Seq(TestLeafStatement("body1"))) ) )).getTreeIterator val statements = iter.map(extractStatementValue).toSeq @@ -349,10 +406,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("repeat - condition false once") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 1, description = "con1"), - body = new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + body = TestCompoundBody(Seq(TestLeafStatement("body1"))) ) )).getTreeIterator val statements = iter.map(extractStatementValue).toSeq @@ -360,10 +417,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("repeat - enters body 
with multiple statements multiple times") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("statement1"), TestLeafStatement("statement2"))) ) @@ -374,13 +431,13 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("nested repeat") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con2"), - body = new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + body = TestCompoundBody(Seq(TestLeafStatement("body1"))) )) ) ) @@ -396,7 +453,7 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("leave compound block") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestLeafStatement("one"), new LeaveStatementExec("lbl") @@ -408,11 +465,11 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("leave while loop") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("body1"), new LeaveStatementExec("lbl")) ), @@ -425,11 +482,11 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("leave repeat loop") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = 
TestCompoundBody(Seq( TestLeafStatement("body1"), new LeaveStatementExec("lbl")) ), @@ -442,11 +499,11 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("iterate while loop") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("body1"), new IterateStatementExec("lbl"), TestLeafStatement("body2")) @@ -460,11 +517,11 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("iterate repeat loop") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("body1"), new IterateStatementExec("lbl"), TestLeafStatement("body2")) @@ -479,14 +536,14 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("leave outer loop from nested while loop") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con2"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("body1"), new LeaveStatementExec("lbl")) ), @@ -502,14 +559,14 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("leave outer loop from nested repeat loop") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body 
= TestCompoundBody(Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con2"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("body1"), new LeaveStatementExec("lbl")) ), @@ -525,14 +582,14 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("iterate outer loop from nested while loop") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestWhile( condition = TestLoopCondition(condVal = true, reps = 2, description = "con2"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("body1"), new IterateStatementExec("lbl"), TestLeafStatement("body2")) @@ -552,14 +609,14 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("iterate outer loop from nested repeat loop") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con1"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestRepeat( condition = TestLoopCondition(condVal = false, reps = 2, description = "con2"), - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("body1"), new IterateStatementExec("lbl"), TestLeafStatement("body2")) @@ -579,17 +636,17 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("searched case - enter first WHEN clause") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new CaseStatementExec( conditions = Seq( TestIfElseCondition(condVal = true, description = "con1"), TestIfElseCondition(condVal = false, description = "con2") ), conditionalBodies = Seq( - new 
CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body3")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body3")))), session = spark ) )).getTreeIterator @@ -598,15 +655,15 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("searched case - enter body of the ELSE clause") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new CaseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body2")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body2")))), session = spark ) )).getTreeIterator @@ -615,17 +672,17 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("searched case - enter second WHEN clause") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new CaseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1"), TestIfElseCondition(condVal = true, description = "con2") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))) ), - elseBody = Some(new CompoundBodyExec(Seq(TestLeafStatement("body3")))), + elseBody = Some(TestCompoundBody(Seq(TestLeafStatement("body3")))), session = spark ) )).getTreeIterator @@ -634,15 +691,15 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("searched case - 
without else (successful check)") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new CaseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1"), TestIfElseCondition(condVal = true, description = "con2") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))) ), elseBody = None, session = spark @@ -653,15 +710,15 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("searched case - without else (unsuccessful checks)") { - val iter = new CompoundBodyExec(Seq( + val iter = TestCompoundBody(Seq( new CaseStatementExec( conditions = Seq( TestIfElseCondition(condVal = false, description = "con1"), TestIfElseCondition(condVal = false, description = "con2") ), conditionalBodies = Seq( - new CompoundBodyExec(Seq(TestLeafStatement("body1"))), - new CompoundBodyExec(Seq(TestLeafStatement("body2"))) + TestCompoundBody(Seq(TestLeafStatement("body1"))), + TestCompoundBody(Seq(TestLeafStatement("body2"))) ), elseBody = None, session = spark @@ -672,10 +729,10 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi } test("loop statement with leave") { - val iter = new CompoundBodyExec( + val iter = TestCompoundBody( statements = Seq( new LoopStatementExec( - body = new CompoundBodyExec(Seq( + body = TestCompoundBody(Seq( TestLeafStatement("body1"), new LeaveStatementExec("lbl")) ), @@ -686,4 +743,363 @@ class SqlScriptingExecutionNodeSuite extends SparkFunSuite with SharedSparkSessi val statements = iter.map(extractStatementValue).toSeq assert(statements === Seq("body1", "lbl")) } + + test("for statement - enters body once") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(1, "intCol", "query1"), + variableName = Some("x"), + label = 
Some("for1"), + session = spark, + body = TestCompoundBody(Seq(TestLeafStatement("body"))) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "body", + "DropVariable", // drop for query var intCol + "DropVariable" // drop for loop var x + )) + } + + test("for statement - enters body with multiple statements multiple times") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = Some("x"), + label = Some("for1"), + session = spark, + body = TestCompoundBody( + Seq(TestLeafStatement("statement1"), TestLeafStatement("statement2")) + ) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "statement1", + "statement2", + "statement1", + "statement2", + "DropVariable", // drop for query var intCol + "DropVariable" // drop for loop var x + )) + } + + test("for statement - empty result") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(0, "intCol", "query1"), + variableName = Some("x"), + label = Some("for1"), + session = spark, + body = TestCompoundBody(Seq(TestLeafStatement("body1"))) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq.empty[String]) + } + + test("for statement - nested") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = Some("x"), + label = Some("for1"), + session = spark, + body = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol1", "query2"), + variableName = Some("y"), + label = Some("for2"), + session = spark, + body = TestCompoundBody(Seq(TestLeafStatement("body"))) + ) + )) + )), + label = Some("lbl") + ).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "body", + "body", + "DropVariable", // drop for query var intCol1 + "DropVariable", // 
drop for loop var y + "body", + "body", + "DropVariable", // drop for query var intCol1 + "DropVariable", // drop for loop var y + "DropVariable", // drop for query var intCol + "DropVariable" // drop for loop var x + )) + } + + test("for statement no variable - enters body once") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(1, "intCol", "query1"), + variableName = None, + label = Some("for1"), + session = spark, + body = TestCompoundBody(Seq(TestLeafStatement("body"))) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "body", + "DropVariable" // drop for query var intCol + )) + } + + test("for statement no variable - enters body with multiple statements multiple times") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = None, + label = Some("for1"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("statement1"), + TestLeafStatement("statement2"))) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "statement1", "statement2", "statement1", "statement2", + "DropVariable" // drop for query var intCol + )) + } + + test("for statement no variable - empty result") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(0, "intCol", "query1"), + variableName = None, + label = Some("for1"), + session = spark, + body = TestCompoundBody(Seq(TestLeafStatement("body1"))) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq.empty[String]) + } + + test("for statement no variable - nested") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = None, + label = Some("for1"), + session = spark, + body = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol1", "query2"), + 
variableName = None, + label = Some("for2"), + session = spark, + body = TestCompoundBody(Seq(TestLeafStatement("body"))) + ) + )) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "body", "body", + "DropVariable", // drop for query var intCol1 + "body", "body", + "DropVariable", // drop for query var intCol1 + "DropVariable" // drop for query var intCol + )) + } + + test("for statement - iterate") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = Some("x"), + label = Some("lbl1"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("statement1"), + new IterateStatementExec("lbl1"), + TestLeafStatement("statement2"))) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "statement1", + "lbl1", + "statement1", + "lbl1", + "DropVariable", // drop for query var intCol + "DropVariable" // drop for loop var x + )) + } + + test("for statement - leave") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = Some("x"), + label = Some("lbl1"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("statement1"), + new LeaveStatementExec("lbl1"), + TestLeafStatement("statement2"))) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq("statement1", "lbl1")) + } + + test("for statement - nested - iterate outer loop") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = Some("x"), + label = Some("lbl1"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("outer_body"), + TestForStatement( + query = MockQuery(2, "intCol1", "query2"), + variableName = Some("y"), + label = Some("lbl2"), + session = spark, + body = TestCompoundBody(Seq( + 
TestLeafStatement("body1"), + new IterateStatementExec("lbl1"), + TestLeafStatement("body2"))) + ) + )) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "outer_body", + "body1", + "lbl1", + "outer_body", + "body1", + "lbl1", + "DropVariable", // drop for query var intCol + "DropVariable" // drop for loop var x + )) + } + + test("for statement - nested - leave outer loop") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = Some("x"), + label = Some("lbl1"), + session = spark, + body = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query2"), + variableName = Some("y"), + label = Some("lbl2"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("body1"), + new LeaveStatementExec("lbl1"), + TestLeafStatement("body2"))) + ) + )) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq("body1", "lbl1")) + } + + test("for statement no variable - iterate") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = None, + label = Some("lbl1"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("statement1"), + new IterateStatementExec("lbl1"), + TestLeafStatement("statement2"))) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "statement1", "lbl1", "statement1", "lbl1", + "DropVariable" // drop for query var intCol + )) + } + + test("for statement no variable - leave") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = None, + label = Some("lbl1"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("statement1"), + new LeaveStatementExec("lbl1"), + TestLeafStatement("statement2"))) + ) + )).getTreeIterator + val statements 
= iter.map(extractStatementValue).toSeq + assert(statements === Seq("statement1", "lbl1")) + } + + test("for statement no variable - nested - iterate outer loop") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = None, + label = Some("lbl1"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("outer_body"), + TestForStatement( + query = MockQuery(2, "intCol1", "query2"), + variableName = None, + label = Some("lbl2"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("body1"), + new IterateStatementExec("lbl1"), + TestLeafStatement("body2"))) + ) + )) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq( + "outer_body", "body1", "lbl1", "outer_body", "body1", "lbl1", + "DropVariable" // drop for query var intCol + )) + } + + test("for statement no variable - nested - leave outer loop") { + val iter = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol", "query1"), + variableName = None, + label = Some("lbl1"), + session = spark, + body = TestCompoundBody(Seq( + TestForStatement( + query = MockQuery(2, "intCol1", "query2"), + variableName = None, + label = Some("lbl2"), + session = spark, + body = TestCompoundBody(Seq( + TestLeafStatement("body1"), + new LeaveStatementExec("lbl1"), + TestLeafStatement("body2"))) + ) + )) + ) + )).getTreeIterator + val statements = iter.map(extractStatementValue).toSeq + assert(statements === Seq("body1", "lbl1")) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala new file mode 100644 index 0000000000000..5b5285ea13275 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingExecutionSuite.scala @@ -0,0 +1,1069 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.scripting + +import scala.collection.mutable.ListBuffer + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.plans.logical.CompoundBody +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +/** + * SQL Scripting interpreter tests. + * Output from the parser is provided to the interpreter. + * Output from the interpreter (iterator over executable statements) is then checked - statements + * are executed and output DataFrames are compared with expected outputs. 
+ */ +class SqlScriptingExecutionSuite extends QueryTest with SharedSparkSession { + + // Tests setup + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.SQL_SCRIPTING_ENABLED.key, "true") + } + + // Helpers + private def runSqlScript( + sqlText: String, + args: Map[String, Expression] = Map.empty): Seq[Array[Row]] = { + val compoundBody = spark.sessionState.sqlParser.parsePlan(sqlText).asInstanceOf[CompoundBody] + val sse = new SqlScriptingExecution(compoundBody, spark, args) + val result: ListBuffer[Array[Row]] = ListBuffer.empty + + var df = sse.getNextResult + while (df.isDefined) { + // Collect results from the current DataFrame. + result.append(df.get.collect()) + df = sse.getNextResult + } + result.toSeq + } + + private def verifySqlScriptResult(sqlText: String, expected: Seq[Seq[Row]]): Unit = { + val result = runSqlScript(sqlText) + assert(result.length == expected.length) + result.zip(expected).foreach { + case (actualAnswer, expectedAnswer) => + assert(actualAnswer.sameElements(expectedAnswer)) + } + } + + // Tests + test("multi statement - simple") { + withTable("t") { + val sqlScript = + """ + |BEGIN + |CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + |INSERT INTO t VALUES (1, 'a', 1.0); + |SELECT a, b FROM t WHERE a = 12; + |SELECT a FROM t; + |END + |""".stripMargin + val expected = Seq( + Seq.empty[Row], // select + Seq(Row(1)) // select + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("multi statement - count") { + withTable("t") { + val sqlScript = + """ + |BEGIN + |CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + |INSERT INTO t VALUES (1, 'a', 1.0); + |INSERT INTO t VALUES (1, 'a', 1.0); + |SELECT + | CASE WHEN COUNT(*) > 10 THEN true + | ELSE false + | END AS MoreThanTen + |FROM t; + |END + |""".stripMargin + val expected = Seq(Seq(Row(false))) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("session vars - set and read (SET VAR)") { + val sqlScript = + """ + |BEGIN 
+ |DECLARE var = 1; + |SET VAR var = var + 1; + |SELECT var; + |END + |""".stripMargin + val expected = Seq(Seq(Row(2))) + verifySqlScriptResult(sqlScript, expected) + } + + test("session vars - set and read (SET)") { + val sqlScript = + """ + |BEGIN + |DECLARE var = 1; + |SET var = var + 1; + |SELECT var; + |END + |""".stripMargin + val expected = Seq(Seq(Row(2))) + verifySqlScriptResult(sqlScript, expected) + } + + test("session vars - set and read scoped") { + val sqlScript = + """ + |BEGIN + | BEGIN + | DECLARE var = 1; + | SELECT var; + | END; + | BEGIN + | DECLARE var = 2; + | SELECT var; + | END; + | BEGIN + | DECLARE var = 3; + | SET VAR var = var + 1; + | SELECT var; + | END; + |END + |""".stripMargin + val expected = Seq( + Seq(Row(1)), // select + Seq(Row(2)), // select + Seq(Row(4)) // select + ) + verifySqlScriptResult(sqlScript, expected) + } + + test("session vars - drop var statement") { + val sqlScript = + """ + |BEGIN + |DECLARE var = 1; + |SET VAR var = var + 1; + |SELECT var; + |DROP TEMPORARY VARIABLE var; + |END + |""".stripMargin + val expected = Seq(Seq(Row(2))) + verifySqlScriptResult(sqlScript, expected) + } + + test("if") { + val commands = + """ + |BEGIN + | IF 1=1 THEN + | SELECT 42; + | END IF; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("if nested") { + val commands = + """ + |BEGIN + | IF 1=1 THEN + | IF 2=1 THEN + | SELECT 41; + | ELSE + | SELECT 42; + | END IF; + | END IF; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("if else going in if") { + val commands = + """ + |BEGIN + | IF 1=1 + | THEN + | SELECT 42; + | ELSE + | SELECT 43; + | END IF; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("if else if going in else if") { + val commands = + """ + |BEGIN + | IF 1=2 + | THEN + | SELECT 42; + | ELSE IF 1=1 + | THEN + | 
SELECT 43; + | ELSE + | SELECT 44; + | END IF; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + + test("if else going in else") { + val commands = + """ + |BEGIN + | IF 1=2 + | THEN + | SELECT 42; + | ELSE + | SELECT 43; + | END IF; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + + test("if else if going in else") { + val commands = + """ + |BEGIN + | IF 1=2 + | THEN + | SELECT 42; + | ELSE IF 1=3 + | THEN + | SELECT 43; + | ELSE + | SELECT 44; + | END IF; + |END + |""".stripMargin + val expected = Seq(Seq(Row(44))) + verifySqlScriptResult(commands, expected) + } + + test("if with count") { + withTable("t") { + val commands = + """ + |BEGIN + |CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + |INSERT INTO t VALUES (1, 'a', 1.0); + |INSERT INTO t VALUES (1, 'a', 1.0); + |IF (SELECT COUNT(*) > 2 FROM t) THEN + | SELECT 42; + | ELSE + | SELECT 43; + | END IF; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + } + + test("if else if with count") { + withTable("t") { + val commands = + """ + |BEGIN + | CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + | INSERT INTO t VALUES (1, 'a', 1.0); + | INSERT INTO t VALUES (1, 'a', 1.0); + | IF (SELECT COUNT(*) > 2 FROM t) THEN + | SELECT 42; + | ELSE IF (SELECT COUNT(*) > 1 FROM t) THEN + | SELECT 43; + | ELSE + | SELECT 44; + | END IF; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + } + + test("searched case") { + val commands = + """ + |BEGIN + | CASE + | WHEN 1 = 1 THEN + | SELECT 42; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("searched case nested") { + val commands = + """ + |BEGIN + | CASE + | WHEN 1=1 THEN + | CASE + | WHEN 2=1 THEN + | SELECT 41; + | ELSE + | SELECT 42; 
+ | END CASE; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("searched case second case") { + val commands = + """ + |BEGIN + | CASE + | WHEN 1 = (SELECT 2) THEN + | SELECT 1; + | WHEN 2 = 2 THEN + | SELECT 42; + | WHEN (SELECT * FROM t) THEN + | SELECT * FROM b; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("searched case going in else") { + val commands = + """ + |BEGIN + | CASE + | WHEN 2 = 1 THEN + | SELECT 1; + | WHEN 3 IN (1,2) THEN + | SELECT 2; + | ELSE + | SELECT 43; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + + test("searched case with count") { + withTable("t") { + val commands = + """ + |BEGIN + |CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + |INSERT INTO t VALUES (1, 'a', 1.0); + |INSERT INTO t VALUES (1, 'a', 1.0); + |CASE + | WHEN (SELECT COUNT(*) > 2 FROM t) THEN + | SELECT 42; + | ELSE + | SELECT 43; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + } + + test("searched case else with count") { + withTable("t") { + val commands = + """ + |BEGIN + | CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + | INSERT INTO t VALUES (1, 'a', 1.0); + | INSERT INTO t VALUES (1, 'a', 1.0); + | CASE + | WHEN (SELECT COUNT(*) > 2 FROM t) THEN + | SELECT 42; + | WHEN (SELECT COUNT(*) > 1 FROM t) THEN + | SELECT 43; + | ELSE + | SELECT 44; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + } + + test("searched case no cases matched no else") { + val commands = + """ + |BEGIN + | CASE + | WHEN 1 = 2 THEN + | SELECT 42; + | WHEN 1 = 3 THEN + | SELECT 43; + | END CASE; + |END + |""".stripMargin + val expected = Seq.empty + verifySqlScriptResult(commands, 
expected) + } + + test("simple case") { + val commands = + """ + |BEGIN + | CASE 1 + | WHEN 1 THEN + | SELECT 42; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("simple case nested") { + val commands = + """ + |BEGIN + | CASE 1 + | WHEN 1 THEN + | CASE 2 + | WHEN (SELECT 3) THEN + | SELECT 41; + | ELSE + | SELECT 42; + | END CASE; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("simple case second case") { + val commands = + """ + |BEGIN + | CASE (SELECT 2) + | WHEN 1 THEN + | SELECT 1; + | WHEN 2 THEN + | SELECT 42; + | WHEN (SELECT * FROM t) THEN + | SELECT * FROM b; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + + test("simple case going in else") { + val commands = + """ + |BEGIN + | CASE 1 + | WHEN 2 THEN + | SELECT 1; + | WHEN 3 THEN + | SELECT 2; + | ELSE + | SELECT 43; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + + test("simple case with count") { + withTable("t") { + val commands = + """ + |BEGIN + |CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + |INSERT INTO t VALUES (1, 'a', 1.0); + |INSERT INTO t VALUES (1, 'a', 1.0); + |CASE (SELECT COUNT(*) FROM t) + | WHEN 1 THEN + | SELECT 41; + | WHEN 2 THEN + | SELECT 42; + | ELSE + | SELECT 43; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(42))) + verifySqlScriptResult(commands, expected) + } + } + + test("simple case else with count") { + withTable("t") { + val commands = + """ + |BEGIN + | CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + | INSERT INTO t VALUES (1, 'a', 1.0); + | INSERT INTO t VALUES (2, 'b', 2.0); + | CASE (SELECT COUNT(*) FROM t) + | WHEN 1 THEN + | SELECT 42; + | WHEN 3 THEN + | SELECT 43; + | ELSE + | SELECT 44; + | END CASE; 
+ |END + |""".stripMargin + val expected = Seq(Seq(Row(44))) + verifySqlScriptResult(commands, expected) + } + } + + test("simple case no cases matched no else") { + val commands = + """ + |BEGIN + | CASE 1 + | WHEN 2 THEN + | SELECT 42; + | WHEN 3 THEN + | SELECT 43; + | END CASE; + |END + |""".stripMargin + val expected = Seq.empty + verifySqlScriptResult(commands, expected) + } + + test("simple case compare with null") { + withTable("t") { + val commands = + """ + |BEGIN + | CREATE TABLE t (a INT) USING parquet; + | CASE (SELECT COUNT(*) FROM t) + | WHEN 1 THEN + | SELECT 42; + | ELSE + | SELECT 43; + | END CASE; + |END + |""".stripMargin + val expected = Seq(Seq(Row(43))) + verifySqlScriptResult(commands, expected) + } + } + + test("while") { + val commands = + """ + |BEGIN + | DECLARE i = 0; + | WHILE i < 3 DO + | SELECT i; + | SET VAR i = i + 1; + | END WHILE; + |END + |""".stripMargin + val expected = Seq( + Seq(Row(0)), // select i + Seq(Row(1)), // select i + Seq(Row(2)) // select i + ) + verifySqlScriptResult(commands, expected) + } + + test("while: not entering body") { + val commands = + """ + |BEGIN + | DECLARE i = 3; + | WHILE i < 3 DO + | SELECT i; + | SET VAR i = i + 1; + | END WHILE; + |END + |""".stripMargin + val expected = Seq.empty + verifySqlScriptResult(commands, expected) + } + + test("nested while") { + val commands = + """ + |BEGIN + | DECLARE i = 0; + | DECLARE j = 0; + | WHILE i < 2 DO + | SET VAR j = 0; + | WHILE j < 2 DO + | SELECT i, j; + | SET VAR j = j + 1; + | END WHILE; + | SET VAR i = i + 1; + | END WHILE; + |END + |""".stripMargin + val expected = Seq( + Seq(Row(0, 0)), // select i, j + Seq(Row(0, 1)), // select i, j + Seq(Row(1, 0)), // select i, j + Seq(Row(1, 1)) // select i, j + ) + verifySqlScriptResult(commands, expected) + } + + test("while with count") { + withTable("t") { + val commands = + """ + |BEGIN + |CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + |WHILE (SELECT COUNT(*) < 2 FROM t) DO + | SELECT 42; + 
| INSERT INTO t VALUES (1, 'a', 1.0); + |END WHILE; + |END + |""".stripMargin + val expected = Seq( + Seq(Row(42)), // select + Seq(Row(42)) // select + ) + verifySqlScriptResult(commands, expected) + } + } + + test("repeat") { + val commands = + """ + |BEGIN + | DECLARE i = 0; + | REPEAT + | SELECT i; + | SET VAR i = i + 1; + | UNTIL + | i = 3 + | END REPEAT; + |END + |""".stripMargin + val expected = Seq( + Seq(Row(0)), // select i + Seq(Row(1)), // select i + Seq(Row(2)) // select i + ) + verifySqlScriptResult(commands, expected) + } + + test("repeat: enters body only once") { + val commands = + """ + |BEGIN + | DECLARE i = 3; + | REPEAT + | SELECT i; + | SET VAR i = i + 1; + | UNTIL + | 1 = 1 + | END REPEAT; + |END + |""".stripMargin + + val expected = Seq(Seq(Row(3))) + verifySqlScriptResult(commands, expected) + } + + test("nested repeat") { + val commands = + """ + |BEGIN + | DECLARE i = 0; + | DECLARE j = 0; + | REPEAT + | SET VAR j = 0; + | REPEAT + | SELECT i, j; + | SET VAR j = j + 1; + | UNTIL j >= 2 + | END REPEAT; + | SET VAR i = i + 1; + | UNTIL i >= 2 + | END REPEAT; + |END + |""".stripMargin + + val expected = Seq( + Seq(Row(0, 0)), // select i, j + Seq(Row(0, 1)), // select i, j + Seq(Row(1, 0)), // select i, j + Seq(Row(1, 1)) // select i, j + ) + verifySqlScriptResult(commands, expected) + } + + test("repeat with count") { + withTable("t") { + val commands = + """ + |BEGIN + |CREATE TABLE t (a INT, b STRING, c DOUBLE) USING parquet; + |REPEAT + | SELECT 42; + | INSERT INTO t VALUES (1, 'a', 1.0); + |UNTIL (SELECT COUNT(*) >= 2 FROM t) + |END REPEAT; + |END + |""".stripMargin + + val expected = Seq( + Seq(Row(42)), // select + Seq(Row(42)) // select + ) + verifySqlScriptResult(commands, expected) + } + } + + test("leave compound block") { + val sqlScriptText = + """ + |BEGIN + | lbl: BEGIN + | SELECT 1; + | LEAVE lbl; + | SELECT 2; + | END; + |END""".stripMargin + val expected = Seq(Seq(Row(1))) + verifySqlScriptResult(sqlScriptText, expected) + 
} + + test("leave while loop") { + val sqlScriptText = + """ + |BEGIN + | lbl: WHILE 1 = 1 DO + | SELECT 1; + | LEAVE lbl; + | END WHILE; + |END""".stripMargin + val expected = Seq(Seq(Row(1))) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("leave repeat loop") { + val sqlScriptText = + """ + |BEGIN + | lbl: REPEAT + | SELECT 1; + | LEAVE lbl; + | UNTIL 1 = 2 + | END REPEAT; + |END""".stripMargin + val expected = Seq(Seq(Row(1))) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("iterate while loop") { + val sqlScriptText = + """ + |BEGIN + | DECLARE x INT; + | SET x = 0; + | lbl: WHILE x < 2 DO + | SET x = x + 1; + | ITERATE lbl; + | SET x = x + 2; + | END WHILE; + | SELECT x; + |END""".stripMargin + val expected = Seq(Seq(Row(2))) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("iterate repeat loop") { + val sqlScriptText = + """ + |BEGIN + | DECLARE x INT; + | SET x = 0; + | lbl: REPEAT + | SET x = x + 1; + | ITERATE lbl; + | SET x = x + 2; + | UNTIL x > 1 + | END REPEAT; + | SELECT x; + |END""".stripMargin + val expected = Seq(Seq(Row(2))) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("leave outer loop from nested repeat loop") { + val sqlScriptText = + """ + |BEGIN + | lbl: REPEAT + | lbl2: REPEAT + | SELECT 1; + | LEAVE lbl; + | UNTIL 1 = 2 + | END REPEAT; + | UNTIL 1 = 2 + | END REPEAT; + |END""".stripMargin + val expected = Seq(Seq(Row(1))) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("leave outer loop from nested while loop") { + val sqlScriptText = + """ + |BEGIN + | lbl: WHILE 1 = 1 DO + | lbl2: WHILE 2 = 2 DO + | SELECT 1; + | LEAVE lbl; + | END WHILE; + | END WHILE; + |END""".stripMargin + val expected = Seq(Seq(Row(1))) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("iterate outer loop from nested while loop") { + val sqlScriptText = + """ + |BEGIN + | DECLARE x INT; + | SET x = 0; + | lbl: WHILE x < 2 DO + | SET x = x + 1; + | lbl2: WHILE 2 = 2 DO + | SELECT 1; + 
| ITERATE lbl; + | END WHILE; + | END WHILE; + | SELECT x; + |END""".stripMargin + val expected = Seq( + Seq(Row(1)), // select 1 + Seq(Row(1)), // select 1 + Seq(Row(2)) // select x + ) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("nested compounds in loop - leave in inner compound") { + val sqlScriptText = + """ + |BEGIN + | DECLARE x INT; + | SET x = 0; + | lbl: WHILE x < 2 DO + | SET x = x + 1; + | BEGIN + | SELECT 1; + | lbl2: BEGIN + | SELECT 2; + | LEAVE lbl2; + | SELECT 3; + | END; + | END; + | END WHILE; + | SELECT x; + |END""".stripMargin + val expected = Seq( + Seq(Row(1)), // select 1 + Seq(Row(2)), // select 2 + Seq(Row(1)), // select 1 + Seq(Row(2)), // select 2 + Seq(Row(2)) // select x + ) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("iterate outer loop from nested repeat loop") { + val sqlScriptText = + """ + |BEGIN + | DECLARE x INT; + | SET x = 0; + | lbl: REPEAT + | SET x = x + 1; + | lbl2: REPEAT + | SELECT 1; + | ITERATE lbl; + | UNTIL 1 = 2 + | END REPEAT; + | UNTIL x > 1 + | END REPEAT; + | SELECT x; + |END""".stripMargin + val expected = Seq( + Seq(Row(1)), // select 1 + Seq(Row(1)), // select 1 + Seq(Row(2)) // select x + ) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("loop statement with leave") { + val sqlScriptText = + """ + |BEGIN + | DECLARE x INT; + | SET x = 0; + | lbl: LOOP + | SET x = x + 1; + | SELECT x; + | IF x > 2 + | THEN + | LEAVE lbl; + | END IF; + | END LOOP; + | SELECT x; + |END""".stripMargin + val expected = Seq( + Seq(Row(1)), // select x + Seq(Row(2)), // select x + Seq(Row(3)), // select x + Seq(Row(3)) // select x + ) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("nested loop statement with leave") { + val commands = + """ + |BEGIN + | DECLARE x = 0; + | DECLARE y = 0; + | lbl1: LOOP + | SET VAR y = 0; + | lbl2: LOOP + | SELECT x, y; + | SET VAR y = y + 1; + | IF y >= 2 THEN + | LEAVE lbl2; + | END IF; + | END LOOP; + | SET VAR x = x + 1; + | IF x >= 
2 THEN + | LEAVE lbl1; + | END IF; + | END LOOP; + |END + |""".stripMargin + + val expected = Seq( + Seq(Row(0, 0)), // select x, y + Seq(Row(0, 1)), // select x, y + Seq(Row(1, 0)), // select x, y + Seq(Row(1, 1)) // select x, y + ) + verifySqlScriptResult(commands, expected) + } + + test("iterate loop statement") { + val sqlScriptText = + """ + |BEGIN + | DECLARE x INT; + | SET x = 0; + | lbl: LOOP + | SET x = x + 1; + | IF x > 1 THEN + | LEAVE lbl; + | END IF; + | ITERATE lbl; + | SET x = x + 2; + | END LOOP; + | SELECT x; + |END""".stripMargin + val expected = Seq(Seq(Row(2))) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("leave outer loop from nested loop statement") { + val sqlScriptText = + """ + |BEGIN + | lbl: LOOP + | lbl2: LOOP + | SELECT 1; + | LEAVE lbl; + | END LOOP; + | END LOOP; + |END""".stripMargin + // Execution immediately leaves the outer loop after SELECT, + // so we expect only a single row in the result set. + val expected = Seq(Seq(Row(1))) + verifySqlScriptResult(sqlScriptText, expected) + } + + test("iterate outer loop from nested loop statement") { + val sqlScriptText = + """ + |BEGIN + | DECLARE x INT; + | SET x = 0; + | lbl: LOOP + | SET x = x + 1; + | IF x > 2 THEN + | LEAVE lbl; + | END IF; + | lbl2: LOOP + | SELECT 1; + | ITERATE lbl; + | SET x = 10; + | END LOOP; + | END LOOP; + | SELECT x; + |END""".stripMargin + val expected = Seq( + Seq(Row(1)), // select 1 + Seq(Row(1)), // select 1 + Seq(Row(3)) // select x + ) + verifySqlScriptResult(sqlScriptText, expected) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreterSuite.scala index b0b844d2b52ca..c7439a8934d73 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/scripting/SqlScriptingInterpreterSuite.scala @@ -20,6 +20,7 @@ 
package org.apache.spark.sql.scripting import org.apache.spark.{SparkConf, SparkException, SparkNumberFormatException} import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, QueryTest, Row} import org.apache.spark.sql.catalyst.QueryPlanningTracker +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.CompoundBody import org.apache.spark.sql.exceptions.SqlScriptingException import org.apache.spark.sql.internal.SQLConf @@ -39,11 +40,19 @@ class SqlScriptingInterpreterSuite extends QueryTest with SharedSparkSession { } // Helpers - private def runSqlScript(sqlText: String): Array[DataFrame] = { - val interpreter = SqlScriptingInterpreter() + private def runSqlScript( + sqlText: String, + args: Map[String, Expression] = Map.empty): Array[DataFrame] = { + val interpreter = SqlScriptingInterpreter(spark) val compoundBody = spark.sessionState.sqlParser.parsePlan(sqlText).asInstanceOf[CompoundBody] - val executionPlan = interpreter.buildExecutionPlan(compoundBody, spark) - executionPlan.flatMap { + + // Initialize context so scopes can be entered correctly. 
+ val context = new SqlScriptingExecutionContext() + val executionPlan = interpreter.buildExecutionPlan(compoundBody, args, context) + context.frames.append(new SqlScriptingExecutionFrame(executionPlan.getTreeIterator)) + executionPlan.enterScope() + + executionPlan.getTreeIterator.flatMap { case statement: SingleStatementExec => if (statement.isExecuted) { None @@ -107,6 +116,61 @@ class SqlScriptingInterpreterSuite extends QueryTest with SharedSparkSession { } } + test("empty begin end block") { + val sqlScript = + """ + |BEGIN + |END + |""".stripMargin + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(sqlScript, expected) + } + + test("empty begin end blocks") { + val sqlScript = + """ + |BEGIN + | BEGIN + | END; + | BEGIN + | END; + |END + |""".stripMargin + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(sqlScript, expected) + } + + test("empty begin end blocks with single statement") { + val sqlScript = + """ + |BEGIN + | BEGIN + | END; + | SELECT 1; + | BEGIN + | END; + |END + |""".stripMargin + val expected = Seq(Seq(Row(1))) + verifySqlScriptResult(sqlScript, expected) + } + + test("empty begin end blocks - nested") { + val sqlScript = + """ + |BEGIN + | BEGIN + | BEGIN + | END; + | BEGIN + | END; + | END; + |END + |""".stripMargin + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(sqlScript, expected) + } + test("session vars - set and read (SET VAR)") { val sqlScript = """ @@ -237,6 +301,40 @@ class SqlScriptingInterpreterSuite extends QueryTest with SharedSparkSession { verifySqlScriptResult(commands, expected) } + test("if - empty body") { + val commands = + """ + |BEGIN + | IF 1=1 THEN + | BEGIN + | END; + | END IF; + |END + |""".stripMargin + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(commands, expected) + } + + test("if - nested empty body") { + val commands = + """ + |BEGIN + | IF 1=1 THEN + | BEGIN + | BEGIN + | END; + | END; + | BEGIN + | BEGIN + | END; + | END; + | END IF; + |END + |""".stripMargin 
+ val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(commands, expected) + } + test("if nested") { val commands = """ @@ -386,6 +484,42 @@ class SqlScriptingInterpreterSuite extends QueryTest with SharedSparkSession { verifySqlScriptResult(commands, expected) } + test("searched case - empty body") { + val commands = + """ + |BEGIN + | CASE + | WHEN 1 = 1 THEN + | BEGIN + | END; + | END CASE; + |END + |""".stripMargin + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(commands, expected) + } + + test("searched case - nested empty body") { + val commands = + """ + |BEGIN + | CASE + | WHEN 1 = 1 THEN + | BEGIN + | BEGIN + | END; + | END; + | BEGIN + | BEGIN + | END; + | END; + | END CASE; + |END + |""".stripMargin + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(commands, expected) + } + test("searched case nested") { val commands = """ @@ -586,6 +720,42 @@ class SqlScriptingInterpreterSuite extends QueryTest with SharedSparkSession { verifySqlScriptResult(commands, expected) } + test("simple case - empty body") { + val commands = + """ + |BEGIN + | CASE 1 + | WHEN 1 THEN + | BEGIN + | END; + | END CASE; + |END + |""".stripMargin + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(commands, expected) + } + + test("simple case - nested empty body") { + val commands = + """ + |BEGIN + | CASE 1 + | WHEN 1 THEN + | BEGIN + | BEGIN + | END; + | END; + | BEGIN + | BEGIN + | END; + | END; + | END CASE; + |END + |""".stripMargin + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(commands, expected) + } + test("simple case nested") { val commands = """ @@ -982,6 +1152,42 @@ class SqlScriptingInterpreterSuite extends QueryTest with SharedSparkSession { verifySqlScriptResult(commands, expected) } + test("repeat - empty body") { + val commands = + """ + |BEGIN + | REPEAT + | BEGIN + | END; + | UNTIL 1 = 1 + | END REPEAT; + |END + |""".stripMargin + + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(commands, expected) + } 
+ + test("repeat - nested empty body") { + val commands = + """ + |BEGIN + | REPEAT + | BEGIN + | BEGIN + | END; + | END; + | BEGIN + | END; + | UNTIL 1 = 1 + | END REPEAT; + |END + |""".stripMargin + + val expected = Seq.empty[Seq[Row]] + verifySqlScriptResult(commands, expected) + } + test("nested repeat") { val commands = """ @@ -1547,4 +1753,1116 @@ class SqlScriptingInterpreterSuite extends QueryTest with SharedSparkSession { ) verifySqlScriptResult(sqlScriptText, expected) } + + test("for statement - enters body once") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING, doubleCol DOUBLE) using parquet; + | INSERT INTO t VALUES (1, 'first', 1.0); + | FOR row AS SELECT * FROM t DO + | SELECT row.intCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(1)), // select row.intCol + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - enters body with multiple statements multiple times") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING, doubleCol DOUBLE) using parquet; + | INSERT INTO t VALUES (1, 'first', 1.0); + | INSERT INTO t VALUES (2, 'second', 2.0); + | FOR row AS SELECT * FROM t ORDER BY intCol DO + | SELECT row.intCol; + | SELECT intCol; + | SELECT row.stringCol; + | SELECT stringCol; + | SELECT row.doubleCol; + | SELECT doubleCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(1)), // select row.intCol + Seq(Row(1)), // select intCol + Seq(Row("first")), // select row.stringCol + Seq(Row("first")), // select stringCol + Seq(Row(1.0)), // select row.doubleCol + Seq(Row(1.0)), // select 
doubleCol + Seq(Row(2)), // select row.intCol + Seq(Row(2)), // select intCol + Seq(Row("second")), // select row.stringCol + Seq(Row("second")), // select stringCol + Seq(Row(2.0)), // select row.doubleCol + Seq(Row(2.0)), // select doubleCol + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - sum of column from table") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | DECLARE sumOfCols = 0; + | CREATE TABLE t (intCol INT) using parquet; + | INSERT INTO t VALUES (1), (2), (3), (4); + | FOR row AS SELECT * FROM t DO + | SET sumOfCols = sumOfCols + row.intCol; + | END FOR; + | SELECT sumOfCols; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // declare sumOfCols + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // set sumOfCols + Seq.empty[Row], // set sumOfCols + Seq.empty[Row], // set sumOfCols + Seq.empty[Row], // set sumOfCols + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq(Row(10)), // select sumOfCols + Seq.empty[Row] // drop sumOfCols + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - map, struct, array") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (int_column INT, map_column MAP, + | struct_column STRUCT, array_column ARRAY); + | INSERT INTO t VALUES + | (1, MAP('a', 1), STRUCT('John', 25), ARRAY('apricot', 'quince')), + | (2, MAP('b', 2), STRUCT('Jane', 30), ARRAY('plum', 'pear')); + | FOR row AS SELECT * FROM t ORDER BY int_column DO + | SELECT row.map_column; + | SELECT map_column; + | SELECT row.struct_column; + | SELECT struct_column; + | SELECT row.array_column; + | SELECT array_column; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(Map("a" -> 1))), 
// select row.map_column + Seq(Row(Map("a" -> 1))), // select map_column + Seq(Row(Row("John", 25))), // select row.struct_column + Seq(Row(Row("John", 25))), // select struct_column + Seq(Row(Array("apricot", "quince"))), // select row.array_column + Seq(Row(Array("apricot", "quince"))), // select array_column + Seq(Row(Map("b" -> 2))), // select row.map_column + Seq(Row(Map("b" -> 2))), // select map_column + Seq(Row(Row("Jane", 30))), // select row.struct_column + Seq(Row(Row("Jane", 30))), // select struct_column + Seq(Row(Array("plum", "pear"))), // select row.array_column + Seq(Row(Array("plum", "pear"))), // select array_column + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested struct") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t + | (int_column INT, + | struct_column STRUCT>>); + | INSERT INTO t VALUES + | (1, STRUCT(1, STRUCT(STRUCT("one")))), + | (2, STRUCT(2, STRUCT(STRUCT("two")))); + | FOR row AS SELECT * FROM t ORDER BY int_column DO + | SELECT row.struct_column; + | SELECT struct_column; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(Row(1, Row(Row("one"))))), // select row.struct_column + Seq(Row(Row(1, Row(Row("one"))))), // select struct_column + Seq(Row(Row(2, Row(Row("two"))))), // select row.struct_column + Seq(Row(Row(2, Row(Row("two"))))), // select struct_column + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested map") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (int_column INT, map_column MAP>>); + | INSERT INTO t VALUES + | (1, MAP('a', 
MAP(1, MAP(false, 10)))), + | (2, MAP('b', MAP(2, MAP(true, 20)))); + | FOR row AS SELECT * FROM t ORDER BY int_column DO + | SELECT row.map_column; + | SELECT map_column; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(Map("a" -> Map(1 -> Map(false -> 10))))), // select row.map_column + Seq(Row(Map("a" -> Map(1 -> Map(false -> 10))))), // select map_column + Seq(Row(Map("b" -> Map(2 -> Map(true -> 20))))), // select row.map_column + Seq(Row(Map("b" -> Map(2 -> Map(true -> 20))))), // select map_column + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested array") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t + | (int_column INT, array_column ARRAY>>); + | INSERT INTO t VALUES + | (1, ARRAY(ARRAY(ARRAY(1, 2), ARRAY(3, 4)), ARRAY(ARRAY(5, 6)))), + | (2, ARRAY(ARRAY(ARRAY(7, 8), ARRAY(9, 10)), ARRAY(ARRAY(11, 12)))); + | FOR row AS SELECT * FROM t ORDER BY int_column DO + | SELECT row.array_column; + | SELECT array_column; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(Seq(Seq(Seq(1, 2), Seq(3, 4)), Seq(Seq(5, 6))))), // row.array_column + Seq(Row(Seq(Seq(Seq(1, 2), Seq(3, 4)), Seq(Seq(5, 6))))), // array_column + Seq(Row(Array(Seq(Seq(7, 8), Seq(9, 10)), Seq(Seq(11, 12))))), // row.array_column + Seq(Row(Array(Seq(Seq(7, 8), Seq(9, 10)), Seq(Seq(11, 12))))), // array_column + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - empty result") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | FOR row AS SELECT * FROM t ORDER BY intCol DO + | 
SELECT row.intCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row] // create table + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - empty body") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING, doubleCol DOUBLE) using parquet; + | INSERT INTO t VALUES (1, 'first', 1.0); + | FOR row AS SELECT * FROM t DO + | BEGIN + | END; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested empty body") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING, doubleCol DOUBLE) using parquet; + | INSERT INTO t VALUES (1, 'first', 1.0); + | FOR row AS SELECT * FROM t DO + | BEGIN + | BEGIN + | END; + | END; + | BEGIN + | BEGIN + | END; + | END; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement iterate") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING) using parquet; + | INSERT INTO t VALUES (1, 'first'), (2, 'second'), (3, 'third'), (4, 'fourth'); + | + | lbl: FOR x AS SELECT * FROM t ORDER BY intCol DO + | IF x.intCol = 2 THEN + | ITERATE lbl; + | END IF; + | SELECT stringCol; + | SELECT x.stringCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row("first")), // select stringCol + 
Seq(Row("first")), // select x.stringCol + Seq(Row("third")), // select stringCol + Seq(Row("third")), // select x.stringCol + Seq(Row("fourth")), // select stringCol + Seq(Row("fourth")), // select x.stringCol + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement leave") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING) using parquet; + | INSERT INTO t VALUES (1, 'first'), (2, 'second'), (3, 'third'), (4, 'fourth'); + | + | lbl: FOR x AS SELECT * FROM t ORDER BY intCol DO + | IF x.intCol = 3 THEN + | LEAVE lbl; + | END IF; + | SELECT stringCol; + | SELECT x.stringCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row("first")), // select stringCol + Seq(Row("first")), // select x.stringCol + Seq(Row("second")), // select stringCol + Seq(Row("second")) // select x.stringCol + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested - in while") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | DECLARE cnt = 0; + | CREATE TABLE t (intCol INT) using parquet; + | INSERT INTO t VALUES (0); + | WHILE cnt < 2 DO + | SET cnt = cnt + 1; + | FOR x AS SELECT * FROM t ORDER BY intCol DO + | SELECT x.intCol; + | END FOR; + | INSERT INTO t VALUES (cnt); + | END WHILE; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // declare cnt + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // set cnt + Seq(Row(0)), // select intCol + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // set cnt + Seq(Row(0)), // select intCol + Seq(Row(1)), // select intCol + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + 
Seq.empty[Row] // drop cnt + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested - in other for") { + withTable("t", "t2") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | CREATE TABLE t2 (intCol2 INT) using parquet; + | INSERT INTO t VALUES (0), (1); + | INSERT INTO t2 VALUES (2), (3); + | FOR x as SELECT * FROM t ORDER BY intCol DO + | FOR y AS SELECT * FROM t2 ORDER BY intCol2 DESC DO + | SELECT x.intCol; + | SELECT intCol; + | SELECT y.intCol2; + | SELECT intCol2; + | END FOR; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(0)), // select x.intCol + Seq(Row(0)), // select intCol + Seq(Row(3)), // select y.intCol2 + Seq(Row(3)), // select intCol2 + Seq(Row(0)), // select x.intCol + Seq(Row(0)), // select intCol + Seq(Row(2)), // select y.intCol2 + Seq(Row(2)), // select intCol2 + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq(Row(1)), // select x.intCol + Seq(Row(1)), // select intCol + Seq(Row(3)), // select y.intCol2 + Seq(Row(3)), // select intCol2 + Seq(Row(1)), // select x.intCol + Seq(Row(1)), // select intCol + Seq(Row(2)), // select y.intCol2 + Seq(Row(2)), // select intCol2 + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop outer var + Seq.empty[Row] // drop outer var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + // ignored until loops are fixed to support empty bodies + ignore("for statement - nested - empty result set") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | REPEAT + | FOR x AS SELECT * FROM t ORDER BY intCol DO + | SELECT x.intCol; + | END FOR; + | UNTIL 1 = 1 + | END REPEAT; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // declare cnt + Seq.empty[Row], // 
create table + Seq.empty[Row], // insert + Seq.empty[Row], // set cnt + Seq(Row(0)), // select intCol + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // set cnt + Seq(Row(0)), // select intCol + Seq(Row(1)), // select intCol + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop cnt + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested - iterate outer loop") { + withTable("t", "t2") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | CREATE TABLE t2 (intCol2 INT) using parquet; + | INSERT INTO t VALUES (0), (1); + | INSERT INTO t2 VALUES (2), (3); + | lbl1: FOR x as SELECT * FROM t ORDER BY intCol DO + | lbl2: FOR y AS SELECT * FROM t2 ORDER BY intCol2 DESC DO + | SELECT y.intCol2; + | SELECT intCol2; + | ITERATE lbl1; + | SELECT 1; + | END FOR; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(3)), // select y.intCol2 + Seq(Row(3)), // select intCol2 + Seq(Row(3)), // select y.intCol2 + Seq(Row(3)), // select intCol2 + Seq.empty[Row], // drop outer var + Seq.empty[Row] // drop outer var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested - leave outer loop") { + withTable("t", "t2") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | CREATE TABLE t2 (intCol2 INT) using parquet; + | INSERT INTO t VALUES (0), (1); + | INSERT INTO t2 VALUES (2), (3); + | lbl1: FOR x as SELECT * FROM t ORDER BY intCol DO + | lbl2: FOR y AS SELECT * FROM t2 ORDER BY intCol2 DESC DO + | SELECT y.intCol2; + | SELECT intCol2; + | LEAVE lbl1; + | SELECT 1; + | END FOR; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + 
Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(3)), // select y.intCol2 + Seq(Row(3)) // select intCol2 + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - nested - leave inner loop") { + withTable("t", "t2") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | CREATE TABLE t2 (intCol2 INT) using parquet; + | INSERT INTO t VALUES (0), (1); + | INSERT INTO t2 VALUES (2), (3); + | lbl1: FOR x as SELECT * FROM t ORDER BY intCol DO + | lbl2: FOR y AS SELECT * FROM t2 ORDER BY intCol2 DESC DO + | SELECT y.intCol2; + | SELECT intCol2; + | LEAVE lbl2; + | SELECT 1; + | END FOR; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(3)), // select y.intCol2 + Seq(Row(3)), // select intCol2 + Seq(Row(3)), // select y.intCol2 + Seq(Row(3)), // select intCol2 + Seq.empty[Row], // drop outer var + Seq.empty[Row] // drop outer var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - enters body once") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING, doubleCol DOUBLE) using parquet; + | INSERT INTO t VALUES (1, 'first', 1.0); + | FOR SELECT * FROM t DO + | SELECT intCol; + | SELECT stringCol; + | SELECT doubleCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(1)), // select intCol + Seq(Row("first")), // select stringCol + Seq(Row(1.0)), // select doubleCol + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - enters body with multiple statements multiple times") { + withTable("t") { + val 
sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING, doubleCol DOUBLE) using parquet; + | INSERT INTO t VALUES (1, 'first', 1.0); + | INSERT INTO t VALUES (2, 'second', 2.0); + | FOR SELECT * FROM t ORDER BY intCol DO + | SELECT intCol; + | SELECT stringCol; + | SELECT doubleCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(1)), // select intCol + Seq(Row("first")), // select stringCol + Seq(Row(1.0)), // select doubleCol + Seq(Row(2)), // select intCol + Seq(Row("second")), // select stringCol + Seq(Row(2.0)), // select doubleCol + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - sum of column from table") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | DECLARE sumOfCols = 0; + | CREATE TABLE t (intCol INT) using parquet; + | INSERT INTO t VALUES (1), (2), (3), (4); + | FOR SELECT * FROM t DO + | SET sumOfCols = sumOfCols + intCol; + | END FOR; + | SELECT sumOfCols; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // declare sumOfCols + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // set sumOfCols + Seq.empty[Row], // set sumOfCols + Seq.empty[Row], // set sumOfCols + Seq.empty[Row], // set sumOfCols + Seq.empty[Row], // drop local var + Seq(Row(10)), // select sumOfCols + Seq.empty[Row] // drop sumOfCols + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - map, struct, array") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (int_column INT, map_column MAP, + | struct_column STRUCT, array_column ARRAY); + | INSERT INTO t VALUES + | (1, MAP('a', 1), STRUCT('John', 25), ARRAY('apricot', 'quince')), + | (2, MAP('b', 2), STRUCT('Jane', 30), 
ARRAY('plum', 'pear')); + | FOR SELECT * FROM t ORDER BY int_column DO + | SELECT map_column; + | SELECT struct_column; + | SELECT array_column; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(Map("a" -> 1))), // select map_column + Seq(Row(Row("John", 25))), // select struct_column + Seq(Row(Array("apricot", "quince"))), // select array_column + Seq(Row(Map("b" -> 2))), // select map_column + Seq(Row(Row("Jane", 30))), // select struct_column + Seq(Row(Array("plum", "pear"))), // select array_column + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - nested struct") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (int_column INT, + | struct_column STRUCT>>); + | INSERT INTO t VALUES + | (1, STRUCT(1, STRUCT(STRUCT("one")))), + | (2, STRUCT(2, STRUCT(STRUCT("two")))); + | FOR SELECT * FROM t ORDER BY int_column DO + | SELECT struct_column; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(Row(1, Row(Row("one"))))), // select struct_column + Seq(Row(Row(2, Row(Row("two"))))), // select struct_column + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - nested map") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (int_column INT, map_column MAP>>); + | INSERT INTO t VALUES + | (1, MAP('a', MAP(1, MAP(false, 10)))), + | (2, MAP('b', MAP(2, MAP(true, 20)))); + | FOR SELECT * FROM t ORDER BY int_column DO + | SELECT map_column; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + 
Seq(Row(Map("a" -> Map(1 -> Map(false -> 10))))), // select map_column + Seq(Row(Map("b" -> Map(2 -> Map(true -> 20))))), // select map_column + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - nested array") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t + | (int_column INT, array_column ARRAY>>); + | INSERT INTO t VALUES + | (1, ARRAY(ARRAY(ARRAY(1, 2), ARRAY(3, 4)), ARRAY(ARRAY(5, 6)))), + | (2, ARRAY(ARRAY(ARRAY(7, 8), ARRAY(9, 10)), ARRAY(ARRAY(11, 12)))); + | FOR SELECT * FROM t ORDER BY int_column DO + | SELECT array_column; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row(Seq(Seq(Seq(1, 2), Seq(3, 4)), Seq(Seq(5, 6))))), // array_column + Seq(Row(Array(Seq(Seq(7, 8), Seq(9, 10)), Seq(Seq(11, 12))))), // array_column + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - empty result") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | FOR SELECT * FROM t ORDER BY intCol DO + | SELECT intCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row] // create table + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - iterate") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING) using parquet; + | INSERT INTO t VALUES (1, 'first'), (2, 'second'), (3, 'third'), (4, 'fourth'); + | + | lbl: FOR SELECT * FROM t ORDER BY intCol DO + | IF intCol = 2 THEN + | ITERATE lbl; + | END IF; + | SELECT stringCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row("first")), // select 
stringCol + Seq(Row("third")), // select stringCol + Seq(Row("fourth")), // select stringCol + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop local var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - leave") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT, stringCol STRING) using parquet; + | INSERT INTO t VALUES (1, 'first'), (2, 'second'), (3, 'third'), (4, 'fourth'); + | + | lbl: FOR SELECT * FROM t ORDER BY intCol DO + | IF intCol = 3 THEN + | LEAVE lbl; + | END IF; + | SELECT stringCol; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq(Row("first")), // select stringCol + Seq(Row("second")) // select stringCol + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - nested - in while") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | DECLARE cnt = 0; + | CREATE TABLE t (intCol INT) using parquet; + | INSERT INTO t VALUES (0); + | WHILE cnt < 2 DO + | SET cnt = cnt + 1; + | FOR SELECT * FROM t ORDER BY intCol DO + | SELECT intCol; + | END FOR; + | INSERT INTO t VALUES (cnt); + | END WHILE; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // declare cnt + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // set cnt + Seq(Row(0)), // select intCol + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row], // set cnt + Seq(Row(0)), // select intCol + Seq(Row(1)), // select intCol + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop cnt + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - nested - in other for") { + withTable("t", "t2") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | CREATE TABLE t2 (intCol2 INT) using parquet; + | INSERT INTO t VALUES (0), 
(1); + | INSERT INTO t2 VALUES (2), (3); + | FOR SELECT * FROM t ORDER BY intCol DO + | FOR SELECT * FROM t2 ORDER BY intCol2 DESC DO + | SELECT intCol; + | SELECT intCol2; + | END FOR; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(0)), // select intCol + Seq(Row(3)), // select intCol2 + Seq(Row(0)), // select intCol + Seq(Row(2)), // select intCol2 + Seq.empty[Row], // drop local var + Seq(Row(1)), // select intCol + Seq(Row(3)), // select intCol2 + Seq(Row(1)), // select intCol + Seq(Row(2)), // select intCol2 + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop outer var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + // ignored until loops are fixed to support empty bodies + ignore("for statement - no variable - nested - empty result set") { + withTable("t") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | REPEAT + | FOR SELECT * FROM t ORDER BY intCol DO + | SELECT intCol; + | END FOR; + | UNTIL 1 = 1 + | END REPEAT; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // declare cnt + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // set cnt + Seq(Row(0)), // select intCol + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row], // set cnt + Seq(Row(0)), // select intCol + Seq(Row(1)), // select intCol + Seq.empty[Row], // insert + Seq.empty[Row], // drop local var + Seq.empty[Row] // drop cnt + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - nested - iterate outer loop") { + withTable("t", "t2") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | CREATE TABLE t2 (intCol2 INT) using parquet; + | INSERT INTO t VALUES (0), (1); + | INSERT INTO t2 VALUES (2), (3); + | lbl1: FOR SELECT * FROM t ORDER BY 
intCol DO + | lbl2: FOR SELECT * FROM t2 ORDER BY intCol2 DESC DO + | SELECT intCol2; + | ITERATE lbl1; + | SELECT 1; + | END FOR; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(3)), // select intCol2 + Seq(Row(3)), // select intCol2 + Seq.empty[Row] // drop outer var + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - nested - leave outer loop") { + withTable("t", "t2") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | CREATE TABLE t2 (intCol2 INT) using parquet; + | INSERT INTO t VALUES (0), (1); + | INSERT INTO t2 VALUES (2), (3); + | lbl1: FOR SELECT * FROM t ORDER BY intCol DO + | lbl2: FOR SELECT * FROM t2 ORDER BY intCol2 DESC DO + | SELECT intCol2; + | LEAVE lbl1; + | SELECT 1; + | END FOR; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(3)) // select intCol2 + ) + verifySqlScriptResult(sqlScript, expected) + } + } + + test("for statement - no variable - nested - leave inner loop") { + withTable("t", "t2") { + val sqlScript = + """ + |BEGIN + | CREATE TABLE t (intCol INT) using parquet; + | CREATE TABLE t2 (intCol2 INT) using parquet; + | INSERT INTO t VALUES (0), (1); + | INSERT INTO t2 VALUES (2), (3); + | lbl1: FOR SELECT * FROM t ORDER BY intCol DO + | lbl2: FOR SELECT * FROM t2 ORDER BY intCol2 DESC DO + | SELECT intCol2; + | LEAVE lbl2; + | SELECT 1; + | END FOR; + | END FOR; + |END + |""".stripMargin + + val expected = Seq( + Seq.empty[Row], // create table + Seq.empty[Row], // create table + Seq.empty[Row], // insert + Seq.empty[Row], // insert + Seq(Row(3)), // select intCol2 + Seq(Row(3)), // select intCol2 + Seq.empty[Row] // drop outer var + ) + 
verifySqlScriptResult(sqlScript, expected) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 24732223c6698..c4b09c4b289e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -32,7 +32,6 @@ import org.apache.spark.sql.execution.datasources.BucketingUtils import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.joins.SortMergeJoinExec import org.apache.spark.sql.functions._ -import org.apache.spark.sql.internal.ExpressionUtils.column import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} @@ -229,7 +228,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti checkPrunedAnswers( bucketSpec, bucketValues = Seq(bucketValue, bucketValue + 1, bucketValue + 2, bucketValue + 3), - filterCondition = column(inSetExpr), + filterCondition = Column(inSetExpr), df) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/CommitLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/CommitLogSuite.scala index 92bea82b35874..068f56839e6e1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/CommitLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/CommitLogSuite.scala @@ -17,11 +17,18 @@ package org.apache.spark.sql.streaming -import java.io.{ByteArrayInputStream, FileInputStream, FileOutputStream} +import java.io.{ByteArrayInputStream, FileInputStream, FileOutputStream, InputStream, OutputStream} +import java.nio.charset.StandardCharsets.UTF_8 import java.nio.file.Path +import scala.io.{Source => IOSource} + +import org.json4s.{Formats, NoTypeHints} 
+import org.json4s.jackson.Serialization + import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.execution.streaming.{CommitLog, CommitMetadata} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.streaming.{CommitLog, CommitMetadata, HDFSMetadataLog} import org.apache.spark.sql.test.SharedSparkSession class CommitLogSuite extends SparkFunSuite with SharedSparkSession { @@ -32,13 +39,8 @@ class CommitLogSuite extends SparkFunSuite with SharedSparkSession { "core", "src", "test", - "scala", - "org", - "apache", - "spark", - "sql", - "streaming", "resources", + "structured-streaming", "testCommitLogV2" ) } @@ -49,13 +51,8 @@ class CommitLogSuite extends SparkFunSuite with SharedSparkSession { "core", "src", "test", - "scala", - "org", - "apache", - "spark", - "sql", - "streaming", "resources", + "structured-streaming", "testCommitLogV1" ) } @@ -108,6 +105,57 @@ class CommitLogSuite extends SparkFunSuite with SharedSparkSession { assert(commitMetadata.nextBatchWatermarkMs === 233) assert(commitMetadata.stateUniqueIds === Map.empty) } + + // Test an old version of Spark can ser-de the new version of commit log, + // but running under V1 (i.e. no stateUniqueIds) + test("v1 Serde backward compatibility") { + // This is the json created by a V1 commit log + val commitLogV1WithStateUniqueId = """v1 + |{"nextBatchWatermarkMs":1,"stateUniqueIds":{}}""".stripMargin + val inputStream: ByteArrayInputStream = + new ByteArrayInputStream(commitLogV1WithStateUniqueId.getBytes("UTF-8")) + val commitMetadata: CommitMetadataLegacy = new CommitLogLegacy( + spark, testCommitLogV1FilePath.toString).deserialize(inputStream) + assert(commitMetadata.nextBatchWatermarkMs === 1) + } +} + +// DO-NOT-MODIFY-THE-CODE-BELOW +// Below are the legacy commit log code carbon copied from Spark branch-3.5, except +// adding a "Legacy" to the class names. 
+case class CommitMetadataLegacy(nextBatchWatermarkMs: Long = 0) { + def json: String = Serialization.write(this)(CommitMetadataLegacy.format) } +object CommitMetadataLegacy { + implicit val format: Formats = Serialization.formats(NoTypeHints) + def apply(json: String): CommitMetadataLegacy = Serialization.read[CommitMetadataLegacy](json) +} + +class CommitLogLegacy(sparkSession: SparkSession, path: String) + extends HDFSMetadataLog[CommitMetadataLegacy](sparkSession, path) { + + private val VERSION = 1 + private val EMPTY_JSON = "{}" + + override def deserialize(in: InputStream): CommitMetadataLegacy = { + // called inside a try-finally where the underlying stream is closed in the caller + val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines() + if (!lines.hasNext) { + throw new IllegalStateException("Incomplete log file in the offset commit log") + } + validateVersion(lines.next().trim, VERSION) + val metadataJson = if (lines.hasNext) lines.next() else EMPTY_JSON + CommitMetadataLegacy(metadataJson) + } + + override def serialize(metadata: CommitMetadataLegacy, out: OutputStream): Unit = { + // called inside a try-finally where the underlying stream is closed in the caller + out.write(s"v${VERSION}".getBytes(UTF_8)) + out.write('\n') + + // write metadata + out.write(metadata.json.getBytes(UTF_8)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index bcf0d4ac46655..0f382f4ed77de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -874,6 +874,26 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { ) } + testWithAllStateVersions("test that avro encoding is not supported") { + val inputData = MemoryStream[Int] + + val aggregated = + 
inputData.toDF() + .groupBy($"value") + .agg(count("*")) + .as[(Int, Long)] + + val ex = intercept[Exception] { + withSQLConf(SQLConf.STREAMING_STATE_STORE_ENCODING_FORMAT.key -> "avro") { + testStream(aggregated, Update)( + AddData(inputData, 3), + ProcessAllAvailable() + ) + } + } + assert(ex.getMessage.contains("State store encoding format as avro is not supported")) + } + private def prepareTestForChangingSchemaOfState( tempDir: File): (MemoryStream[Int], DataFrame) = { val inputData = MemoryStream[Int] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index ab9df9a1e5a6f..040b99e55cb01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -574,6 +574,21 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { matchPVals = true ) } + + test("test that avro encoding is not supported") { + val inputData = MemoryStream[String] + val result = inputData.toDS().dropDuplicates() + + val ex = intercept[Exception] { + withSQLConf(SQLConf.STREAMING_STATE_STORE_ENCODING_FORMAT.key -> "avro") { + testStream(result, Append)( + AddData(inputData, "a"), + ProcessAllAvailable() + ) + } + } + assert(ex.getMessage.contains("State store encoding format as avro is not supported")) + } } @SlowSQLTest diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala index 9a02ab3df7dd4..af86e6ec88996 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala @@ -220,4 
+220,18 @@ class StreamingDeduplicationWithinWatermarkSuite extends StateStoreMetricsTest { ) } } + + test("SPARK-50492: drop event time column after dropDuplicatesWithinWatermark") { + val inputData = MemoryStream[(Int, Int)] + val result = inputData.toDS() + .withColumn("first", timestamp_seconds($"_1")) + .withWatermark("first", "10 seconds") + .dropDuplicatesWithinWatermark("_2") + .select("_2") + + testStream(result, Append)( + AddData(inputData, (1, 2)), + CheckAnswer(2) + ) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithListStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithListStateSuite.scala index 88862e2ad0791..f7606cd45949f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithListStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithListStateSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.streaming import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.sql.Encoders import org.apache.spark.sql.execution.streaming.MemoryStream -import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithChangelogCheckpointingEnabled, RocksDBStateStoreProvider} +import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithEncodingTypes, AlsoTestWithRocksDBFeatures, RocksDBStateStoreProvider} import org.apache.spark.sql.internal.SQLConf case class InputRow(key: String, action: String, value: String) @@ -127,7 +127,7 @@ class ToggleSaveAndEmitProcessor } class TransformWithListStateSuite extends StreamTest - with AlsoTestWithChangelogCheckpointingEnabled { + with AlsoTestWithRocksDBFeatures with AlsoTestWithEncodingTypes { import testImplicits._ test("test appending null value in list state throw exception") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithListStateTTLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithListStateTTLSuite.scala index 409a255ae3e64..b188b92bdbb7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithListStateTTLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithListStateTTLSuite.scala @@ -20,11 +20,77 @@ package org.apache.spark.sql.streaming import java.time.Duration import org.apache.spark.sql.Encoders -import org.apache.spark.sql.execution.streaming.{ListStateImplWithTTL, MemoryStream} +import org.apache.spark.sql.execution.streaming.{ListStateImplWithTTL, MapStateImplWithTTL, MemoryStream, ValueStateImplWithTTL} import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock +// MultiStatefulVariableTTLProcessor is a StatefulProcessor that consumes a stream of +// strings and returns a stream of pairs. +// +// Internally, it uses several stateful variables to store the count of each string, for +// the sole purpose of verifying that these stateful variables all stay in sync and do not +// interfere with each other. +// +// The pattern of calling appendValue is to simulate the old behavior of appendValue, which +// used to add a record into the secondary index for every appendList call. +class MultiStatefulVariableTTLProcessor(ttlConfig: TTLConfig) + extends StatefulProcessor[String, String, (String, Long)]{ + @transient private var _listState: ListStateImplWithTTL[String] = _ + // Map from index to count + @transient private var _mapState: MapStateImplWithTTL[Long, Long] = _ + // Counts the number of times the string has occurred. It should always be + // equal to the size of the list state at the start and end of handleInputRows. 
+ @transient private var _valueState: ValueStateImplWithTTL[Long] = _ + + override def init( + outputMode: OutputMode, + timeMode: TimeMode): Unit = { + _listState = getHandle + .getListState("listState", Encoders.STRING, ttlConfig) + .asInstanceOf[ListStateImplWithTTL[String]] + _mapState = getHandle + .getMapState("mapState", Encoders.scalaLong, Encoders.scalaLong, ttlConfig) + .asInstanceOf[MapStateImplWithTTL[Long, Long]] + _valueState = getHandle + .getValueState("valueState", Encoders.scalaLong, ttlConfig) + .asInstanceOf[ValueStateImplWithTTL[Long]] + } + override def handleInputRows( + key: String, + inputRows: Iterator[String], + timerValues: TimerValues): Iterator[(String, Long)] = { + assertSanity() + val iter = inputRows.map { row => + // Update the list state + _listState.appendValue(row) + + // Update the map state + val mapStateCurrentSize = _mapState.iterator().size + _mapState.updateValue(mapStateCurrentSize + 1, mapStateCurrentSize + 1) + + // Update the value state + val currentCountFromValueState = _valueState.get() + _valueState.update(currentCountFromValueState + 1) + + assertSanity() + + (key, _listState.get().size.toLong) + } + + iter + } + + // Asserts that the list state, map state, and value state are all in sync. 
+ private def assertSanity(): Unit = { + val listSize = _listState.get().size + val mapSize = _mapState.iterator().size + val valueState = _valueState.get() + assert(listSize == mapSize) + assert(listSize == valueState) + } +} + class ListStateTTLProcessor(ttlConfig: TTLConfig) extends StatefulProcessor[String, InputEvent, OutputEvent] { @@ -80,10 +146,17 @@ class ListStateTTLProcessor(ttlConfig: TTLConfig) } else if (row.action == "append") { listState.appendValue(row.value) } else if (row.action == "get_values_in_ttl_state") { - val ttlValues = listState.getValuesInTTLState() + val ttlValues = listState.getValueInTTLState() ttlValues.foreach { v => results = OutputEvent(key, -1, isTTLValue = true, ttlValue = v) :: results } + } else if (row.action == "get_values_in_min_state") { + val minValues = listState.getMinValues() + minValues.foreach { minExpirationMs => + results = OutputEvent(key, -1, isTTLValue = true, ttlValue = minExpirationMs) :: results + } + } else if (row.action == "clear") { + listState.clear() } results.iterator @@ -94,7 +167,8 @@ class ListStateTTLProcessor(ttlConfig: TTLConfig) * Test suite for testing list state with TTL. * We use the base TTL suite with a list state processor. 
*/ -class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { +class TransformWithListStateTTLSuite extends TransformWithStateTTLTest + with StateStoreMetricsTest { import testImplicits._ @@ -105,6 +179,68 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { override def getStateTTLMetricName: String = "numListStateWithTTLVars" + test("verify the list state secondary index has at most one record per key") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName, + SQLConf.SHUFFLE_PARTITIONS.key -> "1") { + val ttlConfig = TTLConfig(ttlDuration = Duration.ofMinutes(10)) + val inputStream = MemoryStream[String] + val result = inputStream.toDS() + .groupByKey(x => x) + .transformWithState( + new MultiStatefulVariableTTLProcessor(ttlConfig), + TimeMode.ProcessingTime(), + OutputMode.Append()) + val clock = new StreamManualClock + + testStream(result)( + StartStream(Trigger.ProcessingTime("1 second"), triggerClock = clock), + + // We want all of the inputs to have different timestamps, so that each record + // gets its own unique TTL, and thus, its own unique secondary index record. Each + // is also processed in its own microbatch to ensure a unique batchTimestampMs. 
+ AddData(inputStream, "k1"), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("k1", 1)), + + AddData(inputStream, "k2"), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("k2", 1)), + + AddData(inputStream, "k1"), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("k1", 2)), + + AddData(inputStream, "k2"), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("k2", 2)), + + AddData(inputStream, "k1"), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("k1", 3)), + + AddData(inputStream, "k2"), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(("k2", 3)), + + // For each unique key that occurs t times, the MultiStatefulVariableTTLProcessor maintains: + // - Map state: t records in the primary, and t records in the TTL index + // - List state: 1 record in the primary, TTL, min, and count indexes + // - Value state: 1 record in the primary, and 1 record in the TTL index + // + // So in total, that amounts to 2t + 4 + 2 = 2t + 6 records. + // + // In this test, we have 2 unique keys, and each key occurs 3 times. Thus, the total number + // of keys in state is 2 * (2t + 6) where t = 3, which is 24. + // + // The number of updated rows is the total across the last time assertNumStateRows + // was called, and we only update numRowsUpdated for primary key updates. We ran 6 batches + // and each wrote 3 primary keys, so the total number of updated rows is 6 * 3 = 18. 
+ assertNumStateRows(total = 24, updated = 18) + ) + } + } + test("verify iterator works with expired values in beginning of list") { withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName, @@ -223,6 +359,7 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { // advance clock to trigger processing AdvanceManualClock(1 * 1000), CheckNewAnswer(), + // get ttl values AddData(inputStream, InputEvent("k1", "get_ttl_value_from_state", -1, null)), AdvanceManualClock(1 * 1000), @@ -231,6 +368,7 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { OutputEvent("k1", 2, isTTLValue = true, 182000), OutputEvent("k1", 3, isTTLValue = true, 182000) ), + AddData(inputStream, InputEvent("k1", "get", -1, null)), AdvanceManualClock(1 * 1000), CheckNewAnswer( @@ -262,6 +400,7 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { // advance clock to trigger processing AdvanceManualClock(1 * 1000), CheckNewAnswer(), + // get all elements without enforcing ttl AddData(inputStream, InputEvent("k1", "get_without_enforcing_ttl", -1, null)), AdvanceManualClock(1 * 1000), @@ -273,6 +412,7 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { OutputEvent("k1", 5, isTTLValue = false, -1), OutputEvent("k1", 6, isTTLValue = false, -1) ), + AddData(inputStream, InputEvent("k1", "get_ttl_value_from_state", -1, null)), AdvanceManualClock(1 * 1000), CheckNewAnswer( @@ -297,15 +437,14 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { // advance clock to trigger processing AdvanceManualClock(1 * 1000), CheckNewAnswer(), + // advance clock to expire the middle three elements AddData(inputStream, InputEvent("k1", "get_values_in_ttl_state", -1, null)), AdvanceManualClock(1 * 1000), CheckNewAnswer( - OutputEvent("k1", -1, isTTLValue = true, 20000), - OutputEvent("k1", -1, isTTLValue = true, 181000), - OutputEvent("k1", -1, isTTLValue = true, 182000), 
- OutputEvent("k1", -1, isTTLValue = true, 188000) + OutputEvent("k1", -1, isTTLValue = true, 20000) ), + // progress batch timestamp from 9000 to 54000, expiring the middle // three elements. AdvanceManualClock(45 * 1000), @@ -320,6 +459,7 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { OutputEvent("k1", 8, isTTLValue = false, -1), OutputEvent("k1", 9, isTTLValue = false, -1) ), + AddData(inputStream, InputEvent("k1", "get_without_enforcing_ttl", -1, null)), AdvanceManualClock(1 * 1000), CheckNewAnswer( @@ -330,12 +470,11 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { OutputEvent("k1", 8, isTTLValue = false, -1), OutputEvent("k1", 9, isTTLValue = false, -1) ), + AddData(inputStream, InputEvent("k1", "get_values_in_ttl_state", -1, null)), AdvanceManualClock(1 * 1000), CheckNewAnswer( - OutputEvent("k1", -1, isTTLValue = true, 181000), - OutputEvent("k1", -1, isTTLValue = true, 182000), - OutputEvent("k1", -1, isTTLValue = true, 188000) + OutputEvent("k1", -1, isTTLValue = true, 181000) ), StopStream ) @@ -343,6 +482,104 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { } } + // If we have a list for a key k1 -> [(v1, t1), (v2, t2), (v3, t3)] and they _all_ expire, + // then there should be no remaining records in any primary (or secondary index) for that key. + // However, if we have a separate key k2 -> [(v1, t4)] and the time is less than t4, then it + // should still be present after the clearing for k1. 
+ test("verify min-expiry index doesn't insert when the new minimum is None") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName, + SQLConf.SHUFFLE_PARTITIONS.key -> "1") { + withTempDir { checkpointLocation => + val inputStream = MemoryStream[InputEvent] + val ttlConfig1 = TTLConfig(ttlDuration = Duration.ofMinutes(1)) + val result1 = inputStream + .toDS() + .groupByKey(x => x.key) + .transformWithState( + getProcessor(ttlConfig1), + TimeMode.ProcessingTime(), + OutputMode.Append() + ) + + val clock = new StreamManualClock + testStream(result1)( + StartStream( + Trigger.ProcessingTime("1 second"), + triggerClock = clock, + checkpointLocation = checkpointLocation.getAbsolutePath + ), + + // Add 3 elements all with different eviction timestamps. + AddData(inputStream, InputEvent("k1", "append", 1)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(), + + AddData(inputStream, InputEvent("k1", "append", 2)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(), + + AddData(inputStream, InputEvent("k1", "append", 3)), + AdvanceManualClock(1 * 1000), // Time is 3000 + CheckNewAnswer(), + + // Add a separate key; this should not be affected by k1 expiring. + // It will have an expiration of 64000. 
+ AddData(inputStream, InputEvent("k2", "put", 1)), + + // Now, we should have: k1 -> [1, 2, 3] with TTLs [61000, 62000, 63000] respectively + AddData(inputStream, InputEvent("k1", "get_ttl_value_from_state", -1, null)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer( // Time is 4000 for this micro-batch + OutputEvent("k1", 1, isTTLValue = true, 61000), + OutputEvent("k1", 2, isTTLValue = true, 62000), + OutputEvent("k1", 3, isTTLValue = true, 63000) + ), + + AddData(inputStream, InputEvent("k1", "get_values_in_min_state", -1, null)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer( // Time is 5000 for this micro-batch + OutputEvent("k1", -1, isTTLValue = true, 61000) + ), + + // The k1 records expire at 63000, and the current time is 5000. So, we advance the + // clock by 63 - 5 = 58 seconds to expire those. + AdvanceManualClock((63 - 5) * 1000), + CheckNewAnswer(), + + // There should be 4 state rows left over: the primary, TTL, min-expiry, and count + // indexes for k2. + // + // It's important to check with assertNumStateRows, since the InputEvents + // only return values for the current grouping key, not the entirety of RocksDB. + assertNumStateRows(total = 4, updated = 4), + + // The k1 calls should both return no values. However, the k2 calls should return + // one record each. We put these into one AddData call since we want them all to + // run when the batchTimestampMs is 65000. + AddData(inputStream, + // These should both return no values, since all of k1 has been expired. + InputEvent("k1", "get_values_in_ttl_state", -1, null), + InputEvent("k1", "get_values_in_min_state", -1, null), + + // However, k2 still has a record. 
+ InputEvent("k2", "get_values_in_ttl_state", -1, null), + InputEvent("k2", "get_values_in_min_state", -1, null) + ), + AdvanceManualClock(1 * 1000), + CheckNewAnswer( // Time is 65000 for this micro-batch + OutputEvent("k2", -1, isTTLValue = true, 64000), + OutputEvent("k2", -1, isTTLValue = true, 64000) + ), + + assertNumStateRows(total = 0, updated = 0), + + StopStream + ) + } + } + } + test("verify iterator works with expired values in end of list") { withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName, @@ -380,14 +617,23 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { // advance clock to trigger processing AdvanceManualClock(1 * 1000), CheckNewAnswer(), + // get ttl values - AddData(inputStream, InputEvent("k1", "get_ttl_value_from_state", -1, null)), + AddData(inputStream, + InputEvent("k1", "get_ttl_value_from_state", -1, null), + InputEvent("k1", "get_values_in_min_state", -1) + ), AdvanceManualClock(1 * 1000), CheckNewAnswer( + // From the get_ttl_value_from_state call OutputEvent("k1", 1, isTTLValue = true, 121000), OutputEvent("k1", 2, isTTLValue = true, 122000), - OutputEvent("k1", 3, isTTLValue = true, 122000) + OutputEvent("k1", 3, isTTLValue = true, 122000), + + // From the get_values_in_min_state call + OutputEvent("k1", -1, isTTLValue = true, 121000) ), + AddData(inputStream, InputEvent("k1", "get", -1, null)), AdvanceManualClock(1 * 1000), CheckNewAnswer( @@ -410,6 +656,7 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { // advance clock to trigger processing AdvanceManualClock(1 * 1000), CheckNewAnswer(), + // get ttl values AddData(inputStream, InputEvent("k1", "get_ttl_value_from_state", -1, null)), AdvanceManualClock(1 * 1000), @@ -423,9 +670,8 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { ), AddData(inputStream, InputEvent("k1", "get_values_in_ttl_state", -1, null)), AdvanceManualClock(1 * 1000), + CheckNewAnswer( - 
OutputEvent("k1", -1, isTTLValue = true, 121000), - OutputEvent("k1", -1, isTTLValue = true, 122000), OutputEvent("k1", -1, isTTLValue = true, 65000) ), // expire end values, batch timestamp from 7000 to 67000 @@ -447,8 +693,7 @@ class TransformWithListStateTTLSuite extends TransformWithStateTTLTest { AddData(inputStream, InputEvent("k1", "get_values_in_ttl_state", -1, null)), AdvanceManualClock(1 * 1000), CheckNewAnswer( - OutputEvent("k1", -1, isTTLValue = true, 121000), - OutputEvent("k1", -1, isTTLValue = true, 122000) + OutputEvent("k1", -1, isTTLValue = true, 121000) ), StopStream ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateSuite.scala index 76c5cbeee424b..6884ef577f8ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.streaming import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.sql.Encoders import org.apache.spark.sql.execution.streaming.MemoryStream -import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithChangelogCheckpointingEnabled, RocksDBStateStoreProvider} +import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithEncodingTypes, AlsoTestWithRocksDBFeatures, RocksDBStateStoreProvider} import org.apache.spark.sql.internal.SQLConf case class InputMapRow(key: String, action: String, value: (String, String)) @@ -81,7 +81,7 @@ class TestMapStateProcessor * operators such as transformWithState. 
*/ class TransformWithMapStateSuite extends StreamTest - with AlsoTestWithChangelogCheckpointingEnabled { + with AlsoTestWithRocksDBFeatures with AlsoTestWithEncodingTypes { import testImplicits._ private def testMapStateWithNullUserKey(inputMapRow: InputMapRow): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateTTLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateTTLSuite.scala index 022280eb3bcef..2cb15263459ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateTTLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateTTLSuite.scala @@ -83,6 +83,8 @@ class MapStateSingleKeyTTLProcessor(ttlConfig: TTLConfig) ttlValues.foreach { v => results = OutputEvent(key, -1, isTTLValue = true, ttlValue = v._2) :: results } + } else if (row.action == "clear") { + mapState.clear() } results.iterator @@ -308,7 +310,6 @@ class TransformWithMapStateTTLSuite extends TransformWithStateTTLTest { AddData(inputStream, MapInputEvent("k1", "", "get_values_in_ttl_state", -1)), AdvanceManualClock(1 * 1000), CheckNewAnswer( - MapOutputEvent("k1", "key3", -1, isTTLValue = true, 123000), MapOutputEvent("k1", "key3", -1, isTTLValue = true, 126000), MapOutputEvent("k1", "key4", -1, isTTLValue = true, 123000), MapOutputEvent("k1", "key5", -1, isTTLValue = true, 123000) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateChainingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateChainingSuite.scala index 6888fcba45f3e..0e963bec41b4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateChainingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateChainingSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.{SparkRuntimeException, SparkThrowable} import org.apache.spark.sql.AnalysisException import 
org.apache.spark.sql.catalyst.ExtendedAnalysisException import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamExecution} -import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider +import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithEncodingTypes, AlsoTestWithRocksDBFeatures, RocksDBStateStoreProvider} import org.apache.spark.sql.functions.window import org.apache.spark.sql.internal.SQLConf @@ -103,47 +103,53 @@ case class AggEventRow( window: Window, count: Long) -class TransformWithStateChainingSuite extends StreamTest { +class TransformWithStateChainingSuite extends StreamTest + with AlsoTestWithRocksDBFeatures + with AlsoTestWithEncodingTypes { import testImplicits._ + private def isAvroEnabled: Boolean = SQLConf.get.stateStoreEncodingFormat == "avro" + test("watermark is propagated correctly for next stateful operator" + " after transformWithState") { - withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> - classOf[RocksDBStateStoreProvider].getName) { - val inputData = MemoryStream[InputEventRow] + if (!isAvroEnabled) { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] - val result = inputData.toDS() - .withWatermark("eventTime", "1 minute") - .groupByKey(x => x.key) - .transformWithState[OutputRow]( - new TestStatefulProcessor(), - "outputEventTime", - OutputMode.Append()) - .groupBy(window($"outputEventTime", "1 minute")) - .count() - .as[AggEventRow] + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + "outputEventTime", + OutputMode.Append()) + .groupBy(window($"outputEventTime", "1 minute")) + .count() + .as[AggEventRow] - testStream(result, OutputMode.Append())( - AddData(inputData, InputEventRow("k1", timestamp("2024-01-01 00:00:00"), "e1")), - // watermark should be 1 minute behind 
`2024-01-01 00:00:00`, nothing is - // emitted as all records have timestamp > epoch - CheckNewAnswer(), - Execute("assertWatermarkEquals") { q => - assertWatermarkEquals(q, timestamp("2023-12-31 23:59:00")) - }, - AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), - // global watermark should now be 1 minute behind `2024-02-01 00:00:00`. - CheckNewAnswer(AggEventRow( - Window(timestamp("2024-01-01 00:00:00"), timestamp("2024-01-01 00:01:00")), 1) - ), - Execute("assertWatermarkEquals") { q => - assertWatermarkEquals(q, timestamp("2024-01-31 23:59:00")) - }, - AddData(inputData, InputEventRow("k1", timestamp("2024-02-02 00:00:00"), "e1")), - CheckNewAnswer(AggEventRow( - Window(timestamp("2024-02-01 00:00:00"), timestamp("2024-02-01 00:01:00")), 1) + testStream(result, OutputMode.Append())( + AddData(inputData, InputEventRow("k1", timestamp("2024-01-01 00:00:00"), "e1")), + // watermark should be 1 minute behind `2024-01-01 00:00:00`, nothing is + // emitted as all records have timestamp > epoch + CheckNewAnswer(), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2023-12-31 23:59:00")) + }, + AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + // global watermark should now be 1 minute behind `2024-02-01 00:00:00`. 
+ CheckNewAnswer(AggEventRow( + Window(timestamp("2024-01-01 00:00:00"), timestamp("2024-01-01 00:01:00")), 1) + ), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2024-01-31 23:59:00")) + }, + AddData(inputData, InputEventRow("k1", timestamp("2024-02-02 00:00:00"), "e1")), + CheckNewAnswer(AggEventRow( + Window(timestamp("2024-02-01 00:00:00"), timestamp("2024-02-01 00:01:00")), 1) + ) ) - ) + } } } @@ -166,33 +172,35 @@ class TransformWithStateChainingSuite extends StreamTest { } } - test("missing eventTime column to transformWithState fails the query if" + - " another stateful operator is added") { - withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> - classOf[RocksDBStateStoreProvider].getName) { - val inputData = MemoryStream[InputEventRow] + test("missing eventTime column to transformWithState fails the query if " + + "another stateful operator is added") { + if (!isAvroEnabled) { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] - val result = inputData.toDS() - .withWatermark("eventTime", "1 minute") - .groupByKey(x => x.key) - .transformWithState[OutputRow]( - new TestStatefulProcessor(), - TimeMode.None(), - OutputMode.Append()) - .groupBy(window($"outputEventTime", "1 minute")) - .count() - - checkError( - exception = intercept[AnalysisException] { - testStream(result, OutputMode.Append())( - StartStream() - ) - }, - condition = "STREAMING_OUTPUT_MODE.UNSUPPORTED_OPERATION", - sqlState = "42KDE", - parameters = Map( - "outputMode" -> "append", - "operation" -> "streaming aggregations without watermark")) + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + TimeMode.None(), + OutputMode.Append()) + .groupBy(window($"outputEventTime", "1 minute")) + .count() + + checkError( + exception = 
intercept[AnalysisException] { + testStream(result, OutputMode.Append())( + StartStream() + ) + }, + condition = "STREAMING_OUTPUT_MODE.UNSUPPORTED_OPERATION", + sqlState = "42KDE", + parameters = Map( + "outputMode" -> "append", + "operation" -> "streaming aggregations without watermark")) + } } } @@ -234,25 +242,27 @@ class TransformWithStateChainingSuite extends StreamTest { test("dropDuplicateWithWatermark after transformWithState operator" + " fails if watermark column is not provided") { - withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> - classOf[RocksDBStateStoreProvider].getName) { - val inputData = MemoryStream[InputEventRow] - val result = inputData.toDS() - .withWatermark("eventTime", "1 minute") - .groupByKey(x => x.key) - .transformWithState[OutputRow]( - new TestStatefulProcessor(), - TimeMode.None(), - OutputMode.Append()) - .dropDuplicatesWithinWatermark() + if (!isAvroEnabled) { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + TimeMode.None(), + OutputMode.Append()) + .dropDuplicatesWithinWatermark() - val ex = intercept[ExtendedAnalysisException] { - testStream(result, OutputMode.Append())( - StartStream() - ) + val ex = intercept[ExtendedAnalysisException] { + testStream(result, OutputMode.Append())( + StartStream() + ) + } + assert(ex.getMessage.contains("dropDuplicatesWithinWatermark is not supported on" + + " streaming DataFrames/DataSets without watermark")) } - assert(ex.getMessage.contains("dropDuplicatesWithinWatermark is not supported on" + - " streaming DataFrames/DataSets without watermark")) } } @@ -269,14 +279,25 @@ class TransformWithStateChainingSuite extends StreamTest { OutputMode.Append()) .dropDuplicatesWithinWatermark() - testStream(result, 
OutputMode.Append())( - AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1"), - InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), - CheckNewAnswer(OutputRow("k1", timestamp("2024-02-01 00:00:00"), 2)), - Execute("assertWatermarkEquals") { q => - assertWatermarkEquals(q, timestamp("2024-01-31 23:59:00")) + if (!isAvroEnabled) { + testStream(result, OutputMode.Append())( + AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1"), + InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + CheckNewAnswer(OutputRow("k1", timestamp("2024-02-01 00:00:00"), 2)), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2024-01-31 23:59:00")) + } + ) + } else { + val ex = intercept[Exception] { + testStream(result, OutputMode.Append())( + AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1"), + InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + ProcessAllAvailable() + ) } - ) + assert(ex.getMessage.contains("State store encoding format as avro is not supported")) + } } } @@ -340,7 +361,7 @@ class TransformWithStateChainingSuite extends StreamTest { val inputData = MemoryStream[InputEventRow] inputData.toDS() .withWatermark("eventTime", "1 minute") - .createTempView("tempViewWithWatermark") + .createOrReplaceTempView("tempViewWithWatermark") val result = spark.readStream.table("tempViewWithWatermark") .as[InputEventRow] @@ -365,7 +386,7 @@ class TransformWithStateChainingSuite extends StreamTest { classOf[RocksDBStateStoreProvider].getName) { val inputData = MemoryStream[InputEventRow] inputData.toDS() - .createTempView("tempViewWithoutWatermark") + .createOrReplaceTempView("tempViewWithoutWatermark") val ex = intercept[AnalysisException] { val result = spark.readStream.table("tempViewWithoutWatermark") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateClusterSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateClusterSuite.scala new file mode 100644 index 0000000000000..3e2899f7c6ee7 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateClusterSuite.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.sql.{Dataset, Encoders, Row, SparkSession} +import org.apache.spark.sql.LocalSparkSession.withSparkSession +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider +import org.apache.spark.sql.internal.SQLConf + +case class FruitState( + name: String, + count: Long, + family: String +) + +class FruitCountStatefulProcessor(useImplicits: Boolean) + extends StatefulProcessor[String, String, (String, Long, String)] { + import implicits._ + + @transient protected var _fruitState: ValueState[FruitState] = _ + + override def init(outputMode: OutputMode, timeMode: TimeMode): Unit = { + if (useImplicits) { + _fruitState = getHandle.getValueState[FruitState]("fruitState", TTLConfig.NONE) + } else { + _fruitState = getHandle.getValueState("fruitState", Encoders.product[FruitState], + TTLConfig.NONE) + } + } + + private def getFamily(fruitName: String): String = { + if (fruitName == "orange" || fruitName == "lemon" || fruitName == "lime") { + "citrus" + } else { + "non-citrus" + } + } + + override def handleInputRows(key: String, inputRows: Iterator[String], timerValues: TimerValues): + Iterator[(String, Long, String)] = { + val new_cnt = _fruitState.getOption().map(x => x.count).getOrElse(0L) + inputRows.size + val family = getFamily(key) + _fruitState.update(FruitState(key, new_cnt, family)) + Iterator.single((key, new_cnt, family)) + } +} + +class FruitCountStatefulProcessorWithInitialState(useImplicits: Boolean) + extends StatefulProcessorWithInitialState[String, String, (String, Long, String), String] { + import implicits._ + + @transient protected var _fruitState: ValueState[FruitState] = _ + + override def init(outputMode: OutputMode, timeMode: TimeMode): Unit = { + if (useImplicits) { + _fruitState = getHandle.getValueState[FruitState]("fruitState", 
TTLConfig.NONE) + } else { + _fruitState = getHandle.getValueState("fruitState", Encoders.product[FruitState], + TTLConfig.NONE) + } + } + + private def getFamily(fruitName: String): String = { + if (fruitName == "orange" || fruitName == "lemon" || fruitName == "lime") { + "citrus" + } else { + "non-citrus" + } + } + + override def handleInitialState(key: String, initialState: String, + timerValues: TimerValues): Unit = { + val new_cnt = _fruitState.getOption().map(x => x.count).getOrElse(0L) + 1 + val family = getFamily(key) + _fruitState.update(FruitState(key, new_cnt, family)) + } + + override def handleInputRows(key: String, inputRows: Iterator[String], timerValues: TimerValues): + Iterator[(String, Long, String)] = { + val new_cnt = _fruitState.getOption().map(x => x.count).getOrElse(0L) + inputRows.size + val family = getFamily(key) + _fruitState.update(FruitState(key, new_cnt, family)) + Iterator.single((key, new_cnt, family)) + } +} + +trait TransformWithStateClusterSuiteBase extends SparkFunSuite { + def getSparkConf(): SparkConf = { + val conf = new SparkConf() + .setMaster("local-cluster[2, 2, 1024]") + .set(SQLConf.STATE_STORE_PROVIDER_CLASS.key, + classOf[RocksDBStateStoreProvider].getCanonicalName) + .set(SQLConf.SHUFFLE_PARTITIONS.key, + TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS.toString) + .set(SQLConf.STREAMING_STOP_TIMEOUT, 5000L) + conf + } + + // Start a new test with cluster containing two executors and streaming stop timeout set to 5s + val testSparkConf = getSparkConf() + + protected def testWithAndWithoutImplicitEncoders(name: String) + (func: (SparkSession, Boolean) => Any): Unit = { + Seq(false, true).foreach { useImplicits => + test(s"$name - useImplicits = $useImplicits") { + withSparkSession(SparkSession.builder().config(testSparkConf).getOrCreate()) { spark => + func(spark, useImplicits) + } + } + } + } +} + +/** + * Test suite spawning local cluster with multiple executors to test serde of stateful + * processors along with 
use of implicit encoders, if applicable in transformWithState operator. + */ +class TransformWithStateClusterSuite extends StreamTest with TransformWithStateClusterSuiteBase { + testWithAndWithoutImplicitEncoders("streaming with transformWithState - " + + "without initial state") { (spark, useImplicits) => + import spark.implicits._ + val input = MemoryStream(Encoders.STRING, spark.sqlContext) + val agg = input.toDS() + .groupByKey(x => x) + .transformWithState(new FruitCountStatefulProcessor(useImplicits), + TimeMode.None(), + OutputMode.Update() + ) + + val query = agg.writeStream + .format("memory") + .outputMode("update") + .queryName("output") + .start() + + input.addData("apple", "apple", "orange", "orange", "orange") + query.processAllAvailable() + + checkAnswer(spark.sql("select * from output"), + Seq(Row("apple", 2, "non-citrus"), + Row("orange", 3, "citrus"))) + + input.addData("lemon", "lime") + query.processAllAvailable() + checkAnswer(spark.sql("select * from output"), + Seq(Row("apple", 2, "non-citrus"), + Row("orange", 3, "citrus"), + Row("lemon", 1, "citrus"), + Row("lime", 1, "citrus"))) + + query.stop() + } + + testWithAndWithoutImplicitEncoders("streaming with transformWithState - " + + "with initial state") { (spark, useImplicits) => + import spark.implicits._ + + val fruitCountInitialDS: Dataset[String] = Seq( + "apple", "apple", "orange", "orange", "orange").toDS() + + val fruitCountInitial = fruitCountInitialDS + .groupByKey(x => x) + + val input = MemoryStream(Encoders.STRING, spark.sqlContext) + val agg = input.toDS() + .groupByKey(x => x) + .transformWithState(new FruitCountStatefulProcessorWithInitialState(useImplicits), + TimeMode.None(), + OutputMode.Update(), fruitCountInitial) + + val query = agg.writeStream + .format("memory") + .outputMode("update") + .queryName("output") + .start() + + input.addData("apple", "apple", "orange", "orange", "orange") + query.processAllAvailable() + + checkAnswer(spark.sql("select * from output"), + 
Seq(Row("apple", 4, "non-citrus"), + Row("orange", 6, "citrus"))) + + input.addData("lemon", "lime") + query.processAllAvailable() + checkAnswer(spark.sql("select * from output"), + Seq(Row("apple", 4, "non-citrus"), + Row("orange", 6, "citrus"), + Row("lemon", 1, "citrus"), + Row("lime", 1, "citrus"))) + + query.stop() + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateInitialStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateInitialStateSuite.scala index 806d2f19f6f5c..cf304301565ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateInitialStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateInitialStateSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.streaming import org.apache.spark.sql.{DataFrame, Dataset, Encoders, KeyValueGroupedDataset} import org.apache.spark.sql.execution.datasources.v2.state.StateSourceOptions import org.apache.spark.sql.execution.streaming.MemoryStream -import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithChangelogCheckpointingEnabled, RocksDBStateStoreProvider} +import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithRocksDBFeatures, RocksDBStateStoreProvider} import org.apache.spark.sql.functions.{col, timestamp_seconds} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock @@ -47,6 +47,8 @@ case class UnionUnflattenInitialStateRow( abstract class StatefulProcessorWithInitialStateTestClass[V] extends StatefulProcessorWithInitialState[ String, InitInputRow, (String, String, Double), V] { + import implicits._ + @transient var _valState: ValueState[Double] = _ @transient var _listState: ListState[Double] = _ @transient var _mapState: MapState[Double, Int] = _ @@ -54,13 +56,9 @@ abstract class StatefulProcessorWithInitialStateTestClass[V] override def init( outputMode: OutputMode, timeMode: 
TimeMode): Unit = { - _valState = getHandle.getValueState[Double]("testValueInit", Encoders.scalaDouble, - TTLConfig.NONE) - _listState = getHandle.getListState[Double]("testListInit", Encoders.scalaDouble, - TTLConfig.NONE) - _mapState = getHandle.getMapState[Double, Int]( - "testMapInit", Encoders.scalaDouble, Encoders.scalaInt, - TTLConfig.NONE) + _valState = getHandle.getValueState[Double]("testValueInit", TTLConfig.NONE) + _listState = getHandle.getListState[Double]("testListInit", TTLConfig.NONE) + _mapState = getHandle.getMapState[Double, Int]("testMapInit", TTLConfig.NONE) } override def handleInputRows( @@ -363,7 +361,7 @@ class StatefulProcessorWithInitialStateEventTimerClass * streaming operator with user-defined initial state */ class TransformWithStateInitialStateSuite extends StateStoreMetricsTest - with AlsoTestWithChangelogCheckpointingEnabled { + with AlsoTestWithRocksDBFeatures { import testImplicits._ @@ -379,6 +377,8 @@ class TransformWithStateInitialStateSuite extends StateStoreMetricsTest withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName) { + val clock = new StreamManualClock + val inputData = MemoryStream[InitInputRow] val kvDataSet = inputData.toDS() .groupByKey(x => x.key) @@ -390,10 +390,12 @@ class TransformWithStateInitialStateSuite extends StateStoreMetricsTest TimeMode.None(), OutputMode.Append(), initStateDf) testStream(query, OutputMode.Update())( + StartStream(Trigger.ProcessingTime("1 second"), triggerClock = clock), // non-exist key test AddData(inputData, InitInputRow("k1", "update", 37.0)), AddData(inputData, InitInputRow("k2", "update", 40.0)), AddData(inputData, InitInputRow("non-exist", "getOption", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("non-exist", "getOption", -1.0)), Execute { q => assert(q.lastProgress @@ -402,59 +404,80 @@ class TransformWithStateInitialStateSuite extends StateStoreMetricsTest AddData(inputData, InitInputRow("k1", "appendList", 37.0)), 
AddData(inputData, InitInputRow("k2", "appendList", 40.0)), AddData(inputData, InitInputRow("non-exist", "getList", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(), AddData(inputData, InitInputRow("k1", "incCount", 37.0)), AddData(inputData, InitInputRow("k2", "incCount", 40.0)), AddData(inputData, InitInputRow("non-exist", "getCount", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("non-exist", "getCount", 0.0)), + AddData(inputData, InitInputRow("k2", "incCount", 40.0)), AddData(inputData, InitInputRow("k2", "getCount", 40.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("k2", "getCount", 2.0)), // test every row in initial State is processed AddData(inputData, InitInputRow("init_1", "getOption", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_1", "getOption", 40.0)), + AddData(inputData, InitInputRow("init_2", "getOption", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_2", "getOption", 100.0)), AddData(inputData, InitInputRow("init_1", "getList", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_1", "getList", 40.0)), + AddData(inputData, InitInputRow("init_2", "getList", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_2", "getList", 100.0)), AddData(inputData, InitInputRow("init_1", "getCount", 40.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_1", "getCount", 1.0)), + AddData(inputData, InitInputRow("init_2", "getCount", 100.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_2", "getCount", 1.0)), // Update row with key in initial row will work AddData(inputData, InitInputRow("init_1", "update", 50.0)), AddData(inputData, InitInputRow("init_1", "getOption", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_1", "getOption", 50.0)), + AddData(inputData, InitInputRow("init_1", "remove", -1.0)), AddData(inputData, InitInputRow("init_1", "getOption", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_1", "getOption", -1.0)), AddData(inputData, 
InitInputRow("init_1", "appendList", 50.0)), AddData(inputData, InitInputRow("init_1", "getList", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_1", "getList", 50.0), ("init_1", "getList", 40.0)), AddData(inputData, InitInputRow("init_1", "incCount", 40.0)), AddData(inputData, InitInputRow("init_1", "getCount", 40.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_1", "getCount", 2.0)), // test remove AddData(inputData, InitInputRow("k1", "remove", -1.0)), AddData(inputData, InitInputRow("k1", "getOption", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("k1", "getOption", -1.0)), AddData(inputData, InitInputRow("init_1", "clearCount", -1.0)), AddData(inputData, InitInputRow("init_1", "getCount", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer(("init_1", "getCount", 0.0)), AddData(inputData, InitInputRow("init_1", "clearList", -1.0)), AddData(inputData, InitInputRow("init_1", "getList", -1.0)), + AdvanceManualClock(1 * 1000), CheckNewAnswer() ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateSuite.scala index 505775d4f6a9b..97dad5fe78a19 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateSuite.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.functions.timestamp_seconds import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} +import org.apache.spark.tags.SlowSQLTest object TransformWithStateSuiteUtils { val NUM_SHUFFLE_PARTITIONS = 5 @@ -45,13 +46,13 @@ object TransformWithStateSuiteUtils { class RunningCountStatefulProcessor extends StatefulProcessor[String, String, (String, String)] with Logging { + import implicits._ @transient protected var 
_countState: ValueState[Long] = _ override def init( outputMode: OutputMode, timeMode: TimeMode): Unit = { - _countState = getHandle.getValueState[Long]("countState", - Encoders.scalaLong, TTLConfig.NONE) + _countState = getHandle.getValueState[Long]("countState", TTLConfig.NONE) } override def handleInputRows( @@ -72,12 +73,13 @@ class RunningCountStatefulProcessor extends StatefulProcessor[String, String, (S class RunningCountStatefulProcessorWithTTL extends StatefulProcessor[String, String, (String, String)] with Logging { + import implicits._ @transient protected var _countState: ValueState[Long] = _ override def init( outputMode: OutputMode, timeMode: TimeMode): Unit = { - _countState = getHandle.getValueState[Long]("countState", Encoders.scalaLong, + _countState = getHandle.getValueState[Long]("countState", TTLConfig(Duration.ofMillis(1000))) } @@ -384,20 +386,32 @@ class RunningCountStatefulProcessorWithError extends RunningCountStatefulProcess } // class for verify state schema is correctly written for all state var types -class StatefulProcessorWithCompositeTypes extends RunningCountStatefulProcessor { +class StatefulProcessorWithCompositeTypes(useImplicits: Boolean) + extends RunningCountStatefulProcessor { + import implicits._ @transient private var _listState: ListState[TestClass] = _ @transient private var _mapState: MapState[POJOTestClass, String] = _ override def init( outputMode: OutputMode, timeMode: TimeMode): Unit = { - _countState = getHandle.getValueState[Long]("countState", Encoders.scalaLong, - TTLConfig.NONE) - _listState = getHandle.getListState[TestClass]( - "listState", Encoders.product[TestClass], TTLConfig.NONE) - _mapState = getHandle.getMapState[POJOTestClass, String]( - "mapState", Encoders.bean(classOf[POJOTestClass]), Encoders.STRING, - TTLConfig.NONE) + + if (useImplicits) { + _countState = getHandle.getValueState[Long]("countState", TTLConfig.NONE) + _listState = getHandle.getListState[TestClass]( + "listState", TTLConfig.NONE) + 
_mapState = getHandle.getMapState[POJOTestClass, String]( + "mapState", Encoders.bean(classOf[POJOTestClass]), Encoders.STRING, + TTLConfig.NONE) + } else { + _countState = getHandle.getValueState[Long]("countState", Encoders.scalaLong, + TTLConfig.NONE) + _listState = getHandle.getListState[TestClass]( + "listState", Encoders.product[TestClass], TTLConfig.NONE) + _mapState = getHandle.getMapState[POJOTestClass, String]( + "mapState", Encoders.bean(classOf[POJOTestClass]), Encoders.STRING, + TTLConfig.NONE) + } } } @@ -428,12 +442,14 @@ class SleepingTimerProcessor extends StatefulProcessor[String, String, String] { /** * Class that adds tests for transformWithState stateful streaming operator */ +@SlowSQLTest class TransformWithStateSuite extends StateStoreMetricsTest - with AlsoTestWithChangelogCheckpointingEnabled { + with AlsoTestWithRocksDBFeatures with AlsoTestWithEncodingTypes { import testImplicits._ - test("transformWithState - streaming with rocksdb and invalid processor should fail") { + test("transformWithState - streaming with rocksdb and" + + " invalid processor should fail") { withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName, SQLConf.SHUFFLE_PARTITIONS.key -> @@ -688,7 +704,8 @@ class TransformWithStateSuite extends StateStoreMetricsTest } } - test("transformWithState - streaming with rocksdb and event time based timer") { + test("transformWithState - streaming with rocksdb and event " + + "time based timer") { val inputData = MemoryStream[(String, Int)] val result = inputData.toDS() @@ -778,7 +795,8 @@ class TransformWithStateSuite extends StateStoreMetricsTest ) } - test("Use statefulProcessor without transformWithState - handle should be absent") { + test("Use statefulProcessor without transformWithState -" + + " handle should be absent") { val processor = new RunningCountStatefulProcessor() val ex = intercept[Exception] { processor.getHandle @@ -1034,84 +1052,87 @@ class TransformWithStateSuite 
extends StateStoreMetricsTest } } - test("transformWithState - verify StateSchemaV3 writes correct SQL schema of key/value") { - withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> - classOf[RocksDBStateStoreProvider].getName, - SQLConf.SHUFFLE_PARTITIONS.key -> - TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS.toString) { - withTempDir { checkpointDir => - val metadataPathPostfix = "state/0/_stateSchema/default" - val stateSchemaPath = new Path(checkpointDir.toString, - s"$metadataPathPostfix") - val hadoopConf = spark.sessionState.newHadoopConf() - val fm = CheckpointFileManager.create(stateSchemaPath, hadoopConf) - - val keySchema = new StructType().add("value", StringType) - val schema0 = StateStoreColFamilySchema( - "countState", - keySchema, - new StructType().add("value", LongType, false), - Some(NoPrefixKeyStateEncoderSpec(keySchema)), - None - ) - val schema1 = StateStoreColFamilySchema( - "listState", - keySchema, - new StructType() + Seq(false, true).foreach { useImplicits => + test("transformWithState - verify StateSchemaV3 writes " + + s"correct SQL schema of key/value with useImplicits=$useImplicits") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName, + SQLConf.SHUFFLE_PARTITIONS.key -> + TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS.toString) { + withTempDir { checkpointDir => + val metadataPathPostfix = "state/0/_stateSchema/default" + val stateSchemaPath = new Path(checkpointDir.toString, + s"$metadataPathPostfix") + val hadoopConf = spark.sessionState.newHadoopConf() + val fm = CheckpointFileManager.create(stateSchemaPath, hadoopConf) + + val keySchema = new StructType().add("value", StringType) + val schema0 = StateStoreColFamilySchema( + "countState", + keySchema, + new StructType().add("value", LongType, false), + Some(NoPrefixKeyStateEncoderSpec(keySchema)), + None + ) + val schema1 = StateStoreColFamilySchema( + "listState", + keySchema, + new StructType() .add("id", LongType, 
false) .add("name", StringType), - Some(NoPrefixKeyStateEncoderSpec(keySchema)), - None - ) - - val userKeySchema = new StructType() - .add("id", IntegerType, false) - .add("name", StringType) - val compositeKeySchema = new StructType() - .add("key", new StructType().add("value", StringType)) - .add("userKey", userKeySchema) - val schema2 = StateStoreColFamilySchema( - "mapState", - compositeKeySchema, - new StructType().add("value", StringType), - Some(PrefixKeyScanStateEncoderSpec(compositeKeySchema, 1)), - Option(userKeySchema) - ) - - val inputData = MemoryStream[String] - val result = inputData.toDS() - .groupByKey(x => x) - .transformWithState(new StatefulProcessorWithCompositeTypes(), - TimeMode.None(), - OutputMode.Update()) - - testStream(result, OutputMode.Update())( - StartStream(checkpointLocation = checkpointDir.getCanonicalPath), - AddData(inputData, "a", "b"), - CheckNewAnswer(("a", "1"), ("b", "1")), - Execute { q => - q.lastProgress.runId - val schemaFilePath = fm.list(stateSchemaPath).toSeq.head.getPath - val providerId = StateStoreProviderId(StateStoreId( - checkpointDir.getCanonicalPath, 0, 0), q.lastProgress.runId) - val checker = new StateSchemaCompatibilityChecker(providerId, - hadoopConf, Some(schemaFilePath)) - val colFamilySeq = checker.readSchemaFile() - - assert(TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS == - q.lastProgress.stateOperators.head.customMetrics.get("numValueStateVars").toInt) - assert(TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS == - q.lastProgress.stateOperators.head.customMetrics.get("numListStateVars").toInt) - assert(TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS == - q.lastProgress.stateOperators.head.customMetrics.get("numMapStateVars").toInt) - - assert(colFamilySeq.length == 3) - assert(colFamilySeq.map(_.toString).toSet == Set( - schema0, schema1, schema2 - ).map(_.toString)) - }, - StopStream - ) + Some(NoPrefixKeyStateEncoderSpec(keySchema)), + None + ) + + val userKeySchema = new StructType() 
+ .add("id", IntegerType, false) + .add("name", StringType) + val compositeKeySchema = new StructType() + .add("key", new StructType().add("value", StringType)) + .add("userKey", userKeySchema) + val schema2 = StateStoreColFamilySchema( + "mapState", + compositeKeySchema, + new StructType().add("value", StringType), + Some(PrefixKeyScanStateEncoderSpec(compositeKeySchema, 1)), + Option(userKeySchema) + ) + + val inputData = MemoryStream[String] + val result = inputData.toDS() + .groupByKey(x => x) + .transformWithState(new StatefulProcessorWithCompositeTypes(useImplicits), + TimeMode.None(), + OutputMode.Update()) + + testStream(result, OutputMode.Update())( + StartStream(checkpointLocation = checkpointDir.getCanonicalPath), + AddData(inputData, "a", "b"), + CheckNewAnswer(("a", "1"), ("b", "1")), + Execute { q => + q.lastProgress.runId + val schemaFilePath = fm.list(stateSchemaPath).toSeq.head.getPath + val providerId = StateStoreProviderId(StateStoreId( + checkpointDir.getCanonicalPath, 0, 0), q.lastProgress.runId) + val checker = new StateSchemaCompatibilityChecker(providerId, + hadoopConf, Some(schemaFilePath)) + val colFamilySeq = checker.readSchemaFile() + + assert(TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS == + q.lastProgress.stateOperators.head.customMetrics.get("numValueStateVars").toInt) + assert(TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS == + q.lastProgress.stateOperators.head.customMetrics.get("numListStateVars").toInt) + assert(TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS == + q.lastProgress.stateOperators.head.customMetrics.get("numMapStateVars").toInt) + + assert(colFamilySeq.length == 3) + assert(colFamilySeq.map(_.toString).toSet == Set( + schema0, schema1, schema2 + ).map(_.toString)) + }, + StopStream + ) + } } } } @@ -1605,7 +1626,8 @@ class TransformWithStateSuite extends StateStoreMetricsTest } } - test("transformWithState - verify that schema file is kept after metadata is purged") { + test("transformWithState - 
verify that schema file " + + "is kept after metadata is purged") { withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName, SQLConf.SHUFFLE_PARTITIONS.key -> diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateTTLTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateTTLTest.scala index 2ddf69aa49e04..e1df2d640f1fc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateTTLTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateTTLTest.scala @@ -21,7 +21,7 @@ import java.sql.Timestamp import java.time.Duration import org.apache.spark.sql.execution.streaming.MemoryStream -import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider +import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithEncodingTypes, AlsoTestWithRocksDBFeatures, RocksDBStateStoreProvider} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock @@ -41,7 +41,8 @@ case class OutputEvent( * Test suite base for TransformWithState with TTL support. */ abstract class TransformWithStateTTLTest - extends StreamTest { + extends StreamTest with AlsoTestWithRocksDBFeatures + with AlsoTestWithEncodingTypes { import testImplicits._ def getProcessor(ttlConfig: TTLConfig): StatefulProcessor[String, InputEvent, OutputEvent] @@ -143,18 +144,24 @@ abstract class TransformWithStateTTLTest AddData(inputStream, InputEvent("k1", "put", 1)), // advance clock to trigger processing AdvanceManualClock(1 * 1000), + // In the primary index, we should have that k1 -> [(1, 61000)]. + // The TTL index has (61000, k1) -> empty. The min-expiry index has k1 -> 61000. 
CheckNewAnswer(), + // get this state, and make sure we get unexpired value AddData(inputStream, InputEvent("k1", "get", -1)), AdvanceManualClock(1 * 1000), CheckNewAnswer(OutputEvent("k1", 1, isTTLValue = false, -1)), + // ensure ttl values were added correctly AddData(inputStream, InputEvent("k1", "get_ttl_value_from_state", -1)), AdvanceManualClock(1 * 1000), CheckNewAnswer(OutputEvent("k1", 1, isTTLValue = true, 61000)), + AddData(inputStream, InputEvent("k1", "get_values_in_ttl_state", -1)), AdvanceManualClock(1 * 1000), CheckNewAnswer(OutputEvent("k1", -1, isTTLValue = true, 61000)), + // advance clock and update expiration time AdvanceManualClock(30 * 1000), AddData(inputStream, InputEvent("k1", "put", 1)), @@ -162,24 +169,30 @@ abstract class TransformWithStateTTLTest // advance clock to trigger processing AdvanceManualClock(1 * 1000), // validate value is not expired + // + // In the primary index, we still get that k1 -> [(1, 95000)]. + // The TTL index should now have (95000, k1) -> empty, and the min-expiry index + // should have k1 -> 95000. 
CheckNewAnswer(OutputEvent("k1", 1, isTTLValue = false, -1)), + // validate ttl value is updated in the state AddData(inputStream, InputEvent("k1", "get_ttl_value_from_state", -1)), AdvanceManualClock(1 * 1000), CheckNewAnswer(OutputEvent("k1", 1, isTTLValue = true, 95000)), - // validate ttl state has both ttl values present + + // validate ttl state has only the newer ttl value present AddData(inputStream, InputEvent("k1", "get_values_in_ttl_state", -1)), AdvanceManualClock(1 * 1000), - CheckNewAnswer(OutputEvent("k1", -1, isTTLValue = true, 61000), - OutputEvent("k1", -1, isTTLValue = true, 95000) - ), - // advance clock after older expiration value + CheckNewAnswer( OutputEvent("k1", -1, isTTLValue = true, 95000)), + + // advance clock after original expiration value; this shouldn't do anything AdvanceManualClock(30 * 1000), // ensure unexpired value is still present in the state AddData(inputStream, InputEvent("k1", "get", -1)), AdvanceManualClock(1 * 1000), CheckNewAnswer(OutputEvent("k1", 1, isTTLValue = false, -1)), - // validate that the older expiration value is removed from ttl state + + // validate that the ttl index still has the newer value AddData(inputStream, InputEvent("k1", "get_values_in_ttl_state", -1)), AdvanceManualClock(1 * 1000), CheckNewAnswer(OutputEvent("k1", -1, isTTLValue = true, 95000)) @@ -285,4 +298,59 @@ abstract class TransformWithStateTTLTest ) } } + + test("validate that clear only clears the current grouping key") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName, + SQLConf.SHUFFLE_PARTITIONS.key -> "1") { + val inputStream = MemoryStream[InputEvent] + val ttlConfig = TTLConfig(ttlDuration = Duration.ofMinutes(1)) + val result = inputStream.toDS() + .groupByKey(x => x.key) + .transformWithState( + getProcessor(ttlConfig), + TimeMode.ProcessingTime(), + OutputMode.Append()) + + val clock = new StreamManualClock + testStream(result)( + StartStream(Trigger.ProcessingTime("1 
second"), triggerClock = clock), + AddData(inputStream, + InputEvent("k1", "put", 1), + InputEvent("k2", "put", 2), + InputEvent("k3", "put", 3) + ), + // advance clock to trigger processing + AdvanceManualClock(1 * 1000), + CheckNewAnswer(), + + AddData( + inputStream, + InputEvent("k1", "clear", -1), + InputEvent("k1", "get_ttl_value_from_state", -1), + InputEvent("k1", "get_values_in_ttl_state", -1) + ), + // advance clock to trigger processing + AdvanceManualClock(1 * 1000), + CheckNewAnswer(), + + AddData(inputStream, + InputEvent("k2", "get_ttl_value_from_state", -1), + InputEvent("k2", "get_values_in_ttl_state", -1), + + InputEvent("k3", "get_ttl_value_from_state", -1), + InputEvent("k3", "get_values_in_ttl_state", -1) + ), + // advance clock to trigger processing + AdvanceManualClock(1 * 1000), + CheckNewAnswer( + OutputEvent("k2", 2, isTTLValue = true, 61000), + OutputEvent("k2", -1, isTTLValue = true, 61000), + + OutputEvent("k3", 3, isTTLValue = true, 61000), + OutputEvent("k3", -1, isTTLValue = true, 61000) + ) + ) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithValueStateTTLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithValueStateTTLSuite.scala index 21c3beb79314c..4c7f3a06ea7b9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithValueStateTTLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithValueStateTTLSuite.scala @@ -55,10 +55,12 @@ object TTLInputProcessFunction { } else if (row.action == "put") { valueState.update(row.value) } else if (row.action == "get_values_in_ttl_state") { - val ttlValues = valueState.getValuesInTTLState() + val ttlValues = valueState.getValueInTTLState() ttlValues.foreach { v => results = OutputEvent(key, -1, isTTLValue = true, ttlValue = v) :: results } + } else if (row.action == "clear") { + valueState.clear() } results.iterator @@ -76,6 +78,8 @@ object TTLInputProcessFunction { } } 
else if (row.action == "put") { valueState.update(row.value) + } else if (row.action == "clear") { + valueState.clear() } results.iterator @@ -262,7 +266,8 @@ class TransformWithValueStateTTLSuite extends TransformWithStateTTLTest { } } - test("verify StateSchemaV3 writes correct SQL schema of key/value and with TTL") { + test("verify StateSchemaV3 writes correct SQL " + + "schema of key/value and with TTL") { withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName, SQLConf.SHUFFLE_PARTITIONS.key -> diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index fe5a0f8ee257a..c93f17701c620 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -41,10 +41,11 @@ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.PlanTestBase import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.classic.{ClassicConversions, ColumnConversions} import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecution import org.apache.spark.sql.execution.datasources.DataSourceUtils -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{ColumnNodeToExpressionConverter, SQLConf} import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.UninterruptibleThread import org.apache.spark.util.Utils @@ -239,9 +240,12 @@ private[sql] trait SQLTestUtilsBase * This is because we create the `SparkSession` immediately before the first test is run, * but the implicits import is needed in the constructor. 
*/ - protected object testImplicits extends SQLImplicits { + protected object testImplicits + extends SQLImplicits + with ClassicConversions + with ColumnConversions { override protected def session: SparkSession = self.spark - implicit def toRichColumn(c: Column): SparkSession#RichColumn = session.RichColumn(c) + override protected def converter: ColumnNodeToExpressionConverter = self.spark.converter } protected override def withSQLConf[T](pairs: (String, String)*)(f: => T): T = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala index be91f5e789e2c..7e6f10bcc46f0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala @@ -22,7 +22,7 @@ import java.lang.{Long => JLong} import scala.collection.mutable.ArrayBuffer import org.apache.spark._ -import org.apache.spark.sql.{functions, Dataset, QueryTest, Row, SparkSession} +import org.apache.spark.sql.{functions, Dataset, Encoder, Encoders, QueryTest, Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Project} import org.apache.spark.sql.execution.{QueryExecution, WholeStageCodegenExec} @@ -30,6 +30,7 @@ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.command.{CreateDataSourceTableAsSelectCommand, LeafRunnableCommand} import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand import org.apache.spark.sql.execution.datasources.json.JsonFileFormat +import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StringType @@ -339,6 +340,51 @@ class 
DataFrameCallbackSuite extends QueryTest } } + test("SPARK-50581: support observe with udaf") { + withUserDefinedFunction(("someUdaf", true)) { + spark.udf.register("someUdaf", functions.udaf(new Aggregator[JLong, JLong, JLong] { + def zero: JLong = 0L + def reduce(b: JLong, a: JLong): JLong = a + b + def merge(b1: JLong, b2: JLong): JLong = b1 + b2 + def finish(r: JLong): JLong = r + def bufferEncoder: Encoder[JLong] = Encoders.LONG + def outputEncoder: Encoder[JLong] = Encoders.LONG + })) + + val df = spark.range(100) + + val metricMaps = ArrayBuffer.empty[Map[String, Row]] + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = { + if (qe.observedMetrics.nonEmpty) { + metricMaps += qe.observedMetrics + } + } + + override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = { + // No-op + } + } + try { + spark.listenerManager.register(listener) + + // udaf usage in observe is not working (serialization exception) + df.observe( + name = "my_metrics", + expr("someUdaf(id)").as("agg") + ) + .collect() + + sparkContext.listenerBus.waitUntilEmpty() + assert(metricMaps.size === 1) + assert(metricMaps.head("my_metrics") === Row(4950L)) + + } finally { + spark.listenerManager.unregister(listener) + } + } + } + private def validateObservedMetrics(df: Dataset[JLong]): Unit = { val metricMaps = ArrayBuffer.empty[Map[String, Row]] val listener = new QueryExecutionListener { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala index 436cea50ad972..9180ce1aee198 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/vectorized/ArrowColumnVectorSuite.scala @@ -515,4 +515,28 @@ class ArrowColumnVectorSuite extends SparkFunSuite { columnVector.close() 
allocator.close() } + + test("struct with TimestampNTZType") { + val allocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue) + val schema = new StructType().add("ts", TimestampNTZType) + val vector = ArrowUtils.toArrowField("struct", schema, nullable = true, null) + .createVector(allocator).asInstanceOf[StructVector] + vector.allocateNew() + val timestampVector = vector.getChildByOrdinal(0).asInstanceOf[TimeStampMicroVector] + + vector.setIndexDefined(0) + timestampVector.setSafe(0, 1000L) + + timestampVector.setValueCount(1) + vector.setValueCount(1) + + val columnVector = new ArrowColumnVector(vector) + assert(columnVector.dataType === schema) + + val row0 = columnVector.getStruct(0) + assert(row0.get(0, TimestampNTZType) === 1000L) + + columnVector.close() + allocator.close() + } } diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py index b69a903b44f90..4db22ff3b8e46 100644 --- a/sql/gen-sql-config-docs.py +++ b/sql/gen-sql-config-docs.py @@ -103,6 +103,14 @@ def generate_sql_configs_table_html(sql_configs, path): ) ) + if config.name == "spark.sql.files.ignoreInvalidPartitionPaths": + description = config.description.replace("<", "<").replace(">", ">") + elif config.name == "spark.sql.hive.quoteHiveStructFieldName": + description = config.description.replace( + "<", "<").replace(">", ">").replace("`", "`") + else: + description = config.description + f.write(dedent( """
@@ -115,7 +123,7 @@ def generate_sql_configs_table_html(sql_configs, path): .format( name=config.name, default=default, - description=markdown.markdown(config.description), + description=markdown.markdown(description), version=config.version ) )) diff --git a/sql/hive-thriftserver/src/test/resources/log4j2.properties b/sql/hive-thriftserver/src/test/resources/log4j2.properties index ebb3a6ccb2fca..e6753047c9055 100644 --- a/sql/hive-thriftserver/src/test/resources/log4j2.properties +++ b/sql/hive-thriftserver/src/test/resources/log4j2.properties @@ -32,12 +32,6 @@ appender.console.filter.1.type = Filters appender.console.filter.1.a.type = ThresholdFilter appender.console.filter.1.a.level = warn -# SPARK-34128: Suppress undesirable TTransportException warnings, due to THRIFT-4805 -appender.console.filter.1.b.type = RegexFilter -appender.console.filter.1.b.regex = .*Thrift error occurred during processing of message.* -appender.console.filter.1.b.onMatch = deny -appender.console.filter.1.b.onMismatch = neutral - #File Appender appender.file.type = File appender.file.name = File @@ -47,14 +41,9 @@ appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex appender.file.filter.1.type = Filters -appender.file.filter.1.a.type = RegexFilter -appender.file.filter.1.a.regex = .*Thrift error occurred during processing of message.* -appender.file.filter.1.a.onMatch = deny -appender.file.filter.1.a.onMismatch = neutral - # Set the logger level of File Appender to WARN -appender.file.filter.1.b.type = ThresholdFilter -appender.file.filter.1.b.level = debug +appender.file.filter.1.a.type = ThresholdFilter +appender.file.filter.1.a.level = debug # Some packages are noisy for no good reason. 
logger.parquet_recordreader.name = org.apache.parquet.hadoop.ParquetRecordReader diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 662f43fc00399..bc367d0cc856b 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -94,6 +94,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ // SPARK-28636 "decimalArithmeticOperations.sql", "literals.sql", + "random.sql", "subquery/scalar-subquery/scalar-subquery-predicate.sql", "subquery/in-subquery/in-limit.sql", "subquery/in-subquery/in-group-by.sql", @@ -104,6 +105,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ "timestampNTZ/datetime-special-ansi.sql", // SPARK-47264 "collations.sql", + "listagg-collations.sql", "pipe-operators.sql", // VARIANT type "variant/named-function-arguments.sql" diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index d7645a3c84692..5152c2193499a 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { val sessionHandle = client.openSession(user, "") val infoValue = client.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS) // scalastyle:off line.size.limit - assert(infoValue.getStringValue 
== "ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,END,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXECUTE,EXISTS,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GLOBAL,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,ITERATE,JOIN,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MERGE,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NO,NONE,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,REDUCE,REFERENCES,REFRESH,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,
ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,START,STATISTICS,STORED,STRATIFY,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,X,YEAR,YEARS,ZONE") + assert(infoValue.getStringValue == "ADD,AFTER,AGGREGATE,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALL,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DO,DOUBLE,DROP,ELSE,END,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXECUTE,EXISTS,EXPLAIN,EXPORT,EXTEND,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GLOBAL,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDENTIFIER,IDENTITY,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INCREMENT,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INV
OKER,IS,ITEMS,ITERATE,JOIN,JSON,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEAVE,LEFT,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,LOOP,MACRO,MAP,MATCHED,MERGE,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NO,NONE,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,RECURSIVE,REDUCE,REFERENCES,REFRESH,RENAME,REPAIR,REPEAT,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,START,STATISTICS,STORED,STRATIFY,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UNTIL,UPDATE,USE,USER,USING,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WEEK,WEEKS,WHEN,WHERE,WHILE,WINDOW,WITH,WITHIN,X,YEAR,YEARS,ZONE") // scalastyle:on line.size.limit } } diff --git a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt index 6e8c140c72dcc..4c44860c4618a 100644 --- a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt +++ b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt @@ -1,11 +1,11 @@ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor insert hive table benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -INSERT INTO DYNAMIC 3695 3849 218 0.0 360836.1 1.0X -INSERT INTO HYBRID 536 551 17 0.0 52374.2 6.9X -INSERT INTO STATIC 151 177 15 0.1 14737.4 24.5X -INSERT OVERWRITE DYNAMIC 3057 3228 241 0.0 298536.0 1.2X -INSERT OVERWRITE HYBRID 455 467 15 0.0 44443.5 8.1X -INSERT OVERWRITE STATIC 173 180 4 0.1 16911.3 21.3X +INSERT INTO DYNAMIC 3480 3775 417 0.0 339817.0 1.0X +INSERT INTO HYBRID 562 581 13 0.0 54901.2 6.2X +INSERT INTO STATIC 157 174 14 0.1 15316.1 22.2X +INSERT OVERWRITE DYNAMIC 2961 3195 331 0.0 289121.3 1.2X +INSERT OVERWRITE HYBRID 426 431 6 0.0 41557.2 8.2X +INSERT OVERWRITE STATIC 161 168 5 0.1 15682.4 21.7X diff --git a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-jdk21-hive2.3-results.txt b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-jdk21-hive2.3-results.txt index 7a901f75ddb35..38e3b10eb5d00 100644 --- a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-jdk21-hive2.3-results.txt +++ b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-jdk21-hive2.3-results.txt @@ -1,11 +1,11 @@ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor insert hive table benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -INSERT INTO DYNAMIC 3762 3968 292 0.0 367406.9 1.0X -INSERT INTO HYBRID 516 591 80 0.0 50355.2 7.3X -INSERT INTO STATIC 168 192 24 0.1 16403.7 22.4X -INSERT OVERWRITE DYNAMIC 3524 3643 169 0.0 344143.1 1.1X -INSERT OVERWRITE HYBRID 493 510 13 0.0 48137.8 7.6X -INSERT OVERWRITE STATIC 
178 190 14 0.1 17346.8 21.2X +INSERT INTO DYNAMIC 3406 3754 493 0.0 332568.8 1.0X +INSERT INTO HYBRID 496 523 22 0.0 48481.9 6.9X +INSERT INTO STATIC 146 168 20 0.1 14228.9 23.4X +INSERT OVERWRITE DYNAMIC 3031 3148 166 0.0 295998.0 1.1X +INSERT OVERWRITE HYBRID 419 444 26 0.0 40901.7 8.1X +INSERT OVERWRITE STATIC 158 169 12 0.1 15420.2 21.6X diff --git a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk21-results.txt b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk21-results.txt index f185c50f929bf..9f7cd0bfd8762 100644 --- a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk21-results.txt +++ b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk21-results.txt @@ -2,44 +2,44 @@ Hive UDAF vs Spark AF ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor hive udaf vs spark af: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hive udaf w/o group by 3232 3292 46 0.0 49313.1 1.0X -spark af w/o group by 20 26 4 3.3 303.1 162.7X -hive udaf w/ group by 2002 2055 40 0.0 30540.8 1.6X -spark af w/ group by w/o fallback 22 25 3 3.0 334.3 147.5X -spark af w/ group by w/ fallback 25 27 3 2.7 376.5 131.0X +hive udaf w/o group by 3071 3289 131 0.0 46855.4 1.0X +spark af w/o group by 21 27 5 3.2 315.4 148.6X +hive udaf w/ group by 2138 2161 26 0.0 32618.6 1.4X +spark af w/ group by w/o fallback 22 26 5 3.0 338.3 138.5X +spark af w/ group by w/ fallback 26 30 7 2.5 395.2 118.6X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - typed_count 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort agg w/ group by 23962 24276 445 4.4 228.5 1.0X -object agg w/ group by w/o fallback 7346 7389 41 14.3 70.1 3.3X -object agg w/ group by w/ fallback 15904 16415 443 6.6 151.7 1.5X -sort agg w/o group by 4041 4060 17 26.0 38.5 5.9X -object agg w/o group by w/o fallback 3872 3914 42 27.1 36.9 6.2X +sort agg w/ group by 23012 23051 55 4.6 219.5 1.0X +object agg w/ group by w/o fallback 6670 7292 278 15.7 63.6 3.5X +object agg w/ group by w/ fallback 15467 15512 48 6.8 147.5 1.5X +sort agg w/o group by 4075 4142 34 25.7 38.9 5.6X +object agg w/o group by w/o fallback 3715 3810 67 28.2 35.4 6.2X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - percentile_approx ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor object agg v.s. 
sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort agg w/ group by 417 449 16 5.0 198.9 1.0X -object agg w/ group by w/o fallback 328 339 5 6.4 156.5 1.3X -object agg w/ group by w/ fallback 467 501 15 4.5 222.4 0.9X -sort agg w/o group by 274 283 6 7.6 130.8 1.5X -object agg w/o group by w/o fallback 271 277 3 7.7 129.3 1.5X +sort agg w/ group by 412 436 14 5.1 196.4 1.0X +object agg w/ group by w/o fallback 324 333 6 6.5 154.7 1.3X +object agg w/ group by w/ fallback 414 421 7 5.1 197.3 1.0X +sort agg w/o group by 238 242 3 8.8 113.5 1.7X +object agg w/o group by w/o fallback 227 234 6 9.2 108.4 1.8X diff --git a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt index fb426c84414ba..1e143f39fbf91 100644 --- a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt +++ b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt @@ -2,44 +2,44 @@ Hive UDAF vs Spark AF ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor hive udaf vs spark af: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hive udaf w/o group by 3271 3305 29 0.0 49904.4 1.0X -spark af w/o group by 21 26 4 3.2 316.4 157.7X -hive udaf w/ group by 2070 2109 30 0.0 31591.0 1.6X -spark af w/ group by w/o fallback 22 26 3 3.0 335.8 148.6X -spark af w/ group by w/ fallback 25 27 3 2.6 379.4 131.5X +hive udaf w/o group by 3797 3861 45 0.0 57941.7 1.0X +spark af w/o group by 21 27 5 3.2 314.9 184.0X +hive udaf w/ 
group by 2569 2575 11 0.0 39194.8 1.5X +spark af w/ group by w/o fallback 22 26 3 3.0 333.7 173.6X +spark af w/ group by w/ fallback 25 28 3 2.6 388.1 149.3X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - typed_count ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort agg w/ group by 24310 24337 39 4.3 231.8 1.0X -object agg w/ group by w/o fallback 6916 7223 137 15.2 66.0 3.5X -object agg w/ group by w/ fallback 14558 14693 128 7.2 138.8 1.7X -sort agg w/o group by 4079 4125 48 25.7 38.9 6.0X -object agg w/o group by w/o fallback 3577 3608 22 29.3 34.1 6.8X +sort agg w/ group by 24523 24678 220 4.3 233.9 1.0X +object agg w/ group by w/o fallback 6979 7355 177 15.0 66.6 3.5X +object agg w/ group by w/ fallback 14572 14619 33 7.2 139.0 1.7X +sort agg w/o group by 4265 4283 19 24.6 40.7 5.7X +object agg w/o group by w/o fallback 3614 3660 29 29.0 34.5 6.8X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - percentile_approx ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor object agg v.s. 
sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort agg w/ group by 403 412 6 5.2 192.3 1.0X -object agg w/ group by w/o fallback 341 347 5 6.1 162.7 1.2X -object agg w/ group by w/ fallback 469 473 4 4.5 223.6 0.9X -sort agg w/o group by 304 310 4 6.9 144.9 1.3X -object agg w/o group by w/o fallback 297 305 3 7.1 141.4 1.4X +sort agg w/ group by 413 422 7 5.1 196.8 1.0X +object agg w/ group by w/o fallback 335 343 4 6.3 159.6 1.2X +object agg w/ group by w/ fallback 446 453 5 4.7 212.7 0.9X +sort agg w/o group by 274 280 4 7.7 130.6 1.5X +object agg w/o group by w/o fallback 266 273 4 7.9 126.9 1.6X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-jdk21-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-jdk21-results.txt index b941571563401..25ba0a0602b47 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-jdk21-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-jdk21-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 675 696 17 23.3 42.9 1.0X -Native ORC MR 745 759 24 21.1 47.3 0.9X -Native ORC Vectorized 91 118 9 172.4 5.8 7.4X +Hive built-in ORC 711 756 43 22.1 45.2 1.0X +Native ORC MR 762 842 92 20.7 48.4 0.9X +Native ORC Vectorized 94 115 17 167.8 6.0 7.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 
64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 680 728 47 23.1 43.3 1.0X -Native ORC MR 726 755 25 21.7 46.1 0.9X -Native ORC Vectorized 83 99 11 190.0 5.3 8.2X +Hive built-in ORC 693 722 32 22.7 44.0 1.0X +Native ORC MR 738 767 35 21.3 46.9 0.9X +Native ORC Vectorized 81 100 15 193.2 5.2 8.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 696 716 28 22.6 44.3 1.0X -Native ORC MR 741 766 32 21.2 47.1 0.9X -Native ORC Vectorized 86 98 12 181.9 5.5 8.0X +Hive built-in ORC 776 792 27 20.3 49.3 1.0X +Native ORC MR 895 907 18 17.6 56.9 0.9X +Native ORC Vectorized 102 120 14 154.7 6.5 7.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 720 729 14 21.9 45.8 1.0X -Native ORC MR 766 783 16 20.5 48.7 0.9X -Native ORC Vectorized 92 108 11 171.7 5.8 7.9X +Hive built-in ORC 831 857 34 18.9 52.9 1.0X +Native ORC MR 938 996 55 16.8 59.6 0.9X +Native ORC Vectorized 100 116 22 157.1 6.4 8.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: 
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 754 792 65 20.9 47.9 1.0X -Native ORC MR 861 879 27 18.3 54.7 0.9X -Native ORC Vectorized 147 164 13 107.3 9.3 5.1X +Hive built-in ORC 768 806 36 20.5 48.8 1.0X +Native ORC MR 950 972 25 16.6 60.4 0.8X +Native ORC Vectorized 139 160 34 113.4 8.8 5.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 826 833 6 19.0 52.5 1.0X -Native ORC MR 947 975 43 16.6 60.2 0.9X -Native ORC Vectorized 218 234 24 72.0 13.9 3.8X +Hive built-in ORC 914 959 38 17.2 58.1 1.0X +Native ORC MR 994 1007 18 15.8 63.2 0.9X +Native ORC Vectorized 223 240 31 70.6 14.2 4.1X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1632 1653 30 6.4 155.6 1.0X -Native ORC MR 1523 1528 8 6.9 145.2 1.1X -Native ORC Vectorized 610 643 24 17.2 58.2 2.7X +Hive built-in ORC 1770 1819 69 5.9 168.8 1.0X +Native ORC MR 1606 1611 6 6.5 153.2 1.1X +Native ORC Vectorized 606 646 44 17.3 57.8 2.9X 
================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Hive built-in ORC 937 953 14 16.8 59.6 1.0X -Data column - Native ORC MR 988 1040 73 15.9 62.8 0.9X -Data column - Native ORC Vectorized 89 107 13 177.2 5.6 10.6X -Partition column - Hive built-in ORC 640 690 55 24.6 40.7 1.5X -Partition column - Native ORC MR 695 708 16 22.6 44.2 1.3X -Partition column - Native ORC Vectorized 38 49 9 416.8 2.4 24.8X -Both columns - Hive built-in ORC 978 1015 42 16.1 62.2 1.0X -Both columns - Native ORC MR 1055 1076 29 14.9 67.1 0.9X -Both columns - Native ORC Vectorized 102 125 24 153.8 6.5 9.2X +Data column - Hive built-in ORC 989 1049 85 15.9 62.8 1.0X +Data column - Native ORC MR 1076 1078 2 14.6 68.4 0.9X +Data column - Native ORC Vectorized 103 143 29 152.9 6.5 9.6X +Partition column - Hive built-in ORC 648 687 35 24.3 41.2 1.5X +Partition column - Native ORC MR 680 716 32 23.1 43.2 1.5X +Partition column - Native ORC Vectorized 36 55 17 431.5 2.3 27.1X +Both columns - Hive built-in ORC 993 1019 38 15.8 63.1 1.0X +Both columns - Native ORC MR 1137 1173 51 13.8 72.3 0.9X +Both columns - Native ORC Vectorized 138 188 25 114.1 8.8 7.2X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 
21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 928 944 14 11.3 88.5 1.0X -Native ORC MR 711 733 25 14.8 67.8 1.3X -Native ORC Vectorized 127 139 19 82.9 12.1 7.3X +Hive built-in ORC 947 974 24 11.1 90.3 1.0X +Native ORC MR 934 950 18 11.2 89.0 1.0X +Native ORC Vectorized 127 132 6 82.8 12.1 7.5X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1539 1597 83 6.8 146.7 1.0X -Native ORC MR 1223 1232 12 8.6 116.7 1.3X -Native ORC Vectorized 286 320 27 36.6 27.3 5.4X +Hive built-in ORC 1476 1489 17 7.1 140.8 1.0X +Native ORC MR 1310 1328 25 8.0 125.0 1.1X +Native ORC Vectorized 308 350 29 34.1 29.3 4.8X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1381 1397 22 7.6 131.7 1.0X -Native ORC MR 1112 1124 17 9.4 106.0 1.2X -Native ORC Vectorized 363 394 30 28.9 34.6 3.8X +Hive built-in ORC 1258 1259 2 8.3 119.9 1.0X 
+Native ORC MR 1168 1173 7 9.0 111.4 1.1X +Native ORC Vectorized 362 408 50 29.0 34.5 3.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 733 751 24 14.3 69.9 1.0X -Native ORC MR 742 771 48 14.1 70.8 1.0X -Native ORC Vectorized 148 171 26 70.8 14.1 5.0X +Hive built-in ORC 749 774 43 14.0 71.4 1.0X +Native ORC MR 797 830 51 13.2 76.0 0.9X +Native ORC Vectorized 148 168 22 71.1 14.1 5.1X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 562 588 25 1.9 536.0 1.0X -Native ORC MR 87 109 15 12.0 83.3 6.4X -Native ORC Vectorized 30 37 6 34.9 28.7 18.7X +Hive built-in ORC 558 611 65 1.9 532.2 1.0X +Native ORC MR 90 110 25 11.7 85.5 6.2X +Native ORC Vectorized 32 40 10 33.1 30.2 17.6X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1022 1040 26 1.0 974.3 1.0X -Native ORC MR 100 114 11 10.5 95.2 10.2X -Native ORC Vectorized 37 44 7 28.6 35.0 27.8X +Hive built-in ORC 1029 1033 5 1.0 981.4 1.0X +Native ORC MR 98 117 20 10.7 93.2 10.5X +Native ORC Vectorized 39 50 9 26.7 37.5 26.2X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1522 1617 134 0.7 1451.1 1.0X -Native ORC MR 104 114 9 10.1 99.4 14.6X -Native ORC Vectorized 49 65 12 21.4 46.7 31.1X +Hive built-in ORC 1512 1536 35 0.7 1441.8 1.0X +Native ORC MR 106 128 17 9.9 101.1 14.3X +Native ORC Vectorized 46 67 17 22.9 43.7 33.0X ================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 285 321 35 3.7 272.0 1.0X -Native ORC MR 208 274 55 5.1 198.0 1.4X -Native ORC Vectorized 97 119 25 10.8 92.8 2.9X +Hive built-in ORC 324 369 49 3.2 309.0 1.0X +Native ORC MR 213 245 34 4.9 203.5 1.5X +Native ORC Vectorized 99 118 19 10.6 94.1 3.3X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 
64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 1963 2005 59 0.5 1871.9 1.0X -Native ORC MR 1612 1677 92 0.7 1537.5 1.2X -Native ORC Vectorized 859 944 92 1.2 819.4 2.3X +Hive built-in ORC 2169 2204 48 0.5 2069.0 1.0X +Native ORC MR 1765 1841 107 0.6 1683.6 1.2X +Native ORC Vectorized 858 940 100 1.2 818.1 2.5X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 5793 5868 107 0.2 5524.2 1.0X -Native ORC MR 5247 5321 105 0.2 5003.5 1.1X -Native ORC Vectorized 5404 5425 30 0.2 5153.5 1.1X +Hive built-in ORC 6111 6228 166 0.2 5828.0 1.0X +Native ORC MR 5474 5540 93 0.2 5220.3 1.1X +Native ORC Vectorized 5605 5658 74 0.2 5345.5 1.1X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 12664 12690 37 0.1 12077.5 1.0X -Native ORC MR 12398 12513 162 0.1 11823.9 1.0X -Native ORC Vectorized 12552 12553 1 0.1 11970.4 1.0X +Hive built-in ORC 13063 13093 42 0.1 12458.1 1.0X +Native ORC MR 12754 12782 39 0.1 12163.1 1.0X +Native ORC Vectorized 13004 13082 111 0.1 12401.2 1.0X 
================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1981 2003 30 0.5 1889.3 1.0X -Native ORC MR 2095 2133 54 0.5 1997.9 0.9X -Native ORC Vectorized 564 605 45 1.9 537.6 3.5X +Hive built-in ORC 2130 2182 73 0.5 2031.7 1.0X +Native ORC MR 2179 2290 156 0.5 2078.2 1.0X +Native ORC Vectorized 568 575 7 1.8 541.9 3.7X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 5412 5426 21 0.2 5161.0 1.0X -Native ORC MR 4556 4639 117 0.2 4345.2 1.2X -Native ORC Vectorized 1478 1506 39 0.7 1409.7 3.7X +Hive built-in ORC 5890 5894 5 0.2 5617.5 1.0X +Native ORC MR 5089 5121 45 0.2 4853.2 1.2X +Native ORC Vectorized 1512 1550 53 0.7 1442.1 3.9X -OpenJDK 64-Bit Server VM 21.0.4+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 21.0.5+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 5018 5079 87 0.2 4785.1 1.0X -Native ORC MR 5380 5388 11 0.2 5130.5 0.9X -Native ORC Vectorized 1975 2012 52 0.5 1883.8 2.5X +Hive built-in ORC 5276 5277 2 0.2 5031.7 1.0X +Native ORC MR 5272 5293 29 0.2 5027.8 1.0X +Native ORC Vectorized 1906 1913 9 0.6 1818.0 2.8X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt index 64d738858b1a2..7eca721b2d23d 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 724 754 46 21.7 46.0 1.0X -Native ORC MR 838 865 38 18.8 53.3 0.9X -Native ORC Vectorized 83 104 10 188.5 5.3 8.7X +Hive built-in ORC 738 797 51 21.3 46.9 1.0X +Native ORC MR 814 860 41 19.3 51.8 0.9X +Native ORC Vectorized 112 127 13 140.1 7.1 6.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 709 746 43 22.2 45.1 1.0X -Native ORC MR 791 822 28 19.9 50.3 0.9X -Native ORC Vectorized 85 101 9 184.6 5.4 
8.3X +Hive built-in ORC 640 730 82 24.6 40.7 1.0X +Native ORC MR 713 744 35 22.0 45.4 0.9X +Native ORC Vectorized 91 110 15 173.3 5.8 7.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 766 777 17 20.5 48.7 1.0X -Native ORC MR 772 801 25 20.4 49.1 1.0X -Native ORC Vectorized 89 98 6 177.0 5.7 8.6X +Hive built-in ORC 673 679 10 23.4 42.8 1.0X +Native ORC MR 787 816 29 20.0 50.0 0.9X +Native ORC Vectorized 91 103 9 172.3 5.8 7.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 755 762 6 20.8 48.0 1.0X -Native ORC MR 811 818 10 19.4 51.6 0.9X -Native ORC Vectorized 87 101 11 181.7 5.5 8.7X +Hive built-in ORC 648 662 11 24.3 41.2 1.0X +Native ORC MR 749 768 20 21.0 47.6 0.9X +Native ORC Vectorized 88 103 11 178.9 5.6 7.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 775 794 18 20.3 49.3 1.0X -Native ORC MR 847 857 9 18.6 53.9 0.9X -Native ORC Vectorized 141 157 17 111.6 9.0 5.5X +Hive built-in ORC 715 728 23 22.0 45.4 
1.0X +Native ORC MR 785 801 14 20.0 49.9 0.9X +Native ORC Vectorized 140 149 8 112.4 8.9 5.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 867 875 7 18.1 55.1 1.0X -Native ORC MR 914 940 22 17.2 58.1 0.9X -Native ORC Vectorized 219 232 15 71.8 13.9 4.0X +Hive built-in ORC 785 794 9 20.0 49.9 1.0X +Native ORC MR 871 899 43 18.1 55.4 0.9X +Native ORC Vectorized 221 239 24 71.2 14.0 3.6X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1588 1596 12 6.6 151.4 1.0X -Native ORC MR 1563 1567 6 6.7 149.1 1.0X -Native ORC Vectorized 628 676 63 16.7 59.8 2.5X +Hive built-in ORC 1494 1514 28 7.0 142.5 1.0X +Native ORC MR 1427 1433 8 7.3 136.1 1.0X +Native ORC Vectorized 602 610 12 17.4 57.4 2.5X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor 
Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Hive built-in ORC 1117 1162 63 14.1 71.0 1.0X -Data column - Native ORC MR 1293 1306 17 12.2 82.2 0.9X -Data column - Native ORC Vectorized 91 103 10 173.7 5.8 12.3X -Partition column - Hive built-in ORC 717 722 7 21.9 45.6 1.6X -Partition column - Native ORC MR 633 673 42 24.8 40.3 1.8X -Partition column - Native ORC Vectorized 37 50 7 419.5 2.4 29.8X -Both columns - Hive built-in ORC 948 1010 69 16.6 60.3 1.2X -Both columns - Native ORC MR 1102 1109 10 14.3 70.1 1.0X -Both columns - Native ORC Vectorized 105 121 13 149.7 6.7 10.6X +Data column - Hive built-in ORC 803 826 24 19.6 51.0 1.0X +Data column - Native ORC MR 910 941 39 17.3 57.9 0.9X +Data column - Native ORC Vectorized 91 105 11 172.8 5.8 8.8X +Partition column - Hive built-in ORC 589 612 19 26.7 37.5 1.4X +Partition column - Native ORC MR 616 640 20 25.5 39.2 1.3X +Partition column - Native ORC Vectorized 37 49 8 422.4 2.4 21.6X +Both columns - Hive built-in ORC 935 945 12 16.8 59.4 0.9X +Both columns - Native ORC MR 994 1000 9 15.8 63.2 0.8X +Both columns - Native ORC Vectorized 103 123 15 153.0 6.5 7.8X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 904 909 5 11.6 86.2 1.0X -Native ORC MR 804 812 7 13.0 76.7 1.1X -Native ORC 
Vectorized 128 148 19 82.0 12.2 7.1X +Hive built-in ORC 830 846 27 12.6 79.1 1.0X +Native ORC MR 747 751 3 14.0 71.3 1.1X +Native ORC Vectorized 131 147 15 80.0 12.5 6.3X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1429 1453 33 7.3 136.3 1.0X -Native ORC MR 1288 1291 4 8.1 122.9 1.1X -Native ORC Vectorized 294 300 6 35.7 28.0 4.9X +Hive built-in ORC 1475 1479 5 7.1 140.7 1.0X +Native ORC MR 1230 1236 7 8.5 117.3 1.2X +Native ORC Vectorized 294 311 21 35.6 28.1 5.0X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1282 1290 13 8.2 122.2 1.0X -Native ORC MR 1195 1199 7 8.8 113.9 1.1X -Native ORC Vectorized 346 382 45 30.3 33.0 3.7X +Hive built-in ORC 1243 1244 2 8.4 118.5 1.0X +Native ORC MR 1157 1166 13 9.1 110.3 1.1X +Native ORC Vectorized 349 377 20 30.0 33.3 3.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 771 803 29 13.6 73.5 1.0X -Native ORC MR 776 784 12 13.5 74.0 1.0X -Native ORC Vectorized 149 166 13 70.4 14.2 5.2X +Hive built-in ORC 689 715 35 15.2 65.7 1.0X +Native ORC MR 762 767 5 13.8 72.6 0.9X +Native ORC Vectorized 149 174 20 70.5 14.2 4.6X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 400 431 29 2.6 381.3 1.0X -Native ORC MR 89 102 11 11.8 84.9 4.5X -Native ORC Vectorized 32 38 6 33.3 30.1 12.7X +Hive built-in ORC 447 510 71 2.3 426.4 1.0X +Native ORC MR 86 101 11 12.1 82.4 5.2X +Native ORC Vectorized 32 38 6 33.0 30.3 14.1X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 742 748 8 1.4 707.9 1.0X -Native ORC MR 95 108 12 11.0 90.6 7.8X -Native ORC Vectorized 38 44 5 27.8 36.0 19.7X +Hive built-in ORC 733 744 19 1.4 698.9 1.0X +Native ORC MR 94 109 10 11.1 89.8 7.8X +Native ORC Vectorized 38 46 7 27.7 36.1 19.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure 
+OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1056 1100 62 1.0 1007.5 1.0X -Native ORC MR 104 114 8 10.1 99.4 10.1X -Native ORC Vectorized 47 54 5 22.5 44.5 22.7X +Hive built-in ORC 1079 1079 0 1.0 1028.8 1.0X +Native ORC MR 103 118 14 10.2 98.2 10.5X +Native ORC Vectorized 47 55 9 22.5 44.4 23.2X ================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 434 451 12 2.4 413.9 1.0X -Native ORC MR 273 294 14 3.8 260.8 1.6X -Native ORC Vectorized 104 139 22 10.0 99.6 4.2X +Hive built-in ORC 366 439 60 2.9 348.8 1.0X +Native ORC MR 268 290 23 3.9 255.2 1.4X +Native ORC Vectorized 104 116 15 10.1 99.5 3.5X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 2530 2556 38 0.4 2412.6 1.0X -Native ORC MR 1530 1598 97 0.7 1458.7 1.7X -Native ORC Vectorized 802 891 89 1.3 
764.7 3.2X +Hive built-in ORC 2452 2519 94 0.4 2338.9 1.0X +Native ORC MR 1620 1739 169 0.6 1544.6 1.5X +Native ORC Vectorized 940 1015 106 1.1 896.2 2.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 7576 7591 20 0.1 7225.4 1.0X -Native ORC MR 5344 5377 47 0.2 5096.4 1.4X -Native ORC Vectorized 5351 5375 35 0.2 5102.9 1.4X +Hive built-in ORC 7321 7391 100 0.1 6981.8 1.0X +Native ORC MR 5367 5395 40 0.2 5118.2 1.4X +Native ORC Vectorized 5121 5225 148 0.2 4883.4 1.4X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 16196 16239 60 0.1 15446.2 1.0X -Native ORC MR 12920 12974 76 0.1 12321.6 1.3X -Native ORC Vectorized 12604 12735 185 0.1 12019.9 1.3X +Hive built-in ORC 15706 15710 6 0.1 14978.4 1.0X +Native ORC MR 12801 12832 45 0.1 12208.0 1.2X +Native ORC Vectorized 12607 12815 294 0.1 12023.4 1.2X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg 
Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 2614 2637 32 0.4 2493.1 1.0X -Native ORC MR 2025 2027 2 0.5 1931.2 1.3X -Native ORC Vectorized 629 638 10 1.7 599.7 4.2X +Hive built-in ORC 2792 2816 34 0.4 2662.9 1.0X +Native ORC MR 2210 2291 114 0.5 2107.9 1.3X +Native ORC Vectorized 657 684 24 1.6 626.2 4.3X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 7193 7232 55 0.1 6860.0 1.0X -Native ORC MR 4480 4694 302 0.2 4272.6 1.6X -Native ORC Vectorized 1453 1458 6 0.7 1386.2 4.9X +Hive built-in ORC 7307 7509 286 0.1 6968.8 1.0X +Native ORC MR 4974 5189 305 0.2 4743.4 1.5X +Native ORC Vectorized 1578 1604 37 0.7 1504.9 4.6X -OpenJDK 64-Bit Server VM 17.0.12+7-LTS on Linux 6.5.0-1025-azure +OpenJDK 64-Bit Server VM 17.0.13+11-LTS on Linux 6.8.0-1017-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 6660 6679 27 0.2 6351.1 1.0X -Native ORC MR 5078 5085 9 0.2 4842.7 1.3X -Native ORC Vectorized 1762 1793 43 0.6 1680.6 3.8X +Hive built-in ORC 6461 6466 7 0.2 6161.5 1.0X +Native ORC MR 5289 5352 89 0.2 5043.9 1.2X +Native ORC Vectorized 2077 2086 13 0.5 1980.8 3.1X diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/DataSourceWithHiveResolver.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/DataSourceWithHiveResolver.scala new file mode 100644 index 0000000000000..842faba66cc30 --- /dev/null +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/DataSourceWithHiveResolver.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.datasources.{DataSourceResolver, LogicalRelation} + +/** + * [[DataSourceWithHiveResolver]] is a [[DataSourceResolver]] that additionally handles + * [[HiveTableRelation]] conversion using [[RelationConversions]]. + */ +class DataSourceWithHiveResolver(sparkSession: SparkSession, hiveCatalog: HiveSessionCatalog) + extends DataSourceResolver(sparkSession) { + private val relationConversions = RelationConversions(hiveCatalog) + + /** + * Invoke [[DataSourceResolver]] to resolve the input operator. If [[DataSourceResolver]] produces + * [[HiveTableRelation]], convert it to [[LogicalRelation]] if possible. 
+ */ + override def resolveOperator: PartialFunction[LogicalPlan, LogicalPlan] = { + case operator: LogicalPlan if super.resolveOperator.isDefinedAt(operator) => + val relationAfterDataSourceResolver = super.resolveOperator(operator) + + relationAfterDataSourceResolver match { + case hiveTableRelation: HiveTableRelation => + resolveHiveTableRelation(hiveTableRelation) + case other => other + } + } + + private def resolveHiveTableRelation(hiveTableRelation: HiveTableRelation): LogicalPlan = { + if (relationConversions.doConvertHiveTableRelationForRead(hiveTableRelation)) { + val logicalRelation: LogicalRelation = + relationConversions.convertHiveTableRelationForRead(hiveTableRelation) + logicalRelation.newInstance() + } else { + hiveTableRelation.newInstance() + } + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index dbeb8607facc2..a1cf27510838f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, Gener import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{Analyzer, EvalSubqueriesForTimeTravel, InvokeProcedures, ReplaceCharWithVarchar, ResolveSessionCatalog, ResolveTranspose} +import org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension import org.apache.spark.sql.catalyst.catalog.{ExternalCatalogWithListener, InvalidUDFClassException} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -84,6 +85,14 @@ class HiveSessionStateBuilder( * A logical query plan `Analyzer` with rules specific to Hive. 
*/ override protected def analyzer: Analyzer = new Analyzer(catalogManager) { + override val singlePassResolverExtensions: Seq[ResolverExtension] = Seq( + new DataSourceWithHiveResolver(session, catalog) + ) + + override val singlePassMetadataResolverExtensions: Seq[ResolverExtension] = Seq( + new FileResolver(session) + ) + override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = new ResolveHiveSerdeTable(session) +: new FindDataSourceTable(session) +: diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 87ce809914e10..73d0327e2bcad 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceStrate import org.apache.spark.sql.hive.execution._ import org.apache.spark.sql.hive.execution.HiveScriptTransformationExec import org.apache.spark.sql.hive.execution.InsertIntoHiveTable.BY_CTAS -import org.apache.spark.sql.internal.HiveSerDe +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} /** @@ -117,6 +117,9 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { } class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { + + override def conf: SQLConf = session.sessionState.conf + private def hiveTableWithStats(relation: HiveTableRelation): HiveTableRelation = { val table = relation.tableMeta val partitionCols = relation.partitionCols diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index ba03b7fe3cee1..00407f0ecc178 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ 
-517,6 +517,8 @@ private[hive] class HiveClientImpl( val excludedTableProperties = HiveStatisticsProperties ++ Set( // The property value of "comment" is moved to the dedicated field "comment" "comment", + // The property value of "collation" is moved to the dedicated field "collation" + "collation", // For EXTERNAL_TABLE, the table properties has a particular field "EXTERNAL". This is added // in the function toHiveTable. "EXTERNAL" @@ -526,6 +528,7 @@ private[hive] class HiveClientImpl( case (key, _) => excludedTableProperties.contains(key) } val comment = properties.get("comment") + val collation = properties.get("collation") CatalogTable( identifier = TableIdentifier(h.getTableName, Option(h.getDbName)), @@ -568,6 +571,7 @@ private[hive] class HiveClientImpl( properties = filteredProperties, stats = readHiveStats(properties), comment = comment, + collation = collation, // In older versions of Spark(before 2.2.0), we expand the view original text and // store that into `viewExpandedText`, that should be used in view resolution. // We get `viewExpandedText` as viewText, and also get `viewOriginalText` in order to @@ -1181,6 +1185,7 @@ private[hive] object HiveClientImpl extends Logging { table.storage.properties.foreach { case (k, v) => hiveTable.setSerdeParam(k, v) } table.properties.foreach { case (k, v) => hiveTable.setProperty(k, v) } table.comment.foreach { c => hiveTable.setProperty("comment", c) } + table.collation.foreach { c => hiveTable.setProperty("collation", c) } // Hive will expand the view text, so it needs 2 fields: viewOriginalText and viewExpandedText. // Since we don't expand the view text, but only add table properties, we map the `viewText` to // the both fields in hive table. 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala index cabdddd4c475d..0d4efd9e77742 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala @@ -55,6 +55,8 @@ class HiveFileFormat(fileSinkConf: FileSinkDesc) override def shortName(): String = "hive" + override def toString: String = "Hive" + override def inferSchema( sparkSession: SparkSession, options: Map[String, String], diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala index 0fcc43e5c3919..de2d15415837a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala @@ -283,9 +283,7 @@ object HiveScriptIOSchema extends HiveInspectors { propsMap = propsMap + (serdeConstants.LIST_COLUMN_TYPES -> columnTypesNames) val properties = new Properties() - // Can not use properties.putAll(propsMap.asJava) in scala-2.12 - // See https://github.com/scala/bug/issues/10418 - propsMap.foreach { case (k, v) => properties.put(k, v) } + properties.putAll(propsMap.asJava) serde.initialize(null, properties) serde @@ -299,9 +297,7 @@ object HiveScriptIOSchema extends HiveInspectors { val instance = Utils.classForName[RecordReader](klass).getConstructor(). 
newInstance() val props = new Properties() - // Can not use props.putAll(outputSerdeProps.toMap.asJava) in scala-2.12 - // See https://github.com/scala/bug/issues/10418 - ioschema.outputSerdeProps.toMap.foreach { case (k, v) => props.put(k, v) } + props.putAll(ioschema.outputSerdeProps.toMap.asJava) instance.initialize(inputStream, conf, props) instance } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTempPath.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTempPath.scala index 16edfea67e38e..d97d3cd6dd4a9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTempPath.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTempPath.scala @@ -165,4 +165,6 @@ class HiveTempPath(session: SparkSession, val hadoopConf: Configuration, path: P def deleteIfNotStagingDir(path: Path, fs: FileSystem): Unit = { if (Option(path) != stagingDirForCreating) fs.delete(path, true) } + + override def toString: String = s"HiveTempPath($path)" } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala index 779562bed5b0f..6486904fe65af 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.FilterEstimation import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.internal.SQLConf /** * Prune hive table partitions using partition filters on [[HiveTableRelation]]. 
The pruned @@ -43,6 +44,8 @@ import org.apache.spark.sql.execution.datasources.DataSourceStrategy private[sql] class PruneHiveTablePartitions(session: SparkSession) extends Rule[LogicalPlan] with CastSupport with PredicateHelper { + override def conf: SQLConf = session.sessionState.conf + /** * Extract the partition filters from the filters on the table. */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala index 700a4984a4e39..f5bf49439d3f9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/ObjectHashAggregateExecBenchmark.scala @@ -23,10 +23,11 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.{Column, DataFrame, SparkSession} +import org.apache.spark.sql.classic.ClassicConversions._ import org.apache.spark.sql.functions.{lit, percentile_approx => pa} import org.apache.spark.sql.hive.execution.TestingTypedCount import org.apache.spark.sql.hive.test.TestHive -import org.apache.spark.sql.internal.ExpressionUtils.{column => toCol, expression} +import org.apache.spark.sql.internal.ExpressionUtils.expression import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.LongType @@ -117,7 +118,7 @@ object ObjectHashAggregateExecBenchmark extends SqlBasedBenchmark { output = output ) - def typed_count(column: Column): Column = TestingTypedCount(column) + def typed_count(column: Column): Column = Column(TestingTypedCount(expression(column))) val df = spark.range(N) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/DataSourceWithHiveResolverSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/DataSourceWithHiveResolverSuite.scala 
new file mode 100644 index 0000000000000..cb26354521b02 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/DataSourceWithHiveResolverSuite.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.analysis.resolver.{MetadataResolver, Resolver} +import org.apache.spark.sql.catalyst.catalog.{HiveTableRelation, UnresolvedCatalogRelation} +import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.hive.HiveUtils +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} + +class DataSourceWithHiveResolverSuite extends TestHiveSingleton with SQLTestUtils { + private val keyValueTableSchema = StructType( + Seq( + StructField("key", IntegerType, true), + StructField("value", StringType, true) + ) + ) + + test("ORC table resolution") { + withTable("src_orc") { + spark.sql("CREATE TABLE src_orc (key INT, value STRING) 
STORED AS ORC") + + checkResolveOperator( + sqlText = "SELECT * FROM src_orc", + expectedTableName = "spark_catalog.default.src_orc", + expectedTableSchema = keyValueTableSchema, + convertedToLogicalRelation = true + ) + } + } + + test("ORC table resolution without conversion") { + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") { + withTable("src_orc_no_conversion") { + spark.sql("CREATE TABLE src_orc_no_conversion (key INT, value STRING) STORED AS ORC") + + checkResolveOperator( + sqlText = "SELECT * FROM src_orc_no_conversion", + expectedTableName = "spark_catalog.default.src_orc_no_conversion", + expectedTableSchema = keyValueTableSchema, + convertedToLogicalRelation = false + ) + } + } + } + + private def checkResolveOperator( + sqlText: String, + expectedTableName: String, + expectedTableSchema: StructType, + convertedToLogicalRelation: Boolean) = { + val metadataResolver = new MetadataResolver( + spark.sessionState.catalogManager, + Resolver.createRelationResolution(spark.sessionState.catalogManager) + ) + val dataSourceWithHiveResolver = new DataSourceWithHiveResolver( + spark, + spark.sessionState.catalog.asInstanceOf[HiveSessionCatalog] + ) + + val unresolvedPlan = spark.sql(sqlText).queryExecution.logical + + metadataResolver.resolve(unresolvedPlan) + + val unresolvedRelations = unresolvedPlan.collect { + case unresolvedRelation: UnresolvedRelation => unresolvedRelation + } + assert(unresolvedRelations.size == 1) + + val partiallyResolvedRelation = metadataResolver + .getRelationWithResolvedMetadata(unresolvedRelations.head) + .get + .asInstanceOf[SubqueryAlias] + .child + assert(partiallyResolvedRelation.isInstanceOf[UnresolvedCatalogRelation]) + + dataSourceWithHiveResolver.resolveOperator(partiallyResolvedRelation) match { + case logicalRelation: LogicalRelation => + assert(convertedToLogicalRelation) + assert(logicalRelation.catalogTable.get.identifier.unquotedString == expectedTableName) + assert(logicalRelation.relation.schema == 
expectedTableSchema) + case hiveTableRelation: HiveTableRelation => + assert(!convertedToLogicalRelation) + assert(hiveTableRelation.tableMeta.identifier.unquotedString == expectedTableName) + assert(hiveTableRelation.tableMeta.schema == expectedTableSchema) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveCharVarcharTestSuite.scala similarity index 98% rename from sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveCharVarcharTestSuite.scala index c12d727e59740..90cb5501ee6f6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveCharVarcharTestSuite.scala @@ -15,8 +15,9 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.hive +import org.apache.spark.sql.{CharVarcharTestSuite, Row} import org.apache.spark.sql.execution.command.CharVarcharDDLTestBase import org.apache.spark.sql.hive.test.TestHiveSingleton diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/OptimizeHiveMetadataOnlyQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/OptimizeHiveMetadataOnlyQuerySuite.scala index 2152a29b17ff4..6709a139dcf96 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/OptimizeHiveMetadataOnlyQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/OptimizeHiveMetadataOnlyQuerySuite.scala @@ -32,7 +32,7 @@ class OptimizeHiveMetadataOnlyQuerySuite extends QueryTest with TestHiveSingleto with BeforeAndAfter with SQLTestUtils { import spark.implicits._ - import spark.RichColumn + import spark.toRichColumn override def beforeAll(): Unit = { super.beforeAll() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala index f7e453a1dbdec..b67370f6eb9f6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala @@ -34,6 +34,9 @@ class PartitionedTablePerfStatsSuite override def beforeEach(): Unit = { super.beforeEach() + // Hive operation counters are doubled in dual-analyzer mode. + hiveContext.sparkSession.conf.set( + SQLConf.ANALYZER_DUAL_RUN_LEGACY_AND_SINGLE_PASS_RESOLVER.key, "false") FileStatusCache.resetForTesting() } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 9c2f4461ff263..e2f0040afe57c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -609,12 +609,15 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val message = intercept[AnalysisException] { - sql(s"ANALYZE TABLE $tableName PARTITION (DS='2010-01-01') COMPUTE STATISTICS") - }.getMessage - assert(message.contains( - "DS is not a valid partition column in table " + - s"`$SESSION_CATALOG_NAME`.`default`.`$tableName`")) + checkError( + exception = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $tableName PARTITION (DS='2010-01-01') COMPUTE STATISTICS") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`DS`", + "tableName" -> s"`$SESSION_CATALOG_NAME`.`default`.`$tableName`") + ) } } } @@ -692,16 +695,26 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql(s"INSERT INTO TABLE $tableName PARTITION (ds='2010-01-01') SELECT * FROM src") - assertAnalysisException( - s"ANALYZE TABLE $tableName PARTITION 
(hour=20) COMPUTE STATISTICS", - "hour is not a valid partition column in table " + - s"`$SESSION_CATALOG_NAME`.`default`.`${tableName.toLowerCase(Locale.ROOT)}`" + checkError( + exception = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $tableName PARTITION (hour=20) COMPUTE STATISTICS") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`hour`", + "tableName" -> + s"`$SESSION_CATALOG_NAME`.`default`.`${tableName.toLowerCase(Locale.ROOT)}`") ) - assertAnalysisException( - s"ANALYZE TABLE $tableName PARTITION (hour) COMPUTE STATISTICS", - "hour is not a valid partition column in table " + - s"`$SESSION_CATALOG_NAME`.`default`.`${tableName.toLowerCase(Locale.ROOT)}`" + checkError( + exception = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $tableName PARTITION (hour) COMPUTE STATISTICS") + }, + condition = "PARTITIONS_NOT_FOUND", + parameters = Map( + "partitionList" -> "`hour`", + "tableName" -> + s"`$SESSION_CATALOG_NAME`.`default`.`${tableName.toLowerCase(Locale.ROOT)}`") ) intercept[NoSuchPartitionException] { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala index 5c65eb8b12bac..27dc80fbfc173 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{DatabaseAlreadyExistsException, NoSuchDatabaseException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} +import org.apache.spark.sql.connector.catalog.TableCatalog import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.test.TestHiveVersion import 
org.apache.spark.sql.types.{IntegerType, StructType} @@ -68,11 +69,13 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { } def table(database: String, tableName: String, + collation: Option[String] = None, tableType: CatalogTableType = CatalogTableType.MANAGED): CatalogTable = { CatalogTable( identifier = TableIdentifier(tableName, Some(database)), tableType = tableType, schema = new StructType().add("key", "int"), + collation = collation, storage = CatalogStorageFormat( locationUri = None, inputFormat = Some(classOf[TextInputFormat].getName), @@ -204,6 +207,22 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { ignoreIfExists = false) } + test("create/alter table with collations") { + client.createTable(table("default", tableName = "collation_table", + collation = Some("UNICODE")), ignoreIfExists = false) + + val readBack = client.getTable("default", "collation_table") + assert(!readBack.properties.contains(TableCatalog.PROP_COLLATION)) + assert(readBack.collation === Some("UNICODE")) + + client.alterTable("default", "collation_table", + readBack.copy(collation = Some("UNICODE_CI"))) + val alteredTbl = client.getTable("default", "collation_table") + assert(alteredTbl.collation === Some("UNICODE_CI")) + + client.dropTable("default", "collation_table", ignoreIfNotExists = true, purge = true) + } + test("loadTable") { client.loadTable( emptyDir, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 53a65e195e3f0..a58adbce7ec52 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -657,10 +657,10 @@ class HiveDDLSuite exception = intercept[AnalysisException] { sql(s"ALTER TABLE $externalTab DROP PARTITION (ds='2008-04-09', unknownCol='12')") }, - condition = 
"_LEGACY_ERROR_TEMP_1231", + condition = "PARTITIONS_NOT_FOUND", parameters = Map( - "key" -> "unknownCol", - "tblName" -> s"`$SESSION_CATALOG_NAME`.`default`.`exttable_with_partitions`") + "partitionList" -> "`unknownCol`", + "tableName" -> s"`$SESSION_CATALOG_NAME`.`default`.`exttable_with_partitions`") ) sql( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index c41370c96241a..5431066c30a9f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -72,7 +72,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd override def afterEach(): Unit = { try { - spark.artifactManager.cleanUpResources() + spark.artifactManager.cleanUpResourcesForTesting() } finally { super.afterEach() } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala index df6ef57a581d0..ecf89e59c501c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala @@ -117,7 +117,6 @@ class HiveResolutionSuite extends HiveComparisonTest { /** * Negative examples. Currently only left here for documentation purposes. - * TODO(marmbrus): Test that catalyst fails on these queries. 
*/ /* SemanticException [Error 10009]: Line 1:7 Invalid table alias 'src' diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala index bcd0644af0782..008a324f73dac 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala @@ -23,12 +23,11 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax import org.scalatest.matchers.must.Matchers._ import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.{ExpressionEvalHelper} +import org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.functions.{col, count_distinct, first, lit, max, percentile_approx => pa} import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.ExpressionUtils.{column => toCol, expression} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ @@ -181,7 +180,7 @@ class ObjectHashAggregateSuite pa(column, lit(percentage), lit(10000)) } - private def typed_count(column: Column): Column = TestingTypedCount(column) + private def typed_count(column: Column): Column = Column(TestingTypedCount(column.expr)) // Generates 50 random rows for a given schema. 
private def generateRandomRows(schemaForGenerator: StructType): Seq[Row] = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala index 9ee3a0277c9a1..de6af30e663d2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala @@ -39,8 +39,8 @@ class ShowTablesSuite extends v1.ShowTablesSuiteBase with CommandSuiteBase { catalog: String, namespace: String, table: String): (String, Map[String, String]) = { - ("_LEGACY_ERROR_TEMP_1231", - Map("key" -> "id", "tblName" -> s"`$catalog`.`$namespace`.`$table`")) + ("PARTITIONS_NOT_FOUND", + Map("partitionList" -> "`id`", "tableName" -> s"`$catalog`.`$namespace`.`$table`")) } protected override def extendedPartExpectedResult: String = @@ -99,7 +99,7 @@ class ShowTablesSuite extends v1.ShowTablesSuiteBase with CommandSuiteBase { |View Original Text: SELECT id FROM $catalog.$namespace.$table |View Schema Mode: COMPENSATION |View Catalog and Namespace: $catalog.$namespace - |View Query Output Columns: [id] + |View Query Output Columns: [`id`] |Table Properties:
|Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |InputFormat: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala index 770e1da94a1c7..7a0599cda2fe7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHiveSingleton.scala @@ -42,7 +42,7 @@ trait TestHiveSingleton extends SparkFunSuite with BeforeAndAfterAll { protected override def afterEach(): Unit = { try { - spark.artifactManager.cleanUpResources() + spark.artifactManager.cleanUpResourcesForTesting() } finally { super.afterEach() } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 87d6a4909fdd4..f0f9046c6b623 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -467,7 +467,7 @@ abstract class DStream[T: ClassTag] ( // Explicitly remove blocks of BlockRDD rdd match { case b: BlockRDD[_] => - logInfo(log"Removing blocks of RDD ${MDC(LogKeys.RDD_ID, b)} " + + logInfo(log"Removing blocks of RDD ${MDC(LogKeys.RDD, b)} " + log"of time ${MDC(LogKeys.TIME, time)}") b.removeBlocks() case _ => diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala index e0e85712a2301..fae68123773dd 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala @@ -142,7 +142,7 @@ private[streaming] class BlockGenerator( state = StoppedAddingData } else { logWarning(log"Cannot stop 
BlockGenerator as its not in the Active state " + - log"[state = ${MDC(STATUS, state)}]") + log"[state = ${MDC(BLOCK_GENERATOR_STATUS, state)}]") return } }